In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
import re  # <--- Text safai ke liye naya library
from google.colab import files

# --- SETTINGS ---
# Blog whose RSS feed is read (passed as site_url to the scraper below).
WEBSITE_URL = "https://www.urdunovelbanks.com/"
# Written to the 'Pinterest board' column of every exported row.
BOARD_NAME = "Farhat Ishtiaq Novels"
# Blogger label used to filter the feed and to name the CSV; an empty value fetches all posts.
TARGET_LABEL = "Farhat Ishtiaq"
# ----------------

def get_blogger_posts_for_pinterest(site_url, board_name, target_label, timeout=30):
    """Build Pinterest bulk-upload rows from a Blogger RSS feed.

    Requests the blog's RSS feed (optionally restricted to one label) and, for
    every post whose body contains an <img>, emits one row holding the post
    title, the first image URL, the board name, a short plain-text description
    taken from the post body, and the post link. Posts without an image are
    skipped.

    Args:
        site_url: Blog base URL; a trailing slash is stripped.
        board_name: Value written to the 'Pinterest board' column of every row.
        target_label: Blogger label to filter by; a falsy value fetches all posts.
        timeout: Seconds passed to requests.get as its timeout.

    Returns:
        A list of dicts with the keys 'Title', 'Media URL', 'Pinterest board',
        'Description' and 'Link'. An empty list is returned when the response
        status is not 200 or when fetching/parsing raises (the exception
        message is printed, not re-raised).
    """
    base_url = site_url.rstrip('/')

    if target_label:
        # Percent-encode the label so spaces are valid inside the feed path.
        encoded_label = urllib.parse.quote(target_label)
        rss_url = f"{base_url}/feeds/posts/default/-/{encoded_label}?alt=rss&max-results=1500"
        print(f"Filtering for Label: {target_label}")
    else:
        rss_url = f"{base_url}/feeds/posts/default?alt=rss&max-results=500"
        print("Fetching ALL posts...")

    print(f"Fetching data from: {rss_url}")

    def _text_of(tag):
        # find() returns None for a missing child element; treat that as empty
        # text so one malformed <item> cannot abort the whole export.
        return tag.text if tag is not None else ""

    try:
        # Without a timeout the request can block forever on a stalled connection.
        response = requests.get(rss_url, timeout=timeout)
        if response.status_code != 200:
            print("Error: Shayad Label ka spelling ghalat hai ya posts nahi hain.")
            return []

        soup = BeautifulSoup(response.content, 'xml')
        items = soup.find_all('item')

        print(f"Total posts found: {len(items)}")

        data = []

        for item in items:
            title = _text_of(item.find('title'))
            link = _text_of(item.find('link'))
            description_html = _text_of(item.find('description'))

            # --- 1. First image in the post body becomes the pin's media URL ---
            desc_soup = BeautifulSoup(description_html, 'html.parser')
            img_tag = desc_soup.find('img')
            image_url = img_tag.get('src', '') if img_tag is not None else ''

            # Only posts with an image produce a row, so skip the text work otherwise.
            if not image_url:
                continue

            # --- 2. Plain-text description from the post body ---
            raw_text = desc_soup.get_text(separator='\n')

            # Drop blank lines and surrounding whitespace.
            clean_lines = [line.strip() for line in raw_text.split('\n') if line.strip()]

            # Use at most the first six non-empty lines.
            final_description = ' '.join(clean_lines[:6])

            # Fall back to a title-based description when the body has no text.
            if not final_description:
                final_description = f"{title} - Read complete novel online."

            # Keep the text under the 500-character Pinterest limit noted by the
            # original author; the cap is applied after the fallback so a very
            # long title cannot slip past it.
            if len(final_description) > 490:
                final_description = final_description[:485] + "..."

            data.append({
                'Title': title,
                'Media URL': image_url,
                'Pinterest board': board_name,
                'Description': final_description,
                'Link': link
            })

        return data

    except Exception as e:
        # Best-effort by design: report the problem and let the caller see "no data".
        print(f"Error: {e}")
        return []

# Run the export: fetch the rows, write them to a CSV and trigger the Colab download.
posts_data = get_blogger_posts_for_pinterest(WEBSITE_URL, BOARD_NAME, TARGET_LABEL)

if not posts_data:
    print("Koi data nahi mila. Label ki spelling check karein.")
else:
    df = pd.DataFrame(posts_data)

    # Name the file after the label (spaces become underscores); use All_Posts when no label is set.
    filename_label = "_".join(TARGET_LABEL.split(" ")) if TARGET_LABEL else "All_Posts"
    csv_filename = 'pinterest_upload_' + filename_label + '.csv'

    df.to_csv(csv_filename, index=False)

    print(
        "\nSUCCESS! CSV file ban gayi hai.",
        f"File Name: {csv_filename}",
        "Description mein ab post ki pehli 5-6 lines shamil hain.",
        "Downloading start ho rahi hai...",
        sep="\n",
    )

    # Hand the file to the browser via the Colab helper.
    files.download(csv_filename)

Filtering for Label: Farhat Ishtiaq
Fetching data from: https://www.urdunovelbanks.com/feeds/posts/default/-/Farhat%20Ishtiaq?alt=rss&max-results=1500
Total posts found: 20

SUCCESS! CSV file ban gayi hai.
File Name: pinterest_upload_Farhat_Ishtiaq.csv
Description mein ab post ki pehli 5-6 lines shamil hain.
Downloading start ho rahi hai...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
from google.colab import files  # <--- Ye naye line add ki hai auto-download ke liye

# --- SETTINGS ---
# Blog whose RSS feed is read (passed as site_url to the scraper below).
WEBSITE_URL = "https://www.urdunovelbanks.com/"
# Written to the 'Pinterest board' column of every exported row.
BOARD_NAME = "Alia Bukhari"
# Blogger label used to filter the feed and to name the CSV; an empty value fetches all posts.
TARGET_LABEL = "Alia Bukhari"
# ----------------

def get_blogger_posts_for_pinterest(site_url, board_name, target_label, timeout=30):
    """Build Pinterest bulk-upload rows from a Blogger RSS feed.

    Requests the blog's RSS feed (optionally restricted to one label) and, for
    every post whose body contains an <img>, emits one row holding the post
    title, the first image URL, the board name, a title-based description and
    the post link. Posts without an image are skipped.

    Args:
        site_url: Blog base URL; a trailing slash is stripped.
        board_name: Value written to the 'Pinterest board' column of every row.
        target_label: Blogger label to filter by; a falsy value fetches all posts.
        timeout: Seconds passed to requests.get as its timeout.

    Returns:
        A list of dicts with the keys 'Title', 'Media URL', 'Pinterest board',
        'Description' and 'Link'. An empty list is returned when the response
        status is not 200 or when fetching/parsing raises (the exception
        message is printed, not re-raised).
    """
    base_url = site_url.rstrip('/')

    if target_label:
        # Percent-encode the label so spaces are valid inside the feed path.
        encoded_label = urllib.parse.quote(target_label)
        rss_url = f"{base_url}/feeds/posts/default/-/{encoded_label}?alt=rss&max-results=500"
        print(f"Filtering for Label: {target_label}")
    else:
        rss_url = f"{base_url}/feeds/posts/default?alt=rss&max-results=500"
        print("Fetching ALL posts...")

    print(f"Fetching data from: {rss_url}")

    def _text_of(tag):
        # find() returns None for a missing child element; treat that as empty
        # text so one malformed <item> cannot abort the whole export.
        return tag.text if tag is not None else ""

    try:
        # Without a timeout the request can block forever on a stalled connection.
        response = requests.get(rss_url, timeout=timeout)
        if response.status_code != 200:
            print("Error: Shayad Label ka spelling ghalat hai ya posts nahi hain.")
            return []

        soup = BeautifulSoup(response.content, 'xml')
        items = soup.find_all('item')

        print(f"Total posts found: {len(items)}")

        data = []

        for item in items:
            title = _text_of(item.find('title'))
            link = _text_of(item.find('link'))
            description_html = _text_of(item.find('description'))

            # First image in the post body becomes the pin's media URL.
            desc_soup = BeautifulSoup(description_html, 'html.parser')
            img_tag = desc_soup.find('img')
            image_url = img_tag.get('src', '') if img_tag is not None else ''

            # Only posts with an image produce a row.
            if not image_url:
                continue

            data.append({
                'Title': title,
                'Media URL': image_url,
                'Pinterest board': board_name,
                'Description': f"{title} - Read this novel online on Smart Urdu Novel Bank.",
                'Link': link
            })

        return data

    except Exception as e:
        # Best-effort by design: report the problem and let the caller see "no data".
        print(f"Error: {e}")
        return []

# Run the export: fetch the rows, write them to a CSV and trigger the Colab download.
posts_data = get_blogger_posts_for_pinterest(WEBSITE_URL, BOARD_NAME, TARGET_LABEL)

if not posts_data:
    print("Koi data nahi mila. Label ki spelling check karein.")
else:
    df = pd.DataFrame(posts_data)

    # Name the file after the label (spaces become underscores); use All_Posts when no label is set.
    filename_label = "_".join(TARGET_LABEL.split(" ")) if TARGET_LABEL else "All_Posts"
    csv_filename = 'pinterest_upload_' + filename_label + '.csv'

    # Write the rows without the DataFrame index column.
    df.to_csv(csv_filename, index=False)

    print(
        "\nSUCCESS! CSV file ban gayi hai.",
        f"File Name: {csv_filename}",
        "Downloading start ho rahi hai...",
        sep="\n",
    )

    # Hand the file to the browser via the Colab helper.
    files.download(csv_filename)

Filtering for Label: Alia Bukhari
Fetching data from: https://www.urdunovelbanks.com/feeds/posts/default/-/Alia%20Bukhari?alt=rss&max-results=500
Total posts found: 13

SUCCESS! CSV file ban gayi hai.
File Name: pinterest_upload_Alia_Bukhari.csv
Downloading start ho rahi hai...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse # Label ke naam mein spaces handle karne ke liye

# --- SETTINGS (enter your own details here) ---
WEBSITE_URL = "https://www.urdunovelbanks.com/"
BOARD_NAME = "Alia Bukhari"  # Pinterest board name (for the label being exported)

# SET YOUR LABEL BELOW (very important)
# To fetch every post instead, leave it empty: TARGET_LABEL = ""
TARGET_LABEL = "Alia Bukhari"
# --------------------------------------------

def get_blogger_posts_for_pinterest(site_url, board_name, target_label, timeout=30):
    """Build Pinterest bulk-upload rows from a Blogger RSS feed.

    Requests the blog's RSS feed (optionally restricted to one label) and, for
    every post whose body contains an <img>, emits one row holding the post
    title, the first image URL, the board name, a title-based description and
    the post link. Posts without an image are skipped.

    Args:
        site_url: Blog base URL; a trailing slash is stripped.
        board_name: Value written to the 'Pinterest board' column of every row.
        target_label: Blogger label to filter by; a falsy value fetches all posts.
        timeout: Seconds passed to requests.get as its timeout.

    Returns:
        A list of dicts with the keys 'Title', 'Media URL', 'Pinterest board',
        'Description' and 'Link'. An empty list is returned when the response
        status is not 200 or when fetching/parsing raises (the exception
        message is printed, not re-raised).
    """
    # Build the feed URL (with or without a label filter).
    base_url = site_url.rstrip('/')

    if target_label:
        # A label switches to the per-label feed path; spaces must be
        # percent-encoded (%20) to be valid in the URL.
        encoded_label = urllib.parse.quote(target_label)
        rss_url = f"{base_url}/feeds/posts/default/-/{encoded_label}?alt=rss&max-results=500"
        print(f"Filtering for Label: {target_label}")
    else:
        # No label given: read the feed of all posts.
        rss_url = f"{base_url}/feeds/posts/default?alt=rss&max-results=500"
        print("Fetching ALL posts...")

    print(f"Fetching data from: {rss_url}")

    def _text_of(tag):
        # find() returns None for a missing child element; treat that as empty
        # text so one malformed <item> cannot abort the whole export.
        return tag.text if tag is not None else ""

    try:
        # Without a timeout the request can block forever on a stalled connection.
        response = requests.get(rss_url, timeout=timeout)
        # A non-200 status is reported as a probable wrong label / no posts.
        if response.status_code != 200:
            print("Error: Shayad Label ka spelling ghalat hai ya posts nahi hain.")
            return []

        soup = BeautifulSoup(response.content, 'xml')
        items = soup.find_all('item')

        print(f"Total posts found: {len(items)}")

        data = []

        for item in items:
            title = _text_of(item.find('title'))
            link = _text_of(item.find('link'))
            description_html = _text_of(item.find('description'))

            # First image in the post body becomes the pin's media URL.
            desc_soup = BeautifulSoup(description_html, 'html.parser')
            img_tag = desc_soup.find('img')
            image_url = img_tag.get('src', '') if img_tag is not None else ''

            # Only posts with an image produce a row.
            if not image_url:
                continue

            data.append({
                'Title': title,
                'Media URL': image_url,
                'Pinterest board': board_name,
                'Description': f"{title} - Read this novel online on Smart Urdu Novel Bank.",
                'Link': link
            })

        return data

    except Exception as e:
        # Best-effort by design: report the problem and let the caller see "no data".
        print(f"Error: {e}")
        return []

# Run the export: fetch the rows and write them to a CSV in the working directory.
posts_data = get_blogger_posts_for_pinterest(WEBSITE_URL, BOARD_NAME, TARGET_LABEL)

if posts_data:
    df = pd.DataFrame(posts_data)

    # Name the file after the label so exports for different labels don't get mixed up.
    # Spaces become underscores, matching the file names produced by the other
    # export cells in this notebook and avoiding whitespace in the file name.
    filename_label = TARGET_LABEL.replace(" ", "_") if TARGET_LABEL else "All_Posts"
    csv_filename = f'pinterest_upload_{filename_label}.csv'

    # Write the rows without the DataFrame index column.
    df.to_csv(csv_filename, index=False)

    print("\nSUCCESS! CSV file ban gayi hai.")
    print(f"File Name: {csv_filename}")
    print("Ab aap left side folder se ise download kar lein.")
else:
    print("Koi data nahi mila. Label ki spelling check karein.")