In [0]:
import requests
from io import StringIO
import pandas as pd

def fetch_nse_equity_csv():
    """Download the NSE equity listing file (EQUITY_L.csv) and return its text.

    The NSE home page is requested first so the session collects whatever
    cookies the site sets; the CSV is then requested from the archives host
    using the same browser-like headers and those cookies.

    Returns:
        str: The decoded body of the CSV response.

    Raises:
        requests.HTTPError: If the CSV download answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts of
            either request.
    """
    # Headers that mimic a real browser.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/117.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.nseindia.com/"
    }

    url = "https://nsearchives.nseindia.com/content/equities/EQUITY_L.csv"

    # The context manager releases the session's pooled connections on exit,
    # including when one of the requests raises.
    with requests.Session() as session:
        # Step 1: hit the NSE home page to set cookies. Its status is not
        # checked on purpose: the call is only a best-effort cookie primer.
        session.get("https://www.nseindia.com", headers=headers, timeout=10)

        # Step 2: download the CSV with the same headers + cookies.
        response = session.get(url, headers=headers, timeout=30)

        # Fail loudly on an error status instead of returning an error page
        # that would later be parsed as CSV and written to a table.
        response.raise_for_status()
        return response.text

def fetch_nse_etf_csv():
    """Download the NSE ETF security list (eq_etfseclist.csv) and return its text.

    The NSE home page is requested first so the session collects whatever
    cookies the site sets; the CSV is then requested from the archives host
    using the same browser-like headers and those cookies.

    Returns:
        str: The decoded body of the CSV response.

    Raises:
        requests.HTTPError: If the CSV download answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts of
            either request.
    """
    # Headers that mimic a real browser.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/117.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.nseindia.com/"
    }

    url = "https://nsearchives.nseindia.com/content/equities/eq_etfseclist.csv"

    # The context manager releases the session's pooled connections on exit,
    # including when one of the requests raises.
    with requests.Session() as session:
        # Step 1: hit the NSE home page to set cookies. Its status is not
        # checked on purpose: the call is only a best-effort cookie primer.
        session.get("https://www.nseindia.com", headers=headers, timeout=10)

        # Step 2: download the CSV with the same headers + cookies.
        response = session.get(url, headers=headers, timeout=30)

        # Fail loudly on an error status instead of returning an error page
        # that would later be parsed as CSV and written to a table.
        response.raise_for_status()
        return response.text

def fetch_nse_invit_csv():
    """Download the NSE InvIT listing file (INVITS_L.csv) and return its text.

    The NSE home page is requested first so the session collects whatever
    cookies the site sets; the CSV is then requested from the archives host
    using the same browser-like headers and those cookies.

    Returns:
        str: The decoded body of the CSV response, returned unmodified.

    Raises:
        requests.HTTPError: If the CSV download answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts of
            either request.
    """
    # Headers that mimic a real browser.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/117.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.nseindia.com/"
    }

    url = "https://nsearchives.nseindia.com/content/equities/INVITS_L.csv"

    # The context manager releases the session's pooled connections on exit,
    # including when one of the requests raises.
    with requests.Session() as session:
        # Step 1: hit the NSE home page to set cookies. Its status is not
        # checked on purpose: the call is only a best-effort cookie primer.
        session.get("https://www.nseindia.com", headers=headers, timeout=10)

        # Step 2: download the CSV with the same headers + cookies.
        response = session.get(url, headers=headers, timeout=30)

        # Fail loudly on an error status instead of returning an error page
        # that would later be parsed as CSV and written to a table.
        response.raise_for_status()
        return response.text

In [0]:
def load_csv_to_spark_df(csv_text):
    """Parse CSV text with pandas and hand the result to Spark.

    Args:
        csv_text: The full CSV document as a single string; the first line
            is used by ``pd.read_csv`` as the header row.

    Returns:
        Whatever ``spark.createDataFrame`` returns for the parsed pandas
        frame. ``spark`` is not defined in this notebook's visible code and
        must already exist in the global scope when this is called.
    """
    # Wrap the text in an in-memory buffer so pandas can read it like a file,
    # then pass the parsed frame straight to Spark.
    return spark.createDataFrame(pd.read_csv(StringIO(csv_text)))

In [0]:
# Fetch the equity listing CSV and load it into a Spark DataFrame.
csv_text = fetch_nse_equity_csv()
df = load_csv_to_spark_df(csv_text)

# Rename every column: trim surrounding whitespace, turn spaces into
# underscores and drop parentheses.
spark_df = df.toDF(
    *(
        name.strip().replace(" ", "_").replace("(", "").replace(")", "")
        for name in df.columns
    )
)

In [0]:
# Persist the equity listing, replacing any existing contents of the table.
(
    spark_df.write
    .mode("overwrite")
    .saveAsTable("personal_finance.bronze.company_listing_info")
)

In [0]:
# Fetch the ETF listing CSV and load it into a Spark DataFrame.
csv_text = fetch_nse_etf_csv()
df = load_csv_to_spark_df(csv_text)

# Rename every column: trim surrounding whitespace, turn spaces into
# underscores and drop parentheses.
spark_df = df.toDF(
    *(
        name.strip().replace(" ", "_").replace("(", "").replace(")", "")
        for name in df.columns
    )
)

# Show the renamed frame for a visual check before it is written.
display(spark_df)

In [0]:
# Persist the ETF listing, replacing any existing contents of the table.
(
    spark_df.write
    .mode("overwrite")
    .saveAsTable("personal_finance.bronze.etf_listing_info")
)

In [0]:
# Fetch the InvIT listing CSV.
csv_text = fetch_nse_invit_csv()

# Drop the last two lines of the download before parsing.
# NOTE(review): presumably these are trailing non-data lines in the file;
# confirm against the current download, since real rows would be lost if
# the footer ever disappears.
df = load_csv_to_spark_df(
    '\n'.join(
        csv_text.splitlines()[:-2]
    )
)

# Rename every column: trim surrounding whitespace, turn spaces into
# underscores and drop parentheses.
spark_df = df.toDF(
    *(
        name.strip().replace(" ", "_").replace("(", "").replace(")", "")
        for name in df.columns
    )
)

# Show the renamed frame for a visual check before it is written.
display(spark_df)

In [0]:
# Persist the InvIT listing, replacing any existing contents of the table.
(
    spark_df.write
    .mode("overwrite")
    .saveAsTable("personal_finance.bronze.invit_listing_info")
)