# Explore NSE BRSR Reports

In the last notebook, we extracted 1174 records for NSE. Let's explore the data. We will start with importing the data into a DF. 

In [1]:
import pandas as pd
import requests
import time

In [2]:
df = pd.read_excel('nse_data.xlsx')
df

Unnamed: 0,Company,Symbol,FromYear,ToYear,PDFURL,XBRLURL,SubmissionDate,Sector
0,Varun Beverages Limited,VBL,2024,2024,https://nsearchives.nseindia.com/corporate/VBL...,https://nsearchives.nseindia.com/corporate/xbr...,2025-03-11,Fast Moving Consumer Goods
1,Castrol India Limited,CASTROLIND,2024,2024,https://nsearchives.nseindia.com/corporate/CAS...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-25,Oil Gas & Consumable Fuels
2,Cyient Limited,CYIENT,2023,2024,https://nsearchives.nseindia.com/corporate/CYI...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-19,Information Technology
3,Siemens Limited,SIEMENS,2023,2024,https://nsearchives.nseindia.com/corporate/SIE...,https://nsearchives.nseindia.com/corporate/xbr...,2025-01-14,Capital Goods
4,Indraprastha Gas Limited,IGL,2023,2024,https://nsearchives.nseindia.com/corporate/IGL...,https://nsearchives.nseindia.com/corporate/xbr...,2024-12-17,Oil Gas & Consumable Fuels
...,...,...,...,...,...,...,...,...
1169,Huhtamaki India Limited,HUHTAMAKI,2023,2023,https://nsearchives.nseindia.com/corporate/HUH...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-30,Capital Goods
1170,Sanofi India Limited,SANOFI,2023,2023,https://nsearchives.nseindia.com/corporate/SAN...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-23,Healthcare
1171,Transformers And Rectifiers (India) Limited,TARIL,2023,2024,https://nsearchives.nseindia.com/corporate/TRI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-20,Capital Goods
1172,Rain Industries Limited,RAIN,2023,2023,https://nsearchives.nseindia.com/corporate/RAI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-18,Chemicals


We have these 1174 excel files downloaded as well. Let's view one of them and see what it looks like.

Let's add sectors for each company. We have the symbols for them anyway

In [3]:
class NSE:
    def __init__(self):
        # Define a single global header for all NSE requests
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'max-age=0',
            'Referer': 'https://www.nseindia.com/',
            'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'Connection': 'keep-alive'
        }
        self.session = self._create_session()
        self.initialize_session()

    def _create_session(self):
        session = requests.Session()
        session.headers.update(self.headers)
        return session

    def initialize_session(self, specific_symbol=None):
        """Initializes or refreshes the session cookies by visiting NSE."""
        max_retries = 3
        retry_delay = 2  # seconds
        
        for attempt in range(1, max_retries + 1):
            try:
                print(f"Initializing/Refreshing NSE session (attempt {attempt}/{max_retries})")
                
                # Reset session for fresh attempt if not the first try
                if attempt > 1:
                    print("Creating fresh session")
                    self.session = self._create_session()
                
                # Visit homepage to get cookies
                homepage_url = "https://www.nseindia.com/"
                response = self.session.get(homepage_url, timeout=15)
                response.raise_for_status()
                
                # Check if we received cookies
                if not self.session.cookies:
                    print(f"No cookies received from NSE homepage (attempt {attempt})")
                    time.sleep(retry_delay)
                    continue
                    
                print(f"Successfully accessed NSE homepage. Cookies: {dict(self.session.cookies)}")
                
                # Small delay to ensure cookies are processed
                time.sleep(1)
                
                if specific_symbol:
                    # Visit quote page for specific symbol
                    quote_url = f"https://www.nseindia.com/get-quotes/equity?symbol={specific_symbol.upper()}"
                    print(f"Accessing quote page: {quote_url}")
                    
                    # Update referrer
                    self.session.headers.update({'Referer': homepage_url})
                    
                    response = self.session.get(quote_url, timeout=15)
                    response.raise_for_status()
                    print(f"Successfully accessed quote page for {specific_symbol}")
                    
                    # Additional delay after accessing the quote page
                    time.sleep(1)
                
                return True
                
            except requests.exceptions.RequestException as e:
                print(f"Error initializing NSE session (attempt {attempt}): {e}")
                
                if attempt < max_retries:
                    print(f"Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                    retry_delay *= 2  # Exponential backoff
                else:
                    print("All NSE session initialization attempts failed")
                    return False
        
        return False

    def get_sector(self, symbol):
        
        # First make sure we have a valid session
        if not self.initialize_session(symbol):
            print(f"Failed to initialize NSE session for {symbol}, retrying once more")
            time.sleep(2)
            if not self.initialize_session(symbol):
                print(f"Failed to establish NSE session after multiple attempts")
                return pd.DataFrame()
        
        nse_url = f"https://www.nseindia.com/api/quote-equity?symbol={symbol}"
        try:
            # Set the proper referer
            self.session.headers.update({'Referer': f'https://www.nseindia.com/get-quotes/equity?symbol={symbol}'})
            
            response = self.session.get(nse_url, timeout=15)
            
            # If we get an error, try refreshing the session once
            if response.status_code != 200:
                print(f"Failed to get NSE data (status {response.status_code}), refreshing session")
                self.initialize_session(symbol)
                time.sleep(2)  # Short delay
                self.session.headers.update({'Referer': f'https://www.nseindia.com/get-quotes/equity?symbol={symbol}'})
                response = self.session.get(nse_url, timeout=15)
            
            if response.status_code == 200:
                response_json = response.json()
                data = response_json.get("industryInfo", [])
                x = data['sector']
            return x
        
        except ValueError as e:
            print(f"Error parsing NSE JSON response: {e}")
            raise HTTPException(status_code=500, detail=f"Error parsing NSE JSON response: {e}")


In [4]:
nse=NSE()
# Define a safe wrapper around the sector fetch
def fetch_sector(symbol):
    try:
        return nse.get_sector(symbol)
    except Exception:
        return ""  # Leave it empty if it fails

Initializing/Refreshing NSE session (attempt 1/3)
Successfully accessed NSE homepage. Cookies: {'AKA_A2': 'A', '_abck': 'BC1B84494804F45C6762396BA76125DC~-1~YAAQxFI2F4ca8YaXAQAA61aakQ4g05Eqxe4hQjswVLXbmvjR6xxUp8Gcx33yRmjOQ3A2g9ahLOiun5qgxwwc4/SbuaVWoEJCyL4lFkrmFHzL3Lb3SpDWodgV0F2dtzYlSP2BfaXDDf7VDwm3a0MulVNvx/GdBz5jHjrbUW4/DnuIE3pQS8vXGEu0bu+/btO4ZWzHsbzj9GqkUrIl1K3ZjLLEXVoxCcPJJFY/27qZNxmbiIOex3XMxf7fc5dKb3hdM77CsxXQ6Klm0CC4f+jQpyTsmHXz+1fj24oi0ry/YbRAxgw/eGi3KznaZ0bSkZEbnQkT58PeD2g4QZZUybji0NqKrqEn0tC4n5PN2imGuW58XnKQp4GLFWbP+i5yx+VEzGc4yBb3FRXBX8jzHI6er5ks2mOPYj+Wt5H99PaK4TmtnVn9u46wZ97rkrEbTjvuHavl0KmG~-1~-1~-1', 'ak_bmsc': '2BDDB94877448702A47247EB56BBA3E6~000000000000000000000000000000~YAAQxFI2F4ga8YaXAQAA61aakRzHTiwhNsYlu8jmb77feBbIz1Ado3KEnRvfCr3cDDLoj5yCHK9bKiFizxe0+pYdsrKsHTXbuoNIfIb95C4ez6Zm7N/N+cha2eZhkMJpJ0eGQcYW1dFSKEtAhSk725oZb1VBHnrIfOXorhG4ZMJLIOKAQjbA/OZ2R8GytHkUmaVq2VecIaOlCgb7jgZR/d3NXU3RULgCRsnGwFKTMaJOvIwOVkvlaYb4Y6heZjVQZDZFDmBP6nPgEjrjcCbUVLo+8BdBicesTFkg5hm8Inz

In [5]:
fetch_sector("NPST")

Initializing/Refreshing NSE session (attempt 1/3)
Successfully accessed NSE homepage. Cookies: {'AKA_A2': 'A', '_abck': 'BC1B84494804F45C6762396BA76125DC~-1~YAAQxFI2F4ca8YaXAQAA61aakQ4g05Eqxe4hQjswVLXbmvjR6xxUp8Gcx33yRmjOQ3A2g9ahLOiun5qgxwwc4/SbuaVWoEJCyL4lFkrmFHzL3Lb3SpDWodgV0F2dtzYlSP2BfaXDDf7VDwm3a0MulVNvx/GdBz5jHjrbUW4/DnuIE3pQS8vXGEu0bu+/btO4ZWzHsbzj9GqkUrIl1K3ZjLLEXVoxCcPJJFY/27qZNxmbiIOex3XMxf7fc5dKb3hdM77CsxXQ6Klm0CC4f+jQpyTsmHXz+1fj24oi0ry/YbRAxgw/eGi3KznaZ0bSkZEbnQkT58PeD2g4QZZUybji0NqKrqEn0tC4n5PN2imGuW58XnKQp4GLFWbP+i5yx+VEzGc4yBb3FRXBX8jzHI6er5ks2mOPYj+Wt5H99PaK4TmtnVn9u46wZ97rkrEbTjvuHavl0KmG~-1~-1~-1', 'ak_bmsc': '2BDDB94877448702A47247EB56BBA3E6~000000000000000000000000000000~YAAQxFI2F4ga8YaXAQAA61aakRzHTiwhNsYlu8jmb77feBbIz1Ado3KEnRvfCr3cDDLoj5yCHK9bKiFizxe0+pYdsrKsHTXbuoNIfIb95C4ez6Zm7N/N+cha2eZhkMJpJ0eGQcYW1dFSKEtAhSk725oZb1VBHnrIfOXorhG4ZMJLIOKAQjbA/OZ2R8GytHkUmaVq2VecIaOlCgb7jgZR/d3NXU3RULgCRsnGwFKTMaJOvIwOVkvlaYb4Y6heZjVQZDZFDmBP6nPgEjrjcCbUVLo+8BdBicesTFkg5hm8Inz

'Information Technology'

In [None]:
# Ensure 'Sector' column exists
if 'Sector' not in df.columns:
    df['Sector'] = ''

# Identify rows to process (i.e. where Sector is empty)
to_process = df[df['Sector'] == ''].copy()
batch_size = 100

def fetch_sector(symbol):
    try:
        return nse.get_sector(symbol)
    except Exception:
        return ''

# Process in batches
for start in range(0, len(to_process), batch_size):
    end = start + batch_size
    batch_indices = to_process.iloc[start:end].index
    symbols = df.loc[batch_indices, 'Symbol']

    print(f"Processing rows {start} to {end - 1}")

    # Apply the sector fetching function
    sectors = symbols.apply(fetch_sector)

    # Update the main dataframe
    df.loc[batch_indices, 'Sector'] = sectors

    # Save intermediate result to avoid progress loss
    df.to_excel('nse_data.xlsx', index=False)

    time.sleep(1)  # slight pause if needed to avoid overloading the API


Processing rows 0 to 99
Initializing/Refreshing NSE session (attempt 1/3)
Successfully accessed NSE homepage. Cookies: {'AKA_A2': 'A', '_abck': 'BC1B84494804F45C6762396BA76125DC~-1~YAAQxFI2F4ca8YaXAQAA61aakQ4g05Eqxe4hQjswVLXbmvjR6xxUp8Gcx33yRmjOQ3A2g9ahLOiun5qgxwwc4/SbuaVWoEJCyL4lFkrmFHzL3Lb3SpDWodgV0F2dtzYlSP2BfaXDDf7VDwm3a0MulVNvx/GdBz5jHjrbUW4/DnuIE3pQS8vXGEu0bu+/btO4ZWzHsbzj9GqkUrIl1K3ZjLLEXVoxCcPJJFY/27qZNxmbiIOex3XMxf7fc5dKb3hdM77CsxXQ6Klm0CC4f+jQpyTsmHXz+1fj24oi0ry/YbRAxgw/eGi3KznaZ0bSkZEbnQkT58PeD2g4QZZUybji0NqKrqEn0tC4n5PN2imGuW58XnKQp4GLFWbP+i5yx+VEzGc4yBb3FRXBX8jzHI6er5ks2mOPYj+Wt5H99PaK4TmtnVn9u46wZ97rkrEbTjvuHavl0KmG~-1~-1~-1', 'ak_bmsc': '2BDDB94877448702A47247EB56BBA3E6~000000000000000000000000000000~YAAQxFI2F4ga8YaXAQAA61aakRzHTiwhNsYlu8jmb77feBbIz1Ado3KEnRvfCr3cDDLoj5yCHK9bKiFizxe0+pYdsrKsHTXbuoNIfIb95C4ez6Zm7N/N+cha2eZhkMJpJ0eGQcYW1dFSKEtAhSk725oZb1VBHnrIfOXorhG4ZMJLIOKAQjbA/OZ2R8GytHkUmaVq2VecIaOlCgb7jgZR/d3NXU3RULgCRsnGwFKTMaJOvIwOVkvlaYb4Y6heZjVQZDZFDmBP6nPgEjrjcCb

In [8]:
df

Unnamed: 0,Company,Symbol,FromYear,ToYear,PDFURL,XBRLURL,SubmissionDate,Sector
0,Varun Beverages Limited,VBL,2024,2024,https://nsearchives.nseindia.com/corporate/VBL...,https://nsearchives.nseindia.com/corporate/xbr...,2025-03-11,Fast Moving Consumer Goods
1,Castrol India Limited,CASTROLIND,2024,2024,https://nsearchives.nseindia.com/corporate/CAS...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-25,Oil Gas & Consumable Fuels
2,Cyient Limited,CYIENT,2023,2024,https://nsearchives.nseindia.com/corporate/CYI...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-19,Information Technology
3,Siemens Limited,SIEMENS,2023,2024,https://nsearchives.nseindia.com/corporate/SIE...,https://nsearchives.nseindia.com/corporate/xbr...,2025-01-14,Capital Goods
4,Indraprastha Gas Limited,IGL,2023,2024,https://nsearchives.nseindia.com/corporate/IGL...,https://nsearchives.nseindia.com/corporate/xbr...,2024-12-17,Oil Gas & Consumable Fuels
...,...,...,...,...,...,...,...,...
1169,Huhtamaki India Limited,HUHTAMAKI,2023,2023,https://nsearchives.nseindia.com/corporate/HUH...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-30,Capital Goods
1170,Sanofi India Limited,SANOFI,2023,2023,https://nsearchives.nseindia.com/corporate/SAN...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-23,Healthcare
1171,Transformers And Rectifiers (India) Limited,TARIL,2023,2024,https://nsearchives.nseindia.com/corporate/TRI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-20,Capital Goods
1172,Rain Industries Limited,RAIN,2023,2023,https://nsearchives.nseindia.com/corporate/RAI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-18,Chemicals


In [9]:
df[df['Sector']=='']['Company']

704      Infobeans Technologies Limited
717    The Jammu & Kashmir Bank Limited
779       Suven Pharmaceuticals Limited
Name: Company, dtype: object

In [11]:
def update_sector(df, company_name, new_sector):
    df.loc[df['Company'] == company_name, 'Sector'] = new_sector


In [12]:
update_sector(df, 'Infobeans Technologies Limited', 'Information Technology')
update_sector(df, 'The Jammu & Kashmir Bank Limited', 'Financial Services')
# Suven Pharma name changed - updated it manually 

In [13]:
df

Unnamed: 0,Company,Symbol,FromYear,ToYear,PDFURL,XBRLURL,SubmissionDate,Sector
0,Varun Beverages Limited,VBL,2024,2024,https://nsearchives.nseindia.com/corporate/VBL...,https://nsearchives.nseindia.com/corporate/xbr...,2025-03-11,Fast Moving Consumer Goods
1,Castrol India Limited,CASTROLIND,2024,2024,https://nsearchives.nseindia.com/corporate/CAS...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-25,Oil Gas & Consumable Fuels
2,Cyient Limited,CYIENT,2023,2024,https://nsearchives.nseindia.com/corporate/CYI...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-19,Information Technology
3,Siemens Limited,SIEMENS,2023,2024,https://nsearchives.nseindia.com/corporate/SIE...,https://nsearchives.nseindia.com/corporate/xbr...,2025-01-14,Capital Goods
4,Indraprastha Gas Limited,IGL,2023,2024,https://nsearchives.nseindia.com/corporate/IGL...,https://nsearchives.nseindia.com/corporate/xbr...,2024-12-17,Oil Gas & Consumable Fuels
...,...,...,...,...,...,...,...,...
1169,Huhtamaki India Limited,HUHTAMAKI,2023,2023,https://nsearchives.nseindia.com/corporate/HUH...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-30,Capital Goods
1170,Sanofi India Limited,SANOFI,2023,2023,https://nsearchives.nseindia.com/corporate/SAN...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-23,Healthcare
1171,Transformers And Rectifiers (India) Limited,TARIL,2023,2024,https://nsearchives.nseindia.com/corporate/TRI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-20,Capital Goods
1172,Rain Industries Limited,RAIN,2023,2023,https://nsearchives.nseindia.com/corporate/RAI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-18,Chemicals


In [4]:
empty_counts = (df.fillna('').applymap(str).applymap(str.strip) == '').sum()
print(empty_counts)
df[df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]

Company           0
Symbol            0
FromYear          0
ToYear            0
PDFURL            0
XBRLURL           0
SubmissionDate    0
Sector            0
dtype: int64


  empty_counts = (df.fillna('').applymap(str).applymap(str.strip) == '').sum()
  df[df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]


Unnamed: 0,Company,Symbol,FromYear,ToYear,PDFURL,XBRLURL,SubmissionDate,Sector
