## Extracting Data from Excel files

### Extracting Profile details for all the companies

In [33]:
import os
import pandas as pd
import glob

folder_path = "excel_files"

# Get a list of all .xlsx file paths
xlsx_files = glob.glob(os.path.join(folder_path, "*.xlsx"))


In [48]:
from tqdm import tqdm
import pandas as pd
import os

data_list = []

# List all .xlsx files in the folder
excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

for file_name in tqdm(excel_files, desc="Processing files", unit="file"):
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)

    # Helper function to extract a single value or blank
    def get_value(element):
        values = df.loc[df['Element Name'] == element, 'Fact Value'].values
        return values[0] if len(values) > 0 else ''

    # Extract all required fields
    corporate_identity = get_value('CorporateIdentityNumber')
    company_name = get_value('NameOfTheCompany')
    date_of_incorporation = get_value('DateOfIncorporation')
    regd_office_address = get_value('AddressOfRegisteredOfficeOfCompany')
    corp_office_address = get_value('AddressOfCorporateOfficeOfCompany')
    company_email = get_value('EMailOfTheCompany')
    company_phone = get_value('TelephoneOfCompany')
    company_website = get_value('WebsiteOfCompany')
    paid_up_value = get_value('ValueOfSharesPaidUp')
    contact_name = get_value('NameOfContactPerson')
    contact_number = get_value('ContactNumberOfContactPerson')
    contact_email = get_value('EMailOfContactPerson')

    # Handle multiple exchange listings
    exchange_vals = df.loc[
        df['Element Name'] == 'NameOfStockExchangeWhereTheCompanyIsListed', 'Fact Value'
    ].dropna().unique().tolist()

    exchange_listed = ' and '.join(exchange_vals) if exchange_vals else ''

    # Append to data list
    data_list.append([
        corporate_identity, company_name, date_of_incorporation,
        regd_office_address, corp_office_address, company_email,
        company_phone, company_website, paid_up_value,
        contact_name, contact_number, contact_email, exchange_listed
    ])



Processing files: 100%|██████████████████████████████████████████████████████████| 1174/1174 [06:46<00:00,  2.88file/s]


In [49]:
 profile_df = pd.DataFrame(data_list, columns=[
    'CIN', 'Company', 'Incorporation Date',
    'Registered Address', 'Corporate Address',
    'Company Email', 'Company Telephone', 'Company Website',
    'Paid up share capital', 'Contact Person Name',
    'Contact Person Number', 'Contact Person Email',
    'Listed On'
]).sort_values(by='Company')

In [90]:
profile_df.shape

(1174, 14)

In [91]:
empty_counts = (profile_df.fillna('').applymap(str).applymap(str.strip) == '').sum() 
print(empty_counts) 
profile_df[profile_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]

CIN                       0
Company                   0
Incorporation Date       12
Registered Address        0
Corporate Address         5
Company Email             0
Company Telephone         0
Company Website           0
Paid up share capital     0
Contact Person Name       0
Contact Person Number     0
Contact Person Email      0
Listed On                 4
company_lower             0
dtype: int64


  empty_counts = (profile_df.fillna('').applymap(str).applymap(str.strip) == '').sum()
  profile_df[profile_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]


Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,company_lower
10,L32202KA1949PLC032923,ABB India Limited,,"“Disha”, Corporate Office, 3rd Floor, Plot No....","“Disha”, Corporate Office, 3rd Floor, Plot No....",investor.helpdesk@in.abb.com,080 22949113 / 080 22949122,www.abb.com,423816800.0,Dhenuka Srinivasan,080-22949151,dhenuka.srinivasan@in.abb.com,BSE and NSE,abb india limited
54,L45400HR1985PLC021622,Anant Raj Limited,1985-07-30,"Plot No. CP-1, Sector-8 IMT Manesar, Gurugram-...",,manojpahwa@anantrajlimited.com,011-43034400,www.anantrajlimited.com,683781500.0,A.K. Prashar,011-43034426,ak.prashar@anantrajlimited.com,BSE and NSE,anant raj limited
272,L74899DL1995PLC065388,Dr. Lal Path Labs Limited,1995-02-14,"Block E, Sector-18, Rohini, New Delhi-110085","12th Floor, Tower B, SAS Tower, Medicity, Sect...",cs@lalpathlabs.com,+91-124-3016-500,www.lalpathlabs.com,834778500.0,Mr. Manoj Kumar Garg (Group Chief Human Resour...,+ 91-124-3016-500,manoj.garg@lalpathlabs.com,,dr. lal path labs limited
334,L24294PN1958PLC011052,Foseco India Limited,,"Foseco India Limited, Gat Nos. 922 and 923, Sa...","Foseco India Limited, Gat Nos. 922 and 923, Sa...",investor.grievance@vesuvius.com,02137 – 668100,www.fosecoindia.com,63864590.0,Mr. Mahendra Kumar Dutia,02137-668100,investor.grievance@vesuvius.com,BSE and NSE,foseco india limited
369,L15500MH1981PLC025809,Gm Breweries Limited,,"Ganesh Niwas Ground Floor, Veer Savarkar Marg,...","Ganesh Niwas Ground Floor, Veer Savarkar Marg,...",gmbreweries.cs@gmail.com,022-24331150,www.gmbreweries.com,182775400.0,Mr. Sandeep Kutchhi,(022) 24331150,investors_complaints@gmbreweries.com,BSE and NSE,gm breweries limited
381,L15400GJ2009PLC058781,Gopal Snacks Limited,2009-12-07,"PLOT NO. G2322, G2323 & G2324, GIDC METODA TAL...",,cs@gopalsnacks.com,9924271217,https://www.gopalnamkeen.com,124604400.0,Mayur Popatbhai Gangani,91 99242 71217,cs@gopalsnacks.com,BSE and NSE,gopal snacks limited
453,L74110UP2008PLC034977,Hma Agro Industries Limited,2008-04-09,"18A/5/3 TAJVIEW CROSSING FATEHABAD ROAD, Agra,...",,cs@hmaagro.com,+91 7217018161,www.hmagroup.co,500769800.0,Mr. Nikhil Sundrani,+91 7217018161,cs@hmaagro.com,BSE and NSE,hma agro industries limited
461,L21011MH1950FLC145537,Huhtamaki India Limited,,"A-802, Crescenzo, C-38/39, G Block, Bandra-Kur...","7th Floor, Bellona, The Walk, Hiranandani Esta...",investor.communication@huhtamaki.com,+91 (022) 6174 0100,www.flexibles.huhtamaki.in,151100000.0,Mr. Abhijaat Sinha,+91 (022) 6174 0100,abhijaat.sinha@huhtamaki.com,NSE and BSE,huhtamaki india limited
509,L99999GJ1976PLC018945,Inox India Limited,,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,inox@inoxcva.com,+91 (265)6160100,www.inoxcva.com,181527000.0,"Mr. Deepak Acharya, CEO",+91 9824089963,deepak.acharya@inoxcva.com,BSE and NSE,inox india limited
728,L92111DL1988PLC033099,New Delhi Television Limited,1988-09-08,"W-17, 2nd Floor, Greater Kailash-I, New Delhi ...",,secretarial@ndtv.com,+91 11- 4157 7777,www.ndtv.com,257885100.0,Parinita Bhutani Duggal,Company Secretary and Compliance Officer,secretarial@ndtv.com,BSE and NSE,new delhi television limited


In [52]:
profile_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,358862640,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,169230842,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,112650700,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE
3,L67190MH2007PLC289249,5paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,311909730,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,92157074,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1171,L24231GJ2000PLC038352,Zota Health Care Limited,2000-07-12,ZOTA HOUSE 2/896 HIRA MODI STREET SAGRAMPURA S...,"ZOTA HOUSE, BHAGWAN AIYAPPA COMPLEX, NEXT TO B...",info@zotahealthcare.com,+91 261 2331601,www.zotahealthcare.com,258473270,CS ASHVIN VARIYA,+91 261 2331601,cszota@zotahealthcare.com,NSE
1172,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1006233990,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE
1173,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,636321440,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE
279,L72200MH2000PLC125319,eClerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,490253590,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE


### Data Cleaning
From observation, we see that the names of companies are not in proper case in some instances. Also, the amil IDs and websites are also not propely in lowercase. The share capital is also stored as a string. Let's fix that.

In [92]:
profile_df['Company Email'] = profile_df['Company Email'].astype(str).str.lower()
profile_df['Contact Person Email'] = profile_df['Contact Person Email'].astype(str).str.lower()
profile_df['Company Website'] = profile_df['Company Website'].astype(str).str.lower()

import re

def clean_numeric(val):
    if pd.isna(val):
        return None
    val = str(val)
    val = re.sub(r'[^\d.]', '', val)  # remove everything except digits and dots
    return pd.to_numeric(val, errors='coerce')

profile_df['Paid up share capital'] = profile_df['Paid up share capital'].apply(clean_numeric)

# Now see what couldn't be converted
invalid_rows = profile_df[profile_df['Paid up share capital'].isna()]
if not invalid_rows.empty:
    print("\nStill invalid entries:")
    print(invalid_rows[['Company', 'Paid up share capital']])

In [93]:
profile_df.to_excel('profile.xlsx', index=False)

### Add sectors
We must add sector to the df because the analysis is sector-wise. Sectors for each of the companies are present in nse_data.xlsx that we extarcted earlier.

In [94]:
profile_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,company_lower
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,3.588626e+08,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE,360 one wam limited
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,1.692308e+08,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE,3i infotech limited
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,1.126507e+08,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE,3m india limited
3,L67190MH2007PLC289249,5paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,3.119097e+08,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE,5paisa capital limited
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,9.215707e+07,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE,63 moons technologies limited
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1171,L24231GJ2000PLC038352,Zota Health Care Limited,2000-07-12,ZOTA HOUSE 2/896 HIRA MODI STREET SAGRAMPURA S...,"ZOTA HOUSE, BHAGWAN AIYAPPA COMPLEX, NEXT TO B...",info@zotahealthcare.com,+91 261 2331601,www.zotahealthcare.com,2.584733e+08,CS ASHVIN VARIYA,+91 261 2331601,cszota@zotahealthcare.com,NSE,zota health care limited
1172,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1.006234e+09,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE,zydus lifesciences limited
1173,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,6.363214e+08,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE,zydus wellness limited
279,L72200MH2000PLC125319,eClerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,4.902536e+08,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE,eclerx services limited


So, let's do this: Take in data from the nse_data.xlsx file which was downloaded from NSE website for BRSR. Then compare the names of companies and match them with the Symbol and add them to the profile_df.

In [95]:
nse_data = pd.read_excel('nse_data.xlsx')
nse_data

Unnamed: 0,Company,Symbol,FromYear,ToYear,PDFURL,XBRLURL,SubmissionDate,Sector
0,Varun Beverages Limited,VBL,2024,2024,https://nsearchives.nseindia.com/corporate/VBL...,https://nsearchives.nseindia.com/corporate/xbr...,2025-03-11,Fast Moving Consumer Goods
1,Castrol India Limited,CASTROLIND,2024,2024,https://nsearchives.nseindia.com/corporate/CAS...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-25,Oil Gas & Consumable Fuels
2,Cyient Limited,CYIENT,2023,2024,https://nsearchives.nseindia.com/corporate/CYI...,https://nsearchives.nseindia.com/corporate/xbr...,2025-02-19,Information Technology
3,Siemens Limited,SIEMENS,2023,2024,https://nsearchives.nseindia.com/corporate/SIE...,https://nsearchives.nseindia.com/corporate/xbr...,2025-01-14,Capital Goods
4,Indraprastha Gas Limited,IGL,2023,2024,https://nsearchives.nseindia.com/corporate/IGL...,https://nsearchives.nseindia.com/corporate/xbr...,2024-12-17,Oil Gas & Consumable Fuels
...,...,...,...,...,...,...,...,...
1169,Huhtamaki India Limited,HUHTAMAKI,2023,2023,https://nsearchives.nseindia.com/corporate/HUH...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-30,Capital Goods
1170,Sanofi India Limited,SANOFI,2023,2023,https://nsearchives.nseindia.com/corporate/SAN...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-23,Healthcare
1171,Transformers and Rectifiers (India) Limited,TARIL,2023,2024,https://nsearchives.nseindia.com/corporate/TRI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-20,Capital Goods
1172,Rain Industries Limited,RAIN,2023,2023,https://nsearchives.nseindia.com/corporate/RAI...,https://nsearchives.nseindia.com/corporate/xbr...,2024-04-18,Chemicals


In [96]:
# Create helper columns with lowercase names
profile_df['company_lower'] = profile_df['Company'].str.lower().str.strip()
nse_data['company_lower'] = nse_data['Company'].str.lower().str.strip()

# Merge using lowercase names, but keep original casing from profile_df
merged_df = profile_df.merge(
    nse_data[['company_lower', 'Symbol']],
    on='company_lower',
    how='left'
)

# Display companies where Sector was not matched
unmatched = merged_df[merged_df['Symbol'].isna()]
if not unmatched.empty:
    print("\nCompanies with no matching sector:")
    print(unmatched[['Company']].drop_duplicates().sort_values('Company'))

# Drop helper column if no longer needed
merged_df.drop(columns=['company_lower'], inplace=True)



Companies with no matching sector:
                                     Company
19                    Abans Holdings Limited
38                   Agro Tech Foods Limited
51               Allsec Technologies Limited
56                      Ami Organics Limited
199    Century Textiles & Industries Limited
299       Equinox India Developments Limited
366           Glenmark Life Sciences Limited
409                HBL Power Systems Limited
471                  IIFL Securities Limited
616            Lakshmi Machine Works Limited
941     Shreyas Shipping & Logistics Limited
1005             Suven Life Sciences Limited
1071  Tide Water Oil Company (India) Limited
1075                 Tips Industries Limited
1168                          Zomato Limited


In [97]:
merged_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,Symbol
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,3.588626e+08,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE,360ONE
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,1.692308e+08,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE,3IINFOLTD
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,1.126507e+08,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE,3MINDIA
3,L67190MH2007PLC289249,5paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,3.119097e+08,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE,5PAISA
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,9.215707e+07,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE,63MOONS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1169,L24231GJ2000PLC038352,Zota Health Care Limited,2000-07-12,ZOTA HOUSE 2/896 HIRA MODI STREET SAGRAMPURA S...,"ZOTA HOUSE, BHAGWAN AIYAPPA COMPLEX, NEXT TO B...",info@zotahealthcare.com,+91 261 2331601,www.zotahealthcare.com,2.584733e+08,CS ASHVIN VARIYA,+91 261 2331601,cszota@zotahealthcare.com,NSE,ZOTA
1170,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1.006234e+09,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE,ZYDUSLIFE
1171,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,6.363214e+08,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE,ZYDUSWELL
1172,L72200MH2000PLC125319,eClerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,4.902536e+08,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE,ECLERX


As it turns out,

**Abans Holdings Limited** changed its name to **Abans Financial Services Limited** and its symbol was changed to **AFSL**.

**Agro Tech Foods Limited** changed its name to **Sundrop Brands Limited** and its symbol changed from **ATFL** to **SUNDROP**.

**Allsec Technologies Limited** changed its name to **Alldigi Tech Limited** and its symbol changed from **ALLSEC** to **ALLDIGI**.

**Ami Organics Limited** is changing its name to **Acutaas Chemicals Limited**, and its symbol will change from **AMIORG** to **ACUTAAS**, effective **June 2, 2025**.

**Century Textiles & Industries Limited** changed its name to **Aditya Birla Real Estate Limited** and its symbol was changed to **ABREL**.

**Equinox India Developments Limited** changed its name to **Embassy Developments Limited** and its symbol changed from **EMBDL** to **EMBASSY**.

**Glenmark Life Sciences Limited** changed its name to **Alivus Life Sciences Limited** and its symbol changed from **GLS** to **ALIVUS**, effective **January 20, 2025**.

**HBL Power Systems Limited** changed its name to **HBL Engineering Limited** and its symbol changed from **HBLPOWER** to **HBLENGINE**, effective **December 23, 2024**.

**IIFL Securities Limited** changed its name to **IIFL Capital Services Limited** and its symbol changed from **IIFLSEC** to **IIFLCAPS**, effective **December 3, 2024**.

**Lakshmi Machine Works Limited** changed its name to **LMW Limited** and its symbol changed from **LAXMIMACH** to **LMW**, effective **October 10, 2024**.

**Shreyas Shipping & Logistics Limited** changed its name to **Transworld Shipping Lines Limited** and its symbol changed from **SHREYAS** to **TRANSWORLD**, effective **October 23, 2024**.

**Suven Life Sciences Limited** changed its name to **Cohance Lifesciences Limited** and its symbol changed from **SUVENPHAR** to **COHANCE**, effective **May 7, 2025**.

**Tide Water Oil Company (India) Limited** changed its name to **Veedol Corporation Limited** and its symbol changed from **TWO** to **VEEDOL**, effective **October 9, 2024**.

**Tips Industries Limited** changed its name to **Tips Music Limited** and its symbol changed from **TIPSINDLTD** to **TIPSMUSIC**, effective **September 30, 2024**.

**Zomato Limited** changed its name to **Eternal Limited** and its symbol changed from **ZOMATO** to **ETERNAL**, effective **April 9, 2025**


In [98]:
merged_df.loc[merged_df["Company"] == "Abans Holdings Limited", "Symbol"] = "AFSL"
merged_df.loc[merged_df["Company"] == "Agro Tech Foods Limited", "Symbol"] = "SUNDROP"
merged_df.loc[merged_df["Company"] == "Allsec Technologies Limited", "Symbol"] = "ALLDIGI"
merged_df.loc[merged_df["Company"] == "Ami Organics Limited", "Symbol"] = "ACUTAAS"
merged_df.loc[merged_df["Company"] == "Century Textiles & Industries Limited", "Symbol"] = "ABREL"
merged_df.loc[merged_df["Company"] == "Equinox India Developments Limited", "Symbol"] = "EMBASSY"
merged_df.loc[merged_df["Company"] == "Glenmark Life Sciences Limited", "Symbol"] = "ALIVUS"
merged_df.loc[merged_df["Company"] == "HBL Power Systems Limited", "Symbol"] = "HBLENGINE"
merged_df.loc[merged_df["Company"] == "IIFL Securities Limited", "Symbol"] = "IIFLCAPS"
merged_df.loc[merged_df["Company"] == "Lakshmi Machine Works Limited", "Symbol"] = "LMW"
merged_df.loc[merged_df["Company"] == "Shreyas Shipping & Logistics Limited", "Symbol"] = "TRANSWORLD"
merged_df.loc[merged_df["Company"] == "Suven Life Sciences Limited", "Symbol"] = "COHANCE"
merged_df.loc[merged_df["Company"] == "Tide Water Oil Company (India) Limited", "Symbol"] = "VEEDOL"
merged_df.loc[merged_df["Company"] == "Tips Industries Limited", "Symbol"] = "TIPSMUSIC"
merged_df.loc[merged_df["Company"] == "Zomato Limited", "Symbol"] = "ETERNAL"


In [99]:
empty_counts = (merged_df.fillna('').applymap(str).applymap(str.strip) == '').sum() 
print(empty_counts) 
merged_df[merged_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]

CIN                       0
Company                   0
Incorporation Date       12
Registered Address        0
Corporate Address         5
Company Email             0
Company Telephone         0
Company Website           0
Paid up share capital     0
Contact Person Name       0
Contact Person Number     0
Contact Person Email      0
Listed On                 4
Symbol                    0
dtype: int64


  empty_counts = (merged_df.fillna('').applymap(str).applymap(str.strip) == '').sum()
  merged_df[merged_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]


Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,Symbol
5,L32202KA1949PLC032923,ABB India Limited,,"“Disha”, Corporate Office, 3rd Floor, Plot No....","“Disha”, Corporate Office, 3rd Floor, Plot No....",investor.helpdesk@in.abb.com,080 22949113 / 080 22949122,www.abb.com,423816800.0,Dhenuka Srinivasan,080-22949151,dhenuka.srinivasan@in.abb.com,BSE and NSE,ABB
59,L45400HR1985PLC021622,Anant Raj Limited,1985-07-30,"Plot No. CP-1, Sector-8 IMT Manesar, Gurugram-...",,manojpahwa@anantrajlimited.com,011-43034400,www.anantrajlimited.com,683781500.0,A.K. Prashar,011-43034426,ak.prashar@anantrajlimited.com,BSE and NSE,ANANTRAJ
271,L74899DL1995PLC065388,Dr. Lal Path Labs Limited,1995-02-14,"Block E, Sector-18, Rohini, New Delhi-110085","12th Floor, Tower B, SAS Tower, Medicity, Sect...",cs@lalpathlabs.com,+91-124-3016-500,www.lalpathlabs.com,834778500.0,Mr. Manoj Kumar Garg (Group Chief Human Resour...,+ 91-124-3016-500,manoj.garg@lalpathlabs.com,,LALPATHLAB
333,L24294PN1958PLC011052,Foseco India Limited,,"Foseco India Limited, Gat Nos. 922 and 923, Sa...","Foseco India Limited, Gat Nos. 922 and 923, Sa...",investor.grievance@vesuvius.com,02137 – 668100,www.fosecoindia.com,63864590.0,Mr. Mahendra Kumar Dutia,02137-668100,investor.grievance@vesuvius.com,BSE and NSE,FOSECOIND
371,L15500MH1981PLC025809,Gm Breweries Limited,,"Ganesh Niwas Ground Floor, Veer Savarkar Marg,...","Ganesh Niwas Ground Floor, Veer Savarkar Marg,...",gmbreweries.cs@gmail.com,022-24331150,www.gmbreweries.com,182775400.0,Mr. Sandeep Kutchhi,(022) 24331150,investors_complaints@gmbreweries.com,BSE and NSE,GMBREW
383,L15400GJ2009PLC058781,Gopal Snacks Limited,2009-12-07,"PLOT NO. G2322, G2323 & G2324, GIDC METODA TAL...",,cs@gopalsnacks.com,9924271217,https://www.gopalnamkeen.com,124604400.0,Mayur Popatbhai Gangani,91 99242 71217,cs@gopalsnacks.com,BSE and NSE,GOPAL
453,L74110UP2008PLC034977,Hma Agro Industries Limited,2008-04-09,"18A/5/3 TAJVIEW CROSSING FATEHABAD ROAD, Agra,...",,cs@hmaagro.com,+91 7217018161,www.hmagroup.co,500769800.0,Mr. Nikhil Sundrani,+91 7217018161,cs@hmaagro.com,BSE and NSE,HMAAGRO
459,L21011MH1950FLC145537,Huhtamaki India Limited,,"A-802, Crescenzo, C-38/39, G Block, Bandra-Kur...","7th Floor, Bellona, The Walk, Hiranandani Esta...",investor.communication@huhtamaki.com,+91 (022) 6174 0100,www.flexibles.huhtamaki.in,151100000.0,Mr. Abhijaat Sinha,+91 (022) 6174 0100,abhijaat.sinha@huhtamaki.com,NSE and BSE,HUHTAMAKI
513,L99999GJ1976PLC018945,Inox India Limited,,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,inox@inoxcva.com,+91 (265)6160100,www.inoxcva.com,181527000.0,"Mr. Deepak Acharya, CEO",+91 9824089963,deepak.acharya@inoxcva.com,BSE and NSE,INOXINDIA
730,L92111DL1988PLC033099,New Delhi Television Limited,1988-09-08,"W-17, 2nd Floor, Greater Kailash-I, New Delhi ...",,secretarial@ndtv.com,+91 11- 4157 7777,www.ndtv.com,257885100.0,Parinita Bhutani Duggal,Company Secretary and Compliance Officer,secretarial@ndtv.com,BSE and NSE,NDTV


### Add Sector Details using Symbol

In [109]:
import requests
import time
import pandas as pd
from functools import lru_cache

class NSE:
    def __init__(self, verbose=True):
        self.verbose = verbose
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'max-age=0',
            'Referer': 'https://www.nseindia.com/',
            'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            'Sec-Ch-Ua-Mobile': '?0',
            'Sec-Ch-Ua-Platform': '"Windows"',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            'Connection': 'keep-alive'
        }
        self.session = self._create_session()
        self.initialize_session()

    def _log(self, message):
        if self.verbose:
            print(message)

    def _create_session(self):
        session = requests.Session()
        session.headers.update(self.headers)
        return session

    def initialize_session(self, specific_symbol=None):
        max_retries = 3
        retry_delay = 2

        for attempt in range(1, max_retries + 1):
            try:
                self._log(f"[{attempt}] Initializing NSE session")
                if attempt > 1:
                    self.session = self._create_session()

                response = self.session.get("https://www.nseindia.com/", timeout=10)
                response.raise_for_status()

                if not self.session.cookies:
                    self._log("No cookies received.")
                    time.sleep(retry_delay)
                    continue

                time.sleep(0.5)  # Small buffer

                if specific_symbol:
                    quote_url = f"https://www.nseindia.com/get-quotes/equity?symbol={specific_symbol.upper()}"
                    self.session.headers.update({'Referer': "https://www.nseindia.com/"})
                    quote_response = self.session.get(quote_url, timeout=10)
                    quote_response.raise_for_status()
                    time.sleep(0.5)

                return True

            except requests.RequestException as e:
                self._log(f"Session error: {e}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    return False
        return False

    @lru_cache(maxsize=None)
    def get_sector_cached(self, symbol):
        return self._get_sector(symbol)

    def _get_sector(self, symbol):
        if not self.initialize_session(symbol):
            self._log(f"Retrying NSE session for {symbol}...")
            time.sleep(1)
            if not self.initialize_session(symbol):
                self._log("Final session retry failed.")
                return {}

        url = f"https://www.nseindia.com/api/quote-equity?symbol={symbol}"
        try:
            self.session.headers.update({'Referer': f'https://www.nseindia.com/get-quotes/equity?symbol={symbol}'})
            response = self.session.get(url, timeout=10)

            if response.status_code != 200:
                self._log(f"Retrying fetch for {symbol}, status {response.status_code}")
                if not self.initialize_session(symbol):
                    return {}
                time.sleep(1)
                response = self.session.get(url, timeout=10)

            if response.status_code == 200:
                data = response.json().get("industryInfo", [])
                return data
            else:
                self._log(f"Failed to fetch NSE data for {symbol} after retry.")
                return {}

        except Exception as e:
            self._log(f"Failed to parse data for {symbol}: {e}")
            return {}


In [110]:
x = nse.get_sector("RELIANCE")

Initializing/Refreshing NSE session (attempt 1/3)
Error initializing NSE session (attempt 1): ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Retrying in 2 seconds...
Initializing/Refreshing NSE session (attempt 2/3)
Creating fresh session
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=RELIANCE
Successfully accessed quote page for RELIANCE
{'macro': 'Energy', 'sector': 'Oil Gas & Consumable Fuels', 'industry': 'Petroleum Products', 'basicIndustry': 'Refineries & Marketing'}


In [111]:
x = nse.get_sector("ITI")

Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=ITI
Successfully accessed quote page for ITI
{'macro': 'Telecommunication', 'sector': 'Telecommunication', 'industry': 'Telecom - Equipment & Accessories', 'basicIndustry': 'Telecom - Equipment & Accessories'}


In [112]:
x = nse.get_sector("SBIN")

Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=SBIN
Successfully accessed quote page for SBIN
{'macro': 'Financial Services', 'sector': 'Financial Services', 'industry': 'Banks', 'basicIndustry': 'Public Sector Bank'}


So let's add all 4 of these to all the rows

In [106]:
merged_df = profile_df

In [113]:
# List to store missing company names
missing_companies = []

# Updated extractor with skip and log
def extract_sector_info_with_check(row):
    symbol = row["Symbol"]
    company = row["Company"]
    try:
        sector_data = nse.get_sector(symbol)
        if not sector_data:
            missing_companies.append(company)
            return pd.Series([None] * 4)
        return pd.Series([
            sector_data.get("macro", ""),
            sector_data.get("sector", ""),
            sector_data.get("industry", ""),
            sector_data.get("basicIndustry", "")
        ])
    except Exception:
        missing_companies.append(company)
        return pd.Series([None] * 4)

# Apply function
merged_df[["Macro Sector", "Sector", "Industry", "Basic Industry"]] = merged_df.apply(
    extract_sector_info_with_check, axis=1
)

# Print names of skipped companies
if missing_companies:
    print("Skipped the following companies due to missing sector information:")
    for company in missing_companies:
        print("-", company)


Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=360ONE
Successfully accessed quote page for 360ONE
{'macro': 'Financial Services', 'sector': 'Financial Services', 'industry': 'Capital Markets', 'basicIndustry': 'Stockbroking & Allied'}
Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=3IINFOLTD
Successfully accessed quote page for 3IINFOLTD
{'macro': 'Information Technology', 'sector': 'Information Technology', 'industry': 'IT - Software', 'basicIndustry': 'Computers - Software & Consulting'}
Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: https://www.nseindia.com/get-quotes/equity?symbol=3MINDIA
Successfully accessed quote page for 3MINDIA
{'macro': 'Diversified', 'sector': 'Diversified', 'industry': 'Diversified', 'basicIndustry': 'Diversified'}
Initializing/Refreshing NSE session (attempt 1/3)
Accessing quote page: 

In [114]:
merged_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,Symbol,Macro Sector,Sector,Industry,Basic Industry
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,3.588626e+08,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE,360ONE,Financial Services,Financial Services,Capital Markets,Stockbroking & Allied
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,1.692308e+08,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE,3IINFOLTD,Information Technology,Information Technology,IT - Software,Computers - Software & Consulting
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,1.126507e+08,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE,3MINDIA,Diversified,Diversified,Diversified,Diversified
3,L67190MH2007PLC289249,5paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,3.119097e+08,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE,5PAISA,Financial Services,Financial Services,Capital Markets,Stockbroking & Allied
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,9.215707e+07,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE,63MOONS,Information Technology,Information Technology,IT - Software,Computers - Software & Consulting
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1169,L24231GJ2000PLC038352,Zota Health Care Limited,2000-07-12,ZOTA HOUSE 2/896 HIRA MODI STREET SAGRAMPURA S...,"ZOTA HOUSE, BHAGWAN AIYAPPA COMPLEX, NEXT TO B...",info@zotahealthcare.com,+91 261 2331601,www.zotahealthcare.com,2.584733e+08,CS ASHVIN VARIYA,+91 261 2331601,cszota@zotahealthcare.com,NSE,ZOTA,Healthcare,Healthcare,Pharmaceuticals & Biotechnology,Pharmaceuticals
1170,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1.006234e+09,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE,ZYDUSLIFE,Healthcare,Healthcare,Pharmaceuticals & Biotechnology,Pharmaceuticals
1171,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,6.363214e+08,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE,ZYDUSWELL,Fast Moving Consumer Goods,Fast Moving Consumer Goods,Food Products,Packaged Foods
1172,L72200MH2000PLC125319,eClerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,4.902536e+08,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE,ECLERX,Services,Services,Commercial Services & Supplies,Business Process Outsourcing (BPO)/ Knowledge ...


In [115]:
merged_df.to_excel('final_profile.xlsx', index=False)

In [116]:
# The remaining errors I fixed manually.