## Extracting Data from Excel files

### Extracting Profile details for all the companies

In [1]:
import os
import pandas as pd
import glob

folder_path = "excel_files"

# Get a list of all .xlsx file paths
xlsx_files = glob.glob(os.path.join(folder_path, "*.xlsx"))


In [2]:
from tqdm import tqdm
import pandas as pd
import os

data_list = []

# List all .xlsx files in the folder
excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

for file_name in tqdm(excel_files, desc="Processing files", unit="file"):
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)

    # Helper function to extract a single value or blank
    def get_value(element):
        values = df.loc[df['Element Name'] == element, 'Fact Value'].values
        return values[0] if len(values) > 0 else ''

    # Extract all required fields
    corporate_identity = get_value('CorporateIdentityNumber')
    company_name = get_value('NameOfTheCompany')
    date_of_incorporation = get_value('DateOfIncorporation')
    regd_office_address = get_value('AddressOfRegisteredOfficeOfCompany')
    corp_office_address = get_value('AddressOfCorporateOfficeOfCompany')
    company_email = get_value('EMailOfTheCompany')
    company_phone = get_value('TelephoneOfCompany')
    company_website = get_value('WebsiteOfCompany')
    paid_up_value = get_value('ValueOfSharesPaidUp')
    contact_name = get_value('NameOfContactPerson')
    contact_number = get_value('ContactNumberOfContactPerson')
    contact_email = get_value('EMailOfContactPerson')

    # Handle multiple exchange listings
    exchange_vals = df.loc[
        df['Element Name'] == 'NameOfStockExchangeWhereTheCompanyIsListed', 'Fact Value'
    ].dropna().unique().tolist()

    exchange_listed = ' and '.join(exchange_vals) if exchange_vals else ''

    # Append to data list
    data_list.append([
        corporate_identity, company_name, date_of_incorporation,
        regd_office_address, corp_office_address, company_email,
        company_phone, company_website, paid_up_value,
        contact_name, contact_number, contact_email, exchange_listed
    ])



Processing files:   0%|▎                                                            | 5/1174 [00:02<10:24,  1.87file/s]


KeyboardInterrupt: 

In [20]:
# Create DataFrame
final_df = pd.DataFrame(data_list, columns=[
    'Corporate Identity Number', 'Name Of The Company', 'Date Of Incorporation',
    'Address Of Registered Office Of Company', 'Address Of Corporate Office Of Company',
    'EMail Of The Company', 'Telephone Of Company', 'Website Of Company', 'Value Of Shares Paid Up',
    'Name Of Contact Person', 'Contact Number Of Contact Person', 'EMail Of Contact Person',
    'Stock Exchanges Where Listed'
]).sort_values(by='Name Of The Company')

In [21]:
final_df.shape

(1174, 13)

In [22]:
empty_counts = (df.fillna('').applymap(str).applymap(str.strip) == '').sum() 
print(empty_counts) 
df[df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]

Corporate Identity Number                   0
Name Of The Company                         0
Date Of Incorporation                      12
Address Of Registered Office Of Company     0
Address Of Corporate Office Of Company      5
EMail Of The Company                        0
Telephone Of Company                        0
Website Of Company                          0
Value Of Shares Paid Up                     0
Name Of Contact Person                      0
Contact Number Of Contact Person            0
EMail Of Contact Person                     0
Stock Exchanges Where Listed                4
dtype: int64


  empty_counts = (df.fillna('').applymap(str).applymap(str.strip) == '').sum()
  df[df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]


Unnamed: 0,Corporate Identity Number,Name Of The Company,Date Of Incorporation,Address Of Registered Office Of Company,Address Of Corporate Office Of Company,EMail Of The Company,Telephone Of Company,Website Of Company,Value Of Shares Paid Up,Name Of Contact Person,Contact Number Of Contact Person,EMail Of Contact Person,Stock Exchanges Where Listed
10,L32202KA1949PLC032923,ABB India Limited,,"“Disha”, Corporate Office, 3rd Floor, Plot No....","“Disha”, Corporate Office, 3rd Floor, Plot No....",investor.helpdesk@in.abb.com,080 22949113 / 080 22949122,www.abb.com,423816750,Dhenuka Srinivasan,080-22949151,dhenuka.srinivasan@in.abb.com,BSE and NSE
54,L45400HR1985PLC021622,Anant Raj Limited,1985-07-30,"Plot No. CP-1, Sector-8 IMT Manesar, Gurugram-...",,manojpahwa@anantrajlimited.com,011-43034400,www.anantrajlimited.com,683781506,A.K. Prashar,011-43034426,ak.prashar@anantrajlimited.com,BSE and NSE
272,L74899DL1995PLC065388,DR. LAL PATH LABS LTD.,1995-02-14,"Block E, Sector-18, Rohini, New Delhi-110085","12th Floor, Tower B, SAS Tower, Medicity, Sect...",cs@lalpathlabs.com,+91-124-3016-500,www.lalpathlabs.com,834778520,Mr. Manoj Kumar Garg (Group Chief Human Resour...,+ 91-124-3016-500,manoj.garg@lalpathlabs.com,
334,L24294PN1958PLC011052,FOSECO INDIA LIMITED,,"Foseco India Limited, Gat Nos. 922 and 923, Sa...","Foseco India Limited, Gat Nos. 922 and 923, Sa...",investor.grievance@vesuvius.com,02137 – 668100,www.fosecoindia.com,63864590,Mr. Mahendra Kumar Dutia,02137-668100,investor.grievance@vesuvius.com,BSE and NSE
369,L15500MH1981PLC025809,GM BREWERIES LIMITED,,"Ganesh Niwas Ground Floor, Veer Savarkar Marg,...","Ganesh Niwas Ground Floor, Veer Savarkar Marg,...",gmbreweries.cs@gmail.com,022-24331150,www.gmbreweries.com,182775380,Mr. Sandeep Kutchhi,(022) 24331150,investors_complaints@gmbreweries.com,BSE and NSE
381,L15400GJ2009PLC058781,GOPAL SNACKS LIMITED,2009-12-07,"PLOT NO. G2322, G2323 & G2324, GIDC METODA TAL...",,cs@gopalsnacks.com,9924271217,https://www.gopalnamkeen.com,124604370,Mayur Popatbhai Gangani,91 99242 71217,cs@gopalsnacks.com,BSE and NSE
453,L74110UP2008PLC034977,HMA AGRO INDUSTRIES LIMITED,2008-04-09,"18A/5/3 TAJVIEW CROSSING FATEHABAD ROAD, Agra,...",,cs@hmaagro.com,+91 7217018161,www.hmagroup.co,500769770,Mr. Nikhil Sundrani,+91 7217018161,cs@hmaagro.com,BSE and NSE
461,L21011MH1950FLC145537,Huhtamaki India Limited,,"A-802, Crescenzo, C-38/39, G Block, Bandra-Kur...","7th Floor, Bellona, The Walk, Hiranandani Esta...",investor.communication@huhtamaki.com,+91 (022) 6174 0100,www.flexibles.huhtamaki.in,151100000,Mr. Abhijaat Sinha,+91 (022) 6174 0100,abhijaat.sinha@huhtamaki.com,NSE and BSE
509,L99999GJ1976PLC018945,INOX INDIA LIMITED,,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,9TH FLOOR K P PLATINA RACE COURSE VADODARA 390007,inox@inoxcva.com,+91 (265)6160100,www.inoxcva.com,181527000,"Mr. Deepak Acharya, CEO",+91 9824089963,deepak.acharya@inoxcva.com,BSE and NSE
728,L92111DL1988PLC033099,New Delhi Television Limited,1988-09-08,"W-17, 2nd Floor, Greater Kailash-I, New Delhi ...",,secretarial@ndtv.com,+91 11- 4157 7777,www.ndtv.com,257885068,Parinita Bhutani Duggal,Company Secretary and Compliance Officer,secretarial@ndtv.com,BSE and NSE


In [23]:
final_df.rename(columns={
    'Corporate Identity Number': 'CIN',
    'Name Of The Company': 'Company',
    'Date Of Incorporation': 'Incorporation Date',
    'Address Of Registered Office Of Company': 'Registered Address',
    'Address Of Corporate Office Of Company': 'Corporate Address',
    'EMail Of The Company': 'Company Email',
    'Telephone Of Company': 'Company Telephone',
    'Website Of Company': 'Company Website',
    'Value Of Shares Paid Up': 'Paid up share capital',
    'Name Of Contact Person': 'Contact Person Name',
    'Contact Number Of Contact Person': 'Contact Person Number',
    'EMail Of Contact Person': 'Contact Person Email',
    'Stock Exchanges Where Listed': 'Listed On'
}, inplace=True)

In [24]:
final_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On
0,L74140MH2008PLC177884,360 ONE WAM LIMITED,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,358862640,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,169230842,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE
2,L31300KA1987PLC013543,3M INDIA LIMITED,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,112650700,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE
3,L67190MH2007PLC289249,5paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,311909730,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE
4,L29142TN1988PLC015586,63 moons technologies limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,92157074,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1172,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1006233990,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE
1173,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,636321440,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE
279,L72200MH2000PLC125319,eClerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,490253590,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE
447,L23201MH1952GOI008858,hindustan petroleum corporation limited,1952-07-05,"Petroleum House, 17, Jamshedji Tata Road, Chur...","Petroleum House, 17, Jamshedji Tata Road, Chur...",corphqo@hpcl.in,022-22863900,www.hindustanpetroleum.com,14185500000,P S Murty,022 - 22884723,corporatehse@mail.hpcl.co.in,BSE and NSE


### Data Cleaning
From observation, we see that the names of companies are not in proper case in some instances. Also, the amil IDs and websites are also not propely in lowercase. The share capital is also stored as a string. Let's fix that.

In [25]:
# 1. Company names: convert to title case only if the entire string is uppercase
final_df['Company'] = final_df['Company'].apply(
    lambda x: x.title() if isinstance(x, str) else x
)

# 2. Lowercase email and website columns
final_df['Contact Person Email'] = final_df['Contact Person Email'].astype(str).str.lower()
final_df['Company Website'] = final_df['Company Website'].astype(str).str.lower()

# 3. Attempt to convert 'Paid up share capital' to numeric, capture errors
import pandas as pd

errors = []

for i, val in final_df['Paid up share capital'].items():
    try:
        _ = pd.to_numeric(val)
    except Exception:
        errors.append(final_df.at[i, 'Company'])

# Display companies where conversion failed
if errors:
    print("\nCompanies with invalid 'Paid up share capital' values:")
    for name in set(errors):
        print(name)

In [16]:
import pandas as pd
final_df = pd.read_excel('profile.xlsx')
sector_df = pd.read_excel('nse_data.xlsx')

### Add sectors
We must add sector to the df because the analysis is sector-wise. Sectors for each of the companies are present in nse_data.xlsx that we extarcted earlier.

In [17]:
import re

def clean_name(name):
    if not isinstance(name, str):
        return ''
    # Lowercase and remove common noise
    name = name.lower()
    name = re.sub(r'[.,]', '', name)            # remove periods and commas
    name = re.sub(r'\s+', ' ', name).strip()    # remove extra spaces
    name = name.replace('&', 'and')             # unify ampersands
    return name.title()                         # return in title case

# Clean both dataframes
final_df['Company_clean'] = final_df['Company'].apply(clean_name)
sector_df['Company_clean'] = sector_df['Company'].apply(clean_name)

# Merge on the cleaned name
final_df = final_df.merge(
    sector_df[['Company_clean', 'Sector']],
    on='Company_clean',
    how='left'
)

# Drop the helper column if not needed
final_df.drop(columns=['Company_clean'], inplace=True)


In [18]:
final_df

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,Sector
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,3.588626e+08,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE,Financial Services
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,1.692308e+08,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE,Information Technology
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,1.126507e+08,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE,Diversified
3,L67190MH2007PLC289249,5Paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,3.119097e+08,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE,Financial Services
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,9.215707e+07,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE,Information Technology
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1169,L24230GJ1995PLC025878,Zydus Lifesciences Limited,1995-05-15,"""Zydus Corporate Park"", Scheme No. 63, Survey ...","""Zydus Corporate Park"", Scheme No. 63, Survey ...",dhavalsoni@zyduslife.com,"+917948040000, +917971800000",www.zyduslife.com,1.006234e+09,Mr. Vishal J. Gor,+917948040235,vishalgor@zyduslife.com,BSE and NSE,Healthcare
1170,L15201GJ1994PLC023490,Zydus Wellness Limited,1994-11-01,"Zydus Corporate Park, Scheme No. 63, Survey No...","Zydus Corporate Park, Scheme No. 63, Survey No...",investor.grievance@zyduswellness.com,079-48040000,www.zyduswellness.com,6.363214e+08,Mr. Umesh V Parikh CFO,"079-48040000, Ext. No. 1330",umesh.parikh@zyduswellness.com,BSE and NSE,Fast Moving Consumer Goods
1171,L72200MH2000PLC125319,Eclerx Services Limited,2000-03-24,"SONAWALA BUILDING, 1ST FLOOR, 29 BANK STREET F...","4th Floor, Express Towers, Nariman Point, Mumb...",investor@eclerx.com,+91 (022) 6614 8301,www.eclerx.com,4.902536e+08,Srinivasan Nadadhur,+91 (022) 6614 8301,esg@eclerx.com,BSE and NSE,Services
1172,L23201MH1952GOI008858,Hindustan Petroleum Corporation Limited,1952-07-05,"Petroleum House, 17, Jamshedji Tata Road, Chur...","Petroleum House, 17, Jamshedji Tata Road, Chur...",corphqo@hpcl.in,022-22863900,www.hindustanpetroleum.com,1.418550e+10,P S Murty,022 - 22884723,corporatehse@mail.hpcl.co.in,BSE and NSE,Oil Gas & Consumable Fuels


In [19]:
final_df.to_excel('profile.xlsx', index=False)

In [20]:
import sqlite3
df = pd.read_excel('profile.xlsx')

conn = sqlite3.connect('brsr.sqlite')

df.to_sql('profile', conn, if_exists='replace', index=False)

1174

In [21]:
tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn)
print(tables['name'].tolist())

['profile']


In [22]:
df_sql = pd.read_sql_query("SELECT * FROM profile LIMIT 10;", conn)
df_sql

Unnamed: 0,CIN,Company,Incorporation Date,Registered Address,Corporate Address,Company Email,Company Telephone,Company Website,Paid up share capital,Contact Person Name,Contact Person Number,Contact Person Email,Listed On,Sector
0,L74140MH2008PLC177884,360 One Wam Limited,2008-01-17,"360 ONE Centre, Kamala City, Senapati Bapat Ma...","360 ONE Centre, Kamala City, Senapati Bapat Ma...",sustainability@360.one,+91-22-48765600,www.360.one,358862600.0,"Mr. Rohit Bhase, Company Secretary & Complianc...",+91-22-48765600,sustainability@360.one,BSE and NSE,Financial Services
1,L67120MH1993PLC074411,3I Infotech Limited,1993-10-11,"Tower # 5, International Infotech Park, Vashi ...","Tower 2, 6th Floor, E Wing, Seawoods Grand Cen...",investors@3i-infotech.com,+91-22-7123 8000,www.3i-infotech.com,169230800.0,Mrs. Varika Rastogi,+91-22-7123 8000,compliance@3i-infotech.com,BSE and NSE,Information Technology
2,L31300KA1987PLC013543,3M India Limited,1987-07-04,"Plot Nos. 48-51, Electronics City, Hosur Road,...","WeWork Prestige Central, 3rd floor, 36 Infantr...",investorhelpdesk.in@mmm.com,+91-80-2223 1414,https://www.3mindia.in,112650700.0,Ms. Smitha Gopalkrishnan,+91-80-2223 1414,sgopalkrishnan@mmm.com,BSE and NSE,Diversified
3,L67190MH2007PLC289249,5Paisa Capital Limited,2007-07-10,"IIFL House, Sun Infotech Park, Road No. 16V, B...","IIFL House, Sun Infotech Park, Road No. 16V, B...",csteam@5paisa.com,022-41035000,www.5paisa.com,311909700.0,Namita Godbole,022 41035000,csteam@5paisa.com,NSE and BSE,Financial Services
4,L29142TN1988PLC015586,63 Moons Technologies Limited,1988-04-12,"Shakti Tower -II, 4th Floor, Premises -J 766, ...","FT Tower, CTS Nos.256 & 257, Suren Road, Andhe...",info@63moons.com,02266868010,www.63moons.com,92157070.0,Hariraj Chouhan,02266868010,info@63moons.com,BSE and NSE,Information Technology
5,L74900MH2009PLC231660,Abans Holdings Limited,2009-09-24,"36, 37, 38A, Floor 3, Nariman Bhavan, Backbay ...","25, Mittal Chambers, 2nd Floor, Barrister Rajn...",compliance@abansholdings.com,61790000,http://www.abansholdings.com,100291900.0,Ms. Sheela Gupta,022 6179 0000,compliance@abansholdings.com,BSE and NSE,
6,L32202KA1949PLC032923,Abb India Limited,,"“Disha”, Corporate Office, 3rd Floor, Plot No....","“Disha”, Corporate Office, 3rd Floor, Plot No....",investor.helpdesk@in.abb.com,080 22949113 / 080 22949122,www.abb.com,423816800.0,Dhenuka Srinivasan,080-22949151,dhenuka.srinivasan@in.abb.com,BSE and NSE,Capital Goods
7,L26940GJ1936PLC149771,Acc Limited,1936-08-01,"Adani Corporate House, Shantigram, Near Vaishn...","Adani Corporate House, Shantigram, Near Vaishn...",acc-investorsupport@adani.com,+917926565555,www.acclimited.com,1877873000.0,Neeru Bansal,"Adani Corporate House, Shantigram, Near Vaishn...",neeru.bansal@adani.com,BSE and NSE,Construction Materials
8,L40100GJ1996PLC030533,Adani Power Limited,1996-08-22,"Adani Corporate House, Shantigram, Near Vaishn...","Adani Corporate House, Shantigram, Near Vaishn...",investor.apl@adani.com,+91 79 2656 5555,www.adanipower.com,42728010000.0,Mr. Santosh kumar Singh,079255557289,cso.power@adani.com,BSE and NSE,Power
9,L15146GJ1999PLC035320,Adani Wilmar Limited,1999-01-22,"FORTUNE HOUSE, NEAR NAVRANGPURA RAILWAY CROSSI...","FORTUNE HOUSE, NEAR NAVRANGPURA RAILWAY CROSSI...",investor.relations@adaniwilmar.in,+91 79 26455650,www.adaniwilmar.com,1299700000.0,Mr. Pulkit Mittal,+91 79 26455650,pulkit.mittal@adaniwilmar.in,NSE and BSE,Fast Moving Consumer Goods


In [23]:
conn.close()