In [1]:
import os
import pandas as pd
import glob
from tqdm import tqdm

folder_path = "excel_files"

# Get a list of all .xlsx file paths
xlsx_files = glob.glob(os.path.join(folder_path, "*.xlsx"))

In [2]:
data_list = []

# List all .xlsx files in the folder
excel_files = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

for file_name in tqdm(excel_files, desc="Processing files", unit="file"):
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_excel(file_path)

    # Helper function to extract a single value or blank
    def get_value(element):
        values = df.loc[df['Element Name'] == element, 'Fact Value'].values
        return values[0] if len(values) > 0 else ''

    # Extract fields
    corporate_identity = get_value('CorporateIdentityNumber')
    company_name = get_value('NameOfTheCompany')
    start_date = get_value('DateOfStartOfFinancialYear')
    end_date = get_value('DateOfEndOfFinancialYear')
    reporting_boundary = get_value('ReportingBoundary')
    has_assurance = get_value('WhetherTheCompanyHasUndertakenReasonableAssuranceOfTheBRSRCore')

    # Determine reporting period
    if start_date == '2023-04-01' and end_date == '2024-03-31':
        reporting_period = 'FY 2023-24'
    elif start_date == '2023-01-01' and end_date == '2023-12-31':
        reporting_period = 'CY 2023'
    elif start_date == '2024-01-01' and end_date == '2024-12-31':
        reporting_period = 'CY 2024'
    else:
        reporting_period = 'Unknown'

    # Count rows in the current file
    total_rows = len(df)

    # Append row
    data_list.append([
        corporate_identity, company_name, start_date, end_date,
        reporting_boundary, has_assurance, reporting_period, total_rows
    ])

# Create final dataframe
report_df = pd.DataFrame(data_list, columns=[
    'Corporate Identity Number', 'Name Of The Company',
    'Date Of Start Of Financial Year', 'Date Of End Of Financial Year',
    'Reporting Boundary', 'BRSR Core Assurance',
    'Reporting Period', 'Total Rows'
]).sort_values(by='Name Of The Company')


Processing files: 100%|██████████████████████████████████████████████████████████| 1174/1174 [08:03<00:00,  2.43file/s]


In [3]:
report_df.isnull().sum()

Corporate Identity Number          0
Name Of The Company                0
Date Of Start Of Financial Year    0
Date Of End Of Financial Year      0
Reporting Boundary                 0
BRSR Core Assurance                0
Reporting Period                   0
Total Rows                         0
dtype: int64

In [4]:
empty_counts = (report_df.fillna('').applymap(str).applymap(str.strip) == '').sum()
print(empty_counts)
report_df[report_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]

Corporate Identity Number           0
Name Of The Company                 0
Date Of Start Of Financial Year     0
Date Of End Of Financial Year       0
Reporting Boundary                  0
BRSR Core Assurance                12
Reporting Period                    0
Total Rows                          0
dtype: int64


  empty_counts = (report_df.fillna('').applymap(str).applymap(str.strip) == '').sum()
  report_df[report_df.fillna('').applymap(str).applymap(str.strip).eq('').any(axis=1)]


Unnamed: 0,Corporate Identity Number,Name Of The Company,Date Of Start Of Financial Year,Date Of End Of Financial Year,Reporting Boundary,BRSR Core Assurance,Reporting Period,Total Rows
10,L32202KA1949PLC032923,ABB India Limited,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1484
334,L24294PN1958PLC011052,FOSECO INDIA LIMITED,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1371
369,L15500MH1981PLC025809,GM BREWERIES LIMITED,2023-04-01,2024-03-31,Standalone basis,,FY 2023-24,1360
461,L21011MH1950FLC145537,Huhtamaki India Limited,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1500
509,L99999GJ1976PLC018945,INOX INDIA LIMITED,2023-04-01,2024-03-31,Standalone basis,,FY 2023-24,1204
881,L74899DL1993PLC053579,R Systems International Limited,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1579
832,L26942TG1974PLC001693,RAIN INDUSTRIES LIMITED,2023-01-01,2023-12-31,Consolidated basis,,CY 2023,1625
917,L21012TZ1960PLC000364,SESHASAYEE PAPER AND BOARDS LIMITED,2023-04-01,2024-03-31,Standalone basis,,FY 2023-24,1409
898,L24239MH1956PLC009794,Sanofi India Limited,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1394
911,L29130PN1962PLC204515,Schaeffler India Limited,2023-01-01,2023-12-31,Standalone basis,,CY 2023,1576


In [5]:
report_df

Unnamed: 0,Corporate Identity Number,Name Of The Company,Date Of Start Of Financial Year,Date Of End Of Financial Year,Reporting Boundary,BRSR Core Assurance,Reporting Period,Total Rows
0,L74140MH2008PLC177884,360 ONE WAM LIMITED,2023-04-01,2024-03-31,Consolidated basis,false,FY 2023-24,1637
1,L67120MH1993PLC074411,3I Infotech Limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1585
2,L31300KA1987PLC013543,3M INDIA LIMITED,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1776
3,L67190MH2007PLC289249,5paisa Capital Limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1525
4,L29142TN1988PLC015586,63 moons technologies limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1284
...,...,...,...,...,...,...,...,...
1172,L24230GJ1995PLC025878,Zydus Lifesciences Limited,2023-04-01,2024-03-31,Consolidated basis,true,FY 2023-24,2269
1173,L15201GJ1994PLC023490,Zydus Wellness Limited,2023-04-01,2024-03-31,Consolidated basis,true,FY 2023-24,1861
279,L72200MH2000PLC125319,eClerx Services Limited,2023-04-01,2024-03-31,Consolidated basis,false,FY 2023-24,1656
447,L23201MH1952GOI008858,hindustan petroleum corporation limited,2023-04-01,2024-03-31,Standalone basis,true,FY 2023-24,2205


In [6]:
report_df.rename(columns={
    'Corporate Identity Number': 'CIN',
    'Name Of The Company': 'Company',
    'Date Of Start Of Financial Year': 'Start Date',
    'Date Of End Of Financial Year': 'End Date',
    'Boundary': 'Reporting Boundary',
    'BRSR Core Assurance': 'Core Assurance',
    'Reporting Period': 'Period'
}, inplace=True)
report_df

Unnamed: 0,CIN,Company,Start Date,End Date,Reporting Boundary,Core Assurance,Period,Total Rows
0,L74140MH2008PLC177884,360 ONE WAM LIMITED,2023-04-01,2024-03-31,Consolidated basis,false,FY 2023-24,1637
1,L67120MH1993PLC074411,3I Infotech Limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1585
2,L31300KA1987PLC013543,3M INDIA LIMITED,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1776
3,L67190MH2007PLC289249,5paisa Capital Limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1525
4,L29142TN1988PLC015586,63 moons technologies limited,2023-04-01,2024-03-31,Standalone basis,false,FY 2023-24,1284
...,...,...,...,...,...,...,...,...
1172,L24230GJ1995PLC025878,Zydus Lifesciences Limited,2023-04-01,2024-03-31,Consolidated basis,true,FY 2023-24,2269
1173,L15201GJ1994PLC023490,Zydus Wellness Limited,2023-04-01,2024-03-31,Consolidated basis,true,FY 2023-24,1861
279,L72200MH2000PLC125319,eClerx Services Limited,2023-04-01,2024-03-31,Consolidated basis,false,FY 2023-24,1656
447,L23201MH1952GOI008858,hindustan petroleum corporation limited,2023-04-01,2024-03-31,Standalone basis,true,FY 2023-24,2205


In [7]:
report_df.to_excel('report_details.xlsx', index=False)