In [8]:
import os
import time
import unicodedata

import pandas as pd

In [9]:
# Notebook-wide pandas display settings.
# Show at most 10 columns and allow 1000 characters of display width.
# (The row limit stays at the pandas default; set pd.options.display.max_rows,
# e.g. to 2000, to raise it.)
pd.options.display.max_columns = 10
pd.options.display.width = 1000

In [10]:
# Column name -> dtype of the panel frame, in the order the columns appear.
_panel_dtypes = {
    'Year': 'int',
    'Rank_nr': 'int',
    'Company': 'string',
    'Industry': 'string',
    'Country': 'string',
    'Sales': 'int',
    'Profits': 'float',
    'Assets': 'int',
    'Market_Value': 'int',
}

# One empty, typed Series per column gives a zero-row frame with a fixed schema.
df_panel = pd.DataFrame(
    {column: pd.Series(dtype=kind) for column, kind in _panel_dtypes.items()}
)

# Show the (still empty) frame so the schema can be checked at a glance.
df_panel

Unnamed: 0,Year,Rank_nr,Company,Industry,Country,Sales,Profits,Assets,Market_Value


In [11]:
folder = 'forbes-global-2000-2008-2019'
filename_prefix = 'Forbes Global 2000 - '

# Read one CSV per ranking year (2008 through 2022 inclusive), tag every row
# with its year, and collect the frames so they are concatenated exactly once.
yearly_frames = []
for year in range(2008, 2023):
    # The f-string builds the per-year path from the folder, prefix and year.
    filename = f'../data/{folder}/{filename_prefix}{year}.csv'
    df = pd.read_csv(filename, encoding='ISO-8859-1')
    print(filename, len(df))

    df.insert(0, 'Year', year)
    yearly_frames.append(df)

# A single concat avoids re-copying all accumulated rows on every iteration and
# avoids concatenating with an empty frame (which newer pandas versions warn
# about). Building the result from scratch also makes the cell safe to re-run:
# running it twice no longer appends every year a second time.
combined = pd.concat(yearly_frames, ignore_index=True)

# Keep the column order already declared on df_panel, followed by any extra
# columns found in the CSV files; declared columns missing from the files are
# kept and filled with NaN, as concatenating onto df_panel used to do.
ordered_columns = list(df_panel.columns) + [
    column for column in combined.columns if column not in df_panel.columns
]
df_panel = combined.reindex(columns=ordered_columns)

# NOTE: filename_prefix already ends with a space, so the saved file name has
# two spaces before 'Combined'. Left unchanged so existing readers of this file
# keep working.
df_panel.to_csv(f'../data/{folder}/{filename_prefix} Combined Panel Data.csv', index=False, encoding='utf-8')
    


../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2008.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2009.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2010.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2011.csv 1999
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2012.csv 1999
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2013.csv 1999
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2014.csv 1998
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2015.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2016.csv 1999
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2017.csv 1999
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2018.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2019.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2020.csv 2000
../data/forbes-global-2000-2008-2019/Forbes Global 2000 - 2021.c

In [12]:
def _ascii_company_name(name):
    """Return `name` reduced to plain ASCII with surrounding whitespace removed.

    NFKD normalisation decomposes accented characters into a base character
    plus combining marks; encoding to ASCII with errors ignored then drops
    every non-ASCII code point that remains.

    Non-string values (for example NaN from a blank CSV cell) are returned
    unchanged instead of raising a TypeError inside unicodedata.normalize.
    """
    if not isinstance(name, str):
        return name
    return unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('utf-8', 'ignore').strip()


df_panel['Company'] = df_panel['Company'].map(_ascii_company_name)
# Underscores and hyphens are replaced by a space so spelling variants of the
# same name compare equal.
df_panel['Company'] = df_panel['Company'].replace({'_': ' ', '-': ' '}, regex=True)
df_panel

Unnamed: 0,Year,Rank_nr,Company,Industry,Country,Sales,Profits,Assets,Market_Value
0,2008,1,HSBC Holdings,Banking,United Kingdom,146500.0,19130,2348980.0,180810.0
1,2008,2,General Electric,Conglomerates,United States,172740.0,22210,795340.0,330930.0
2,2008,3,Bank of America,Banking,United States,119190.0,14980,1715750.0,176530.0
3,2008,4,JPMorgan Chase,Banking,United States,116350.0,15370,1562150.0,136880.0
4,2008,5,ExxonMobil,Oil & Gas Operations,United States,358600.0,40610,242080.0,465510.0
...,...,...,...,...,...,...,...,...,...
29988,2022,1995,Shenzhen Feima International Supply Chain,Business Services & Supplies,China,37.0,1408.3,166.0,1136.0
29989,2022,1997,NMDC,Materials,India,3520.0,1406.4,5715.0,6401.0
29990,2022,1997,Sichuan Changhong Electric,Consumer Durables,China,15716.0,53.1,12105.0,1957.0
29991,2022,1999,Satellite Chemical,Chemicals,China,4413.0,931.3,7640.0,9521.0


In [13]:
# Distinct company names across all years, sorted ascending.
list_of_companies = list(df_panel['Company'].unique())
list_of_companies.sort()

# Report how many distinct names there are, then the names themselves.
print(len(list_of_companies))
print(list_of_companies)

4614
['360 Security Technology', '3M', '3i Group', '77 Bank', 'A2A', 'AAC Technologies Holdings', 'AB Sagax', 'ABB', 'ABK', 'ACC', 'ACE', 'ACE Aviation', 'ACS Group', 'ACWA Power', 'ADNOC Drilling', 'ADT', 'AECOM Technology', 'AES', 'AGC', 'AGCO', 'AGL Energy', 'AGL Resources', 'AGNC Investment', 'AIA Group', 'AIB Group', 'AIRBUS', 'AK Steel Holding', 'ALFA', 'ALSO Holding', 'AMB Property', 'AMEC', 'AMMB Holdings', 'AMP', 'AMR', 'ANZ', 'ANZ Banking', 'AOC Holdings', 'AOL', 'APA', 'APA Group', 'ARM African Rainbow Minerals', 'ARM Holdings', 'ASE Advanced Semiconductor', 'ASE Technology Holding', 'ASM International', 'ASM International N.V.', 'ASML Holding', 'ASX', 'AT&T', 'ATEbank', 'ATOS', 'AU Optronics', 'AVIC Capita', 'AVIC International Holdings', 'AXA Group', 'Aareal Bank', 'AbbVie', 'Abbott Laboratories', 'Abengoa', 'Abercrombie & Fitch', 'Aberdeen Asset Management', 'Abertis', 'Abiomed', 'Abitare In Spa', 'AbitibiBowater', 'Aboitiz Equity Ventures', 'Abrdn', 'Absa Group', 'Abu Dh

In [14]:
# TODO: Change folder accordingly
temp_folder = 'D:/Temporary/temp'

# Characters deleted from a company name when building its file name: the
# punctuation this cell has always stripped (. : ? ! ') plus the remaining
# characters Windows does not allow in file names (\ * " < > |).
_FILENAME_DROP = str.maketrans('', '', '.:?!\'\\*"<>|')


def _company_filename(company):
    """Turn a company name into a base file name (without extension).

    '/' becomes ' and ', spaces become underscores, and every character in
    `_FILENAME_DROP` is removed.
    """
    return company.replace('/', ' and ').replace(' ', '_').translate(_FILENAME_DROP)


# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(temp_folder, exist_ok=True)

print('Total number of companies: {}'.format(len(list_of_companies)))

for company in list_of_companies:
    # All rows for this company, oldest ranking year first.
    df_company = df_panel[df_panel['Company'] == company].sort_values(by=['Year'], ascending=True)

    # Save the DataFrame to a CSV file
    company_name = _company_filename(company)
    filename = f'{temp_folder}/{company_name}.csv'
    print(company, len(df_company), filename)

    df_company.to_csv(filename, index=False, encoding='utf-8')

Total number of companies: 4614
360 Security Technology 3 D:/Temporary/temp/360_Security_Technology.csv
3M 15 D:/Temporary/temp/3M.csv
3i Group 8 D:/Temporary/temp/3i_Group.csv
77 Bank 15 D:/Temporary/temp/77_Bank.csv
A2A 10 D:/Temporary/temp/A2A.csv
AAC Technologies Holdings 4 D:/Temporary/temp/AAC_Technologies_Holdings.csv
AB Sagax 2 D:/Temporary/temp/AB_Sagax.csv
ABB 15 D:/Temporary/temp/ABB.csv
ABK 1 D:/Temporary/temp/ABK.csv
ACC 1 D:/Temporary/temp/ACC.csv
ACE 8 D:/Temporary/temp/ACE.csv
ACE Aviation 2 D:/Temporary/temp/ACE_Aviation.csv
ACS Group 4 D:/Temporary/temp/ACS_Group.csv
ACWA Power 1 D:/Temporary/temp/ACWA_Power.csv
ADNOC Drilling 1 D:/Temporary/temp/ADNOC_Drilling.csv
ADT 1 D:/Temporary/temp/ADT.csv
AECOM Technology 8 D:/Temporary/temp/AECOM_Technology.csv
AES 15 D:/Temporary/temp/AES.csv
AGC 4 D:/Temporary/temp/AGC.csv
AGCO 9 D:/Temporary/temp/AGCO.csv
AGL Energy 13 D:/Temporary/temp/AGL_Energy.csv
AGL Resources 3 D:/Temporary/temp/AGL_Resources.csv
AGNC Investment 6 D: