In [1]:
# Import libraries
import os
import pandas as pd
from pathlib import Path
import configparser
import json
from tabulate import tabulate

In [2]:
# Root folder of the ORBIS data drop. The historical network-drive location stays the
# default; set the ORBIS_BASE_PATH environment variable to run the notebook from another
# machine or checkout without editing this cell.
base_path = Path(os.environ.get(
    'ORBIS_BASE_PATH',
    r'U:\WP 765 Energy RIC\Private data & analysis\Alternative Approach_Private R&D\Orbis_Data\Data_2020'
))

In [3]:
# Import config parameters
# The extra 'list' converter provides config.getlist(): it splits a comma-separated
# value and strips the whitespace around each item.
config = configparser.ConfigParser(
    converters={'list': lambda x: [i.strip() for i in x.split(',')]}
)

# ConfigParser.read() silently skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means config.ini was not loaded. Fail here with
# the offending path instead of with a missing-option error further down.
config_path = base_path.joinpath(r'config.ini')
if not config.read(config_path):
    raise FileNotFoundError('Configuration file not found or unreadable: ' + str(config_path))

# Name of the config section holding the case-specific parameters
CASE = 'EU_28'

# Parameters shared by all cases
MAPPING = Path(config.get('DEFAULT','MAPPING_PATH'))
SCREENING_KEYS = config.getlist('DEFAULT','SCREENING_KEYS')

# Parameters of the selected case
REGION = config.getlist(CASE,'ORBIS_REGION')
CASE_ROOT = base_path.joinpath(config.get(CASE,'CASE_ROOT_PATH'))
YEAR_LASTAV = config.getint(CASE,'YEAR_LASTAV')
SUBS_ID_FILE_N = config.getint(CASE,'SUBS_ID_FILE_N')
SUBS_FIN_FILE_N = config.getint(CASE,'SUBS_FIN_FILE_N')
GROUPS_FIN_FILE_N = config.getint(CASE,'GROUPS_FIN_FILE_N')
METHOD = config.get(CASE,'SUBS_METHOD')

In [5]:
file_path = CASE_ROOT.joinpath(r'Mapping\Country_table.csv')

# The country table is only (re)built when its CSV is not on disk yet
if not file_path.exists():

    print('Mapping table - Country\n')

    # Column names imposed on the 'Country_map' sheet, in sheet order
    country_sheet_columns = [
        'Country_Name_ISO', 'Country_Name_Simple', 'Country_2DID_ISO', 'Country_3DID_ISO',
        'Is_OECD', 'Is_IEA', 'Is_MI', 'Region', 'IEA_Region', 'World_Player',
    ]
    country_flag_columns = ['Is_OECD', 'Is_IEA', 'Is_MI']

    # Flag columns are read as bool, every other column as str
    country_dtypes = {
        col: (bool if col in country_flag_columns else str)
        for col in country_sheet_columns
    }

    # Read Country mapping file
    country_sheet = pd.read_excel(
        MAPPING.joinpath(r'Mapping_Country.xlsx'),
        sheet_name='Country_map',
        names=country_sheet_columns,
        na_values='n.a.',
        dtype=country_dtypes,
    )

    # 'Region' is not kept in the mapping table
    country_df = country_sheet.drop(columns='Region')

    # Save it as csv (same column order as the sheet, without 'Region')
    country_df.to_csv(
        file_path,
        index=False,
        columns=[col for col in country_sheet_columns if col != 'Region'],
        float_format='%.10f',
        na_rep='n.a.',
    )

Mapping table - Country



In [None]:
file_path = CASE_ROOT.joinpath(r'Listed companies subsidiaries.csv')

# The consolidated subsidiaries list is only (re)built when its CSV is not on disk yet
if not file_path.exists():

    print('Listed companies subsidiaries\n')

    # One frame per input workbook; they are concatenated once after the loop.
    # (DataFrame.append was removed in pandas 2.0 and re-copied the data on every call.)
    sub_frames = []

    # Read ORBIS input list for subsidiaries (workbooks are numbered 1..SUBS_ID_FILE_N)
    for number in range(1, SUBS_ID_FILE_N + 1):

        print(number)
        df = pd.read_excel(CASE_ROOT.joinpath(r'Input\Listed companies subsidiaries #' + str(number) + '.xlsx'),
                           sheet_name = 'Results',
                           na_values = 'No data fulfill your filter criteria',
                           names = ['Rank', 'Company_name', 'BvD9', 'BvD_id', 'Group_Subs_Count', 'Sub_BvD_id', 'Sub_BvD9', 'Subs_lvl'],
                           dtype = {
                               **{col: str for col in ['Rank', 'Company_name', 'BvD9', 'BvD_id', 'Sub_BvD9', 'Sub_BvD_id']},
                               'Group_Subs_Count': pd.Int64Dtype(),
                               'Subs_lvl': pd.Int8Dtype()}
                          ).drop(columns = ['Rank','Subs_lvl','Group_Subs_Count'])

        sub_frames.append(df)

    # Consolidate list of subsidiaries (pd.concat rejects an empty list, hence the fallback)
    ListCompSub_df = pd.concat(sub_frames) if sub_frames else pd.DataFrame()

    # Save it as csv, once, after every workbook has been read: an interrupted run can
    # then no longer leave a partial file that the exists() check above would accept.
    # Nothing is written when there are no input workbooks.
    if sub_frames:
        ListCompSub_df.to_csv(file_path,
                              index = False,
                              columns = ['Company_name', 'BvD9', 'BvD_id', 'Sub_BvD9', 'Sub_BvD_id'],
                              na_rep = 'n.a.'
                             )

In [None]:
file_path = CASE_ROOT.joinpath(r'Listed companies subsidiaries - Financials.csv')

# The consolidated subsidiaries financials are only (re)built when their CSV is not on disk yet
if not file_path.exists():

    print('Listed companies subsidiaries - Financials\n')

    # Yearly operating revenue columns, in sheet order: OpRevY19 down to OpRevY10
    oprev_year_columns = ['OpRevY' + str(YY) for YY in range(10,20)[::-1]]

    # One frame per input workbook; they are concatenated once after the loop.
    # (DataFrame.append was removed in pandas 2.0 and re-copied the data on every call.)
    subs_fin_frames = []

    # Read ORBIS input list for subsidiaries financials (workbooks are numbered 1..SUBS_FIN_FILE_N)
    for number in range(1, SUBS_FIN_FILE_N + 1):

        print(number)

        # NOTE(review): the 'test.xlsx' suffix differs from the '.xlsx' suffix used for the
        # other ORBIS inputs and looks like a leftover from a trial run - confirm the real
        # file names before changing it.
        df = pd.read_excel(CASE_ROOT.joinpath(r'Input\Listed companies subsidiaries - Financials #' + str(number) + 'test.xlsx'),
                           sheet_name = 'Results',
                           names = ['Rank', 'Company_name', 'BvD9', 'BvD_id', 'Country_ISO', 'NACE_Code', 'NACE_desc', 'Year_LastAvail']
                           + oprev_year_columns
                           + ['Trade_desc', 'Prod&Serv_desc', 'FullOverview_desc'],
                           na_values = 'n.a.',
                           dtype = {
                               **{col: str for col in ['Company_name', 'BvD9','BvD_id', 'Country_ISO', 'NACE_Code', 'NACE_desc',
                                                       'Trade_desc', 'Prod&Serv_desc', 'FullOverview_desc']},
                               **{col: float for col in oprev_year_columns},
                               **{'Year_LastAvail': pd.Int16Dtype()}
                           }
                          ).drop(columns = ['Rank', 'Country_ISO', 'NACE_Code', 'NACE_desc', 'Year_LastAvail'])

        subs_fin_frames.append(df)

    # Consolidate subsidiaries financials (pd.concat rejects an empty list, hence the fallback)
    Subs_fin_df = pd.concat(subs_fin_frames) if subs_fin_frames else pd.DataFrame()

    # Save it as csv, once, after every workbook has been read: an interrupted run can
    # then no longer leave a partial file that the exists() check above would accept.
    # Nothing is written when there are no input workbooks.
    if subs_fin_frames:
        Subs_fin_df.to_csv(file_path,
                           index = False,
                           float_format = '%.10f',
                           na_rep = 'n.a.'
                          )

In [None]:
# Read Group financials

file_path = CASE_ROOT.joinpath(r'Listed companies - Financials.csv')

# The consolidated group financials are only (re)built when their CSV is not on disk yet
if not file_path.exists():

    print('Listed companies - Financials\n')

    # Yearly R&D columns, in sheet order: RnD_Y19 down to RnD_Y10
    rnd_year_columns = ['RnD_Y' + str(YY) for YY in range(10,20)[::-1]]

    # One frame per input workbook; they are concatenated once after the loop.
    # (DataFrame.append was removed in pandas 2.0 and re-copied the data on every call.)
    group_frames = []

    # Read ORBIS input list for groups financials (workbooks are numbered 1..GROUPS_FIN_FILE_N)
    for number in range(1, GROUPS_FIN_FILE_N + 1):

        # Each workbook is read into its own frame. Previously the result was assigned to
        # Groups_fin_df itself (discarding what had been accumulated) and an unrelated `df`
        # left over from another cell was appended instead.
        df = pd.read_excel(CASE_ROOT.joinpath(r'Input\Listed companies - Financials #' + str(number) + '.xlsx'),
                           sheet_name = 'Results',
                           names = ['Rank', 'Company_name', 'BvD_id', 'Country_ISO', 'NACE_Code', 'NACE_desc', 'Year_LastAv']
                           + ['RnD_Y_LastAv', 'Emp_number', 'OpRev_Y_LastAv', 'NetSales_Y_LastAv']
                           + rnd_year_columns,
                           na_values = 'n.a.',
                           dtype = {
                               **{col: str for col in ['Company_name', 'BvD_id', 'Country_ISO', 'NACE_Code', 'NACE_desc']},
                               **{col: float for col in ['RnD_Y_LastAv', 'OpRev_Y_LastAv', 'NetSales_Y_LastAv']
                                  + rnd_year_columns
                                 },
                               # The dtype key has to match the name given in `names` ('Year_LastAv')
                               **{'Year_LastAv': pd.Int16Dtype(), 'Emp_number': pd.Int64Dtype()}
                           }
                          ).drop(columns = ['Rank', 'Country_ISO', 'NACE_Code', 'NACE_desc', 'Year_LastAv'])

        group_frames.append(df)

    # Consolidate groups financials (pd.concat rejects an empty list, hence the fallback)
    Groups_fin_df = pd.concat(group_frames) if group_frames else pd.DataFrame()

    # Save it as csv, once, after every workbook has been read: an interrupted run can
    # then no longer leave a partial file that the exists() check above would accept.
    # Nothing is written when there are no input workbooks.
    if group_frames:
        Groups_fin_df.to_csv(file_path,
                             index = False,
                             float_format = '%.10f',
                             na_rep = 'n.a.'
                            )