In [1]:
import mitoolspro as mtp
import pandas as pd
import re
from mitoolspro.project import Project
from mitoolspro.utils import RECALCULATE
from mitoolspro.utils.objects import StringMapper

In [None]:
# Load the project object; later cells use `pr.get_path(...)` to resolve file paths.
# NOTE(review): what `auto_load=True` does is not visible in this notebook — confirm in mitoolspro.
pr = Project.load(auto_load=True)

In [3]:
# Notebook-wide switches, both off by default.
# NOTE(review): neither flag is read in the cells visible here; presumably they
# gate display of intermediate output and recomputation — confirm before removing.
show, recalculate = False, False

# Load Data

In [4]:
# Resolve the path registered in the project under 'final_data'.
data_name = pr.get_path('final_data')
# Read the parquet file; reset_index() turns the stored index into regular
# columns, so those columns are also seen by the column renaming below.
data = pd.read_parquet(data_name).reset_index()

In [None]:
# Inspect the loaded frame before any renaming.
data

# Rename Indicators

In [6]:
# Full sector name -> short code. Passed to `rename_eci_indexes` to shorten the
# sector part of the index-related column names.
sector_mapping = {
    'Agriculture': 'Agr',
    'Fishing': 'Fis',
    'Food & Beverages': 'FnB',
    'Machinery': 'Mch',
    'Iron & Steel': 'InS',
    'Electronics & Instruments': 'EnI',
    'Metal Products': 'Met',
    'Mining & Quarrying': 'MnQ',
    'Other Manufacturing': 'OthM',
    'Petroleum, Chemicals & Non-Metals': 'PCh',
    'Textiles & Wearing Apparel': 'Tex',
    'Transport Equipment': 'TrEq',
    'Wood & Paper': 'WnP'
}
# Explicit renames for the remaining named columns (spaces removed, one name
# abbreviated); merged with the generated renames into `custom_mapping`.
others_mapping = {
    'Year': 'Year',
    'Country': 'Country',
    'Continent': 'Continent',
    'Income Group': 'IncomeGroup',
    'Current Income Group': 'CurrIncomeGroup'
}
# Substrings that select the columns to shorten: a column qualifies when its
# name contains any of these. Note that 'ECI' is itself a substring of 'SECI',
# so the 'SECI' entry never changes the selection on its own.
eci_indexes = ['SCI', 'SECI', 'SCP', 'ECI']

In [7]:
def rename_eci_indexes(string, replace_mapping):
    """Shorten a name by substituting mapped substrings, then stripping filler.

    Every occurrence of a key of ``replace_mapping`` found in ``string`` is
    replaced by the corresponding value. Afterwards all spaces and ``+``
    characters are removed from the result, including any that were introduced
    by the replacement values.

    Args:
        string: Text to rewrite (here: a column name).
        replace_mapping: Dict of literal substring -> replacement. Keys are
            matched literally (they are escaped), not as regular expressions.
            May be empty, in which case only the space/``+`` stripping applies.

    Returns:
        The rewritten string.
    """
    # Regex alternation picks the first alternative that matches at a position,
    # so try longer keys first: otherwise a key that is a prefix of another
    # (e.g. 'Food' vs 'Food & Beverages') would shadow the longer one.
    keys = sorted(replace_mapping, key=len, reverse=True)
    if keys:
        # Guarded because an empty alternation compiles to a pattern that
        # matches the empty string everywhere, which would look up the
        # non-existent key '' and raise KeyError.
        pattern = re.compile('|'.join(re.escape(key) for key in keys))
        string = pattern.sub(lambda match: replace_mapping[match.group(0)], string)
    return string.replace(' ', '').replace('+', '')

# Original column name -> shortened name, for every column whose name contains
# at least one of the `eci_indexes` markers.
renamed_eci_cols = {
    col: rename_eci_indexes(col, sector_mapping)
    for col in data.columns
    if any(marker in col for marker in eci_indexes)
}

In [None]:
# Inspect the original -> shortened names generated for the selected columns.
renamed_eci_cols

In [9]:
# All explicitly chosen renames: start from the generated short names, then
# lay the hand-written ones on top (they win if a key appears in both).
custom_mapping = dict(renamed_eci_cols)
custom_mapping.update(others_mapping)

In [10]:
# Columns without an explicit rename get a generated placeholder name,
# numbered in column order: Indicator0X, Indicator1X, ...
unmapped_cols = [col for col in data.columns if col not in custom_mapping]
leftover_mapping = {
    col: f"Indicator{position}X" for position, col in enumerate(unmapped_cols)
}
# Number of placeholders handed out (same final value the loop counter had).
n = len(unmapped_cols)

In [11]:
# Path registered in the project under 'string_map'; the mappings are saved there below.
str_maps_file = pr.get_path('string_map')

In [12]:
# Complete rename table: explicit renames first, then the generated placeholders.
variables_map = {**custom_mapping, **leftover_mapping}
# NOTE(review): `pass_if_mapped=True` presumably lets strings that are already
# in mapped form pass through unchanged — confirm against StringMapper.
str_mapper = StringMapper(variables_map, pass_if_mapped=True)
# Register 'Intercept' as mapping to itself.
# NOTE(review): presumably so a model's intercept term survives the mapping — confirm.
str_mapper.add_relation('Intercept', 'Intercept')
# Write the mappings to `str_maps_file` (file output; must run after add_relation).
str_mapper.save_mappings(str_maps_file)

In [13]:
# Replace the frame's column labels with their mapped names. This mutates
# `data` in place, so cells above that displayed it are now stale.
# NOTE(review): re-running this cell feeds already-mapped names back into the
# mapper; presumably `pass_if_mapped=True` is what makes that safe — confirm.
data.columns = str_mapper.uglify_strs(data.columns)

In [None]:
# Inspect the frame after the column rename.
data

***