In [1]:
import pandas as pd
import json
from pprint import pprint
from item.historical.scripts.util.managers.dataframe import DataframeManager
from item.historical.scripts.util.managers.dataframe import ColumnName
from item.historical.scripts.util.managers.country_code import CountryCodeManager
from item.common import paths

# Variables used across the notebook and not changed

In [2]:
# Identifier of the dataset processed by this notebook.
DATASET_ID = "T009"
# Project helper objects used by the cells below; the dataframe manager is built for DATASET_ID.
dataframeManager = DataframeManager(DATASET_ID)
countryCodeManager = CountryCodeManager()

In [3]:
# Build the file name from DATASET_ID so the input that is read cannot drift
# from the dataset the DataframeManager was created for.
path = paths['data'] / 'historical' / 'input' / f'{DATASET_ID}_input.csv'
df = pd.read_csv(path)
df

Unnamed: 0,REGIONS/COUNTRIES,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,AUSTRIA,4157,4205,4246,4285,4360,4441,4513,4584,4641,4695,4748
1,BELGIUM,4861,4929,5006,5087,5160,5279,5359,5393,5439,5511,5587
2,DENMARK,1971,2026,2075,2105,2126,2169,2203,2240,2280,2321,2392
3,FINLAND,2414,2489,2481,2450,2450,2486,2532,2560,2576,2596,2613
4,FRANCE,30100,30400,30700,30850,31050,31300,31550,31600,31650,31800,32000
...,...,...,...,...,...,...,...,...,...,...,...,...
136,TOGO,101,101,105,105,106,108,111,116,120,130,140
137,TUNISIA,852,874,830,787,820,848,855,862,891,940,990
138,UGANDA,57,59,91,94,97,101,110,120,130,140,150
139,ZAMBIA,294,303,200,135,190,200,210,220,230,240,250


# Transforming the view from UF to PF

In [4]:
# Reshape the frame with the project helper; the output below shows one row per
# country/year pair with "Year" and "Value" columns.
# NOTE(review): the meaning of the positional argument `1` is not visible here — confirm against DataframeManager.
# NOTE(review): `df` is rebound, so re-running this cell alone presumably reshapes an already reshaped frame — re-run from the load cell instead.
df = dataframeManager.transform_from_uf_to_pf_view(df, 1)
df

Unnamed: 0,REGIONS/COUNTRIES,Year,Value
0,AUSTRIA,2005,4157
1,AUSTRIA,2006,4205
2,AUSTRIA,2007,4246
3,AUSTRIA,2008,4285
4,AUSTRIA,2009,4360
...,...,...,...
1546,ZIMBABWE,2011,680
1547,ZIMBABWE,2012,710
1548,ZIMBABWE,2013,750
1549,ZIMBABWE,2014,790


# Renaming the "REGIONS/COUNTRIES" column to "Country"

In [5]:
# The raw header "REGIONS/COUNTRIES" is replaced by the template's country column name.
dataframeManager.rename_column(
    df=df,
    current_name="REGIONS/COUNTRIES",
    new_name=ColumnName.COUNTRY.value,
)
df

Unnamed: 0,Country,Year,Value
0,AUSTRIA,2005,4157
1,AUSTRIA,2006,4205
2,AUSTRIA,2007,4246
3,AUSTRIA,2008,4285
4,AUSTRIA,2009,4360
...,...,...,...
1546,ZIMBABWE,2011,680
1547,ZIMBABWE,2012,710
1548,ZIMBABWE,2013,750
1549,ZIMBABWE,2014,790


# Setting the "Source" column
    Rule: This dataset comes from "International Organization of Motor Vehicle Manufacturers"

In [6]:
# Every row of this dataset shares the same source organisation.
dataframeManager.simple_column_insert(
    dataframe=df,
    column_name=ColumnName.SOURCE.value,
    cell_value="International Organization of Motor Vehicle Manufacturers",
)
df

Unnamed: 0,Source,Country,Year,Value
0,International Organization of Motor Vehicle Ma...,AUSTRIA,2005,4157
1,International Organization of Motor Vehicle Ma...,AUSTRIA,2006,4205
2,International Organization of Motor Vehicle Ma...,AUSTRIA,2007,4246
3,International Organization of Motor Vehicle Ma...,AUSTRIA,2008,4285
4,International Organization of Motor Vehicle Ma...,AUSTRIA,2009,4360
...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,ZIMBABWE,2011,680
1547,International Organization of Motor Vehicle Ma...,ZIMBABWE,2012,710
1548,International Organization of Motor Vehicle Ma...,ZIMBABWE,2013,750
1549,International Organization of Motor Vehicle Ma...,ZIMBABWE,2014,790


# Getting the ISO code for each country

## Determining which countries do not have an ISO code
    Rule: As seen below, 12 countries do not have an ISO code. The transformation that will be done for each country is as follows:
    ORIGINAL NAME --> NAME TO USE 
     RUSSIA --> Russian Federation
     SYRIA --> Syrian Arab Republic
     IRAN --> Iran, Islamic Republic of
     BOSNIA --> Bosnia and Herzegovina
     HONG-KONG --> Hong Kong
     IVORY COAST --> Côte d'Ivoire
     BRUNEI --> Brunei Darussalam
     MOLDAVIA --> Moldova, Republic of
     SOUTH KOREA --> Korea, Republic of
     CONGO KINSHASA --> Congo, The Democratic Republic of the
     PALESTINE --> Palestine, State of
     MACEDONIA --> North Macedonia

In [7]:
# Series.unique() keeps first-appearance order, so the list handed to the helper
# is identical on every run. list(set(...)) iterates strings in a hash-randomised
# order that changes between kernel sessions, which made the output below
# non-reproducible.
list_of_countries = df[ColumnName.COUNTRY.value].unique().tolist()
countries_with_no_iso_code = countryCodeManager.get_list_of_countries_with_no_iso_code(list_of_countries)
countries_with_no_iso_code

['SOUTH KOREA',
 'SYRIA',
 'IRAN',
 'MOLDAVIA',
 'MACEDONIA',
 'BOSNIA',
 'RUSSIA',
 'BRUNEI',
 'HONG-KONG',
 'PALESTINE',
 'IVORY COAST',
 'CONGO KINSHASA ']

In [8]:
# Source spellings for which no ISO code was found, mapped to the name to use
# instead. Keys are stored without surrounding whitespace; the raw data contains
# e.g. "CONGO KINSHASA " with a trailing space.
COUNTRY_NAME_FIXES = {
    "RUSSIA": "Russian Federation",
    "SYRIA": "Syrian Arab Republic",
    "IRAN": "Iran, Islamic Republic of",
    "BOSNIA": "Bosnia and Herzegovina",
    "HONG-KONG": "Hong Kong",
    "IVORY COAST": "Côte d'Ivoire",
    "BRUNEI": "Brunei Darussalam",
    "MOLDAVIA": "Moldova, Republic of",
    "SOUTH KOREA": "Korea, Republic of",
    "CONGO KINSHASA": "Congo, The Democratic Republic of the",
    "PALESTINE": "Palestine, State of",
    "MACEDONIA": "North Macedonia",
}


def _fix_country_name(name):
    """Return the replacement for a known problematic spelling, else `name` unchanged."""
    # Non-string cells (e.g. missing values) are passed through untouched.
    if not isinstance(name, str):
        return name
    # Strip only for the lookup, so stray whitespace cannot make a known name
    # fall through; names without a replacement keep their original text.
    return COUNTRY_NAME_FIXES.get(name.strip(), name)


# Cleaning the list and replacing the missing countries with the appropriate name
dirty_list_of_all_countries = df[ColumnName.COUNTRY.value]
clean_list_of_all_countries = [_fix_country_name(name) for name in dirty_list_of_all_countries]

# Getting the list of iso codes
list_of_iso_codes = countryCodeManager.get_list_of_iso_for_countries(clean_list_of_all_countries)

# Validate before mutating, so a result of the wrong size cannot leave df half-updated
assert len(clean_list_of_all_countries) == len(dirty_list_of_all_countries)
assert len(list_of_iso_codes) == len(df)

# Setting the new country column (drop + assign moves it to the end of the frame)
df.drop(columns=[ColumnName.COUNTRY.value], inplace=True)
df[ColumnName.COUNTRY.value] = clean_list_of_all_countries

# Adding the ISO code column to the dataframe
df[ColumnName.ISO_CODE.value] = list_of_iso_codes
df

Unnamed: 0,Source,Year,Value,Country,ISO Code
0,International Organization of Motor Vehicle Ma...,2005,4157,AUSTRIA,AUT
1,International Organization of Motor Vehicle Ma...,2006,4205,AUSTRIA,AUT
2,International Organization of Motor Vehicle Ma...,2007,4246,AUSTRIA,AUT
3,International Organization of Motor Vehicle Ma...,2008,4285,AUSTRIA,AUT
4,International Organization of Motor Vehicle Ma...,2009,4360,AUSTRIA,AUT
...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,2011,680,ZIMBABWE,ZWE
1547,International Organization of Motor Vehicle Ma...,2012,710,ZIMBABWE,ZWE
1548,International Organization of Motor Vehicle Ma...,2013,750,ZIMBABWE,ZWE
1549,International Organization of Motor Vehicle Ma...,2014,790,ZIMBABWE,ZWE


# Getting the ITEM region for each ISO code

## Determining which countries do not have an ITEM region
    Rule: As seen below, every ISO code has a corresponding region.

In [9]:
# Distinct ISO codes in the frame, handed to the helper that reports the codes
# lacking an ITEM region.
list_of_unique_iso_codes = [*set(df[ColumnName.ISO_CODE.value])]
iso_codes_with_no_region = countryCodeManager.get_list_of_iso_codes_with_no_region(
    list_of_unique_iso_codes
)
iso_codes_with_no_region

[]

## Assigning the ITEM region

In [10]:
# ISO code of every row (one entry per row, duplicates included)
list_of_all_codes = df[ColumnName.ISO_CODE.value]

# Region for each of those codes
item_regions = countryCodeManager.get_list_of_regions_for_iso_codes(list_of_all_codes)

# The helper must return exactly one region per dataframe row
assert len(item_regions) == len(df)

# Place the region column at position 3 (duplicate column names allowed)
df.insert(
    loc=3,
    column=ColumnName.ITEM_REGION.value,
    value=item_regions,
    allow_duplicates=True,
)
df

Unnamed: 0,Source,Year,Value,Region,Country,ISO Code
0,International Organization of Motor Vehicle Ma...,2005,4157,EU-27,AUSTRIA,AUT
1,International Organization of Motor Vehicle Ma...,2006,4205,EU-27,AUSTRIA,AUT
2,International Organization of Motor Vehicle Ma...,2007,4246,EU-27,AUSTRIA,AUT
3,International Organization of Motor Vehicle Ma...,2008,4285,EU-27,AUSTRIA,AUT
4,International Organization of Motor Vehicle Ma...,2009,4360,EU-27,AUSTRIA,AUT
...,...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,2011,680,Africa,ZIMBABWE,ZWE
1547,International Organization of Motor Vehicle Ma...,2012,710,Africa,ZIMBABWE,ZWE
1548,International Organization of Motor Vehicle Ma...,2013,750,Africa,ZIMBABWE,ZWE
1549,International Organization of Motor Vehicle Ma...,2014,790,Africa,ZIMBABWE,ZWE


# Converting the country names from upper case to capitalized form

In [11]:
# Words kept in lower case when they are not the first word of a name
_MINOR_WORDS = {"and", "of", "the"}


def _display_name(name):
    """Convert an ALL-CAPS country name to title case; return any other value unchanged.

    str.capitalize() lower-cases everything after the first character, which
    turns "SOUTH AFRICA" into "South africa" and damages names that are already
    correctly cased, such as "Russian Federation" or "Côte d'Ivoire". Only
    fully upper-case names are therefore converted, word by word.
    """
    if not isinstance(name, str) or not name.isupper():
        return name
    # Splitting on a single space keeps any repeated or trailing spaces intact.
    words = name.split(" ")
    return " ".join(
        word.lower() if position > 0 and word.lower() in _MINOR_WORDS else word.title()
        for position, word in enumerate(words)
    )


# Take the country column out of the frame and convert each name
old_names = df.pop(ColumnName.COUNTRY.value)
new_names = [_display_name(name) for name in old_names]

# Asserting that no data is missing
assert len(new_names) == len(df)

# Adding the converted names back as the first column
df.insert(0, ColumnName.COUNTRY.value, new_names, True)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,EU-27,AUT
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,EU-27,AUT
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,EU-27,AUT
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,EU-27,AUT
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,EU-27,AUT
...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Africa,ZWE
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Africa,ZWE
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Africa,ZWE
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Africa,ZWE


# Adding the 'Service' column
    Rule: The dataset is associated with passenger data; therefore, the "Service" is "Passenger".

In [12]:
# Constant column: the whole dataset is passenger data.
dataframeManager.simple_column_insert(
    df,
    ColumnName.SERVICE.value,
    "Passenger",
    6,
)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,EU-27,AUT,Passenger
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,EU-27,AUT,Passenger
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,EU-27,AUT,Passenger
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,EU-27,AUT,Passenger
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,EU-27,AUT,Passenger
...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Africa,ZWE,Passenger
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Africa,ZWE,Passenger
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Africa,ZWE,Passenger
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Africa,ZWE,Passenger


# Adding the "Technology", "Fuel", and "Vehicle Type" columns
    Rule: Since the dataset does not provide information about technology, vehicle type, or fuel, we set all three to "All"

In [13]:
# Technology, vehicle type and fuel are each filled with "All"; the columns are
# inserted one after another at positions 7, 8 and 9.
all_valued_columns = (
    ColumnName.TECHNOLOGY.value,
    ColumnName.VEHICLE_TYPE.value,
    ColumnName.FUEL.value,
)
for position, column_name in enumerate(all_valued_columns, start=7):
    dataframeManager.simple_column_insert(df, column_name, "All", position)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service,Technology,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,EU-27,AUT,Passenger,All,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,EU-27,AUT,Passenger,All,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,EU-27,AUT,Passenger,All,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,EU-27,AUT,Passenger,All,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,EU-27,AUT,Passenger,All,All,All
...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Africa,ZWE,Passenger,All,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Africa,ZWE,Passenger,All,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Africa,ZWE,Passenger,All,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Africa,ZWE,Passenger,All,All,All


# Adding the "Mode" column
    Rule: This dataset is about vehicles, the mode is "Road"

In [14]:
# Constant column: the dataset is about vehicles, so the mode is "Road".
dataframeManager.simple_column_insert(
    df,
    ColumnName.MODE.value,
    "Road",
    8,
)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,EU-27,AUT,Passenger,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,EU-27,AUT,Passenger,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,EU-27,AUT,Passenger,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,EU-27,AUT,Passenger,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,EU-27,AUT,Passenger,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Africa,ZWE,Passenger,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Africa,ZWE,Passenger,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Africa,ZWE,Passenger,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Africa,ZWE,Passenger,All,Road,All,All


# Adding the "Variable" and "Unit" columns
Rule: This dataset is associated with stock data. Therefore, the variable is "Stock". The units of this data are expressed as "10^6 vehicle".

## Variable column

In [15]:
# Constant column: all values in this dataset are stock figures.
dataframeManager.simple_column_insert(
    df,
    ColumnName.VARIABLE.value,
    "Stock",
    4,
)
df

Unnamed: 0,Country,Source,Year,Value,Variable,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,Stock,EU-27,AUT,Passenger,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,Stock,EU-27,AUT,Passenger,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,Stock,EU-27,AUT,Passenger,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,Stock,EU-27,AUT,Passenger,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,Stock,EU-27,AUT,Passenger,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Stock,Africa,ZWE,Passenger,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Stock,Africa,ZWE,Passenger,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Stock,Africa,ZWE,Passenger,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Stock,Africa,ZWE,Passenger,All,Road,All,All


## Unit column

In [16]:
# Constant column holding the unit label used for every row.
dataframeManager.simple_column_insert(
    df,
    ColumnName.UNIT.value,
    "10^6 vehicle",
    5,
)
df

Unnamed: 0,Country,Source,Year,Value,Variable,Unit,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,4157,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,4205,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,4246,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,4285,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,4360,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,680,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,710,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,750,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,790,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All


# Value Column
    Rule: The dataset provides values in thousands, so we must convert them to millions

In [17]:
# The source reports thousands of vehicles; the "Unit" column says 10^6 vehicle
THOUSANDS_PER_MILLION = 1000

# Take the raw column out of the frame; the converted values are appended as the last column
old_values = df.pop(ColumnName.VALUE.value)

# Go through str first so that a cell pandas parsed as a number is handled the
# same way as text with thousands separators (calling .replace on an int
# raises AttributeError). float() then parses exactly as the original loop did.
new_values = (
    old_values.astype(str).str.replace(",", "", regex=False).map(float)
    / THOUSANDS_PER_MILLION
)

# Asserting no data was lost
assert len(new_values) == len(df)

# Setting the new column
df[ColumnName.VALUE.value] = new_values
df

Unnamed: 0,Country,Source,Year,Variable,Unit,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel,Value
0,Austria,International Organization of Motor Vehicle Ma...,2005,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All,4.157
1,Austria,International Organization of Motor Vehicle Ma...,2006,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All,4.205
2,Austria,International Organization of Motor Vehicle Ma...,2007,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All,4.246
3,Austria,International Organization of Motor Vehicle Ma...,2008,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All,4.285
4,Austria,International Organization of Motor Vehicle Ma...,2009,Stock,10^6 vehicle,EU-27,AUT,Passenger,All,Road,All,All,4.360
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All,0.680
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All,0.710
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All,0.750
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,Stock,10^6 vehicle,Africa,ZWE,Passenger,All,Road,All,All,0.790


# Reordering Columns
    Rule: To comply with the current template, the columns must be shown in a specific order. The dataframe manager specifies the order of the columns

In [18]:
# The helper's return value is rebound to `df`; the output below shows the resulting column order.
df = dataframeManager.reorder_columns(df)
df

Unnamed: 0,Source,Country,ISO Code,Region,Variable,Unit,Service,Mode,Vehicle Type,Technology,Fuel,Value,Year
0,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Passenger,Road,All,All,All,4.157,2005
1,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Passenger,Road,All,All,All,4.205,2006
2,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Passenger,Road,All,All,All,4.246,2007
3,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Passenger,Road,All,All,All,4.285,2008
4,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Passenger,Road,All,All,All,4.360,2009
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Passenger,Road,All,All,All,0.680,2011
1547,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Passenger,Road,All,All,All,0.710,2012
1548,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Passenger,Road,All,All,All,0.750,2013
1549,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Passenger,Road,All,All,All,0.790,2014


# Exporting Results

In [96]:
# Programming Friendly View
# Writes the PF file; the output below reports where it was saved.
dataframeManager.create_programming_friendly_file(df)

# User Friendly View
# Writes the UF file from the same dataframe; its location is reported below as well.
dataframeManager.create_user_friendly_file(df)

> PF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
> UF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
