In [1]:
import json
import os
from pprint import pprint

import pandas as pd

from util.managers.CountryCodeManager import CountryCodeManager
from util.managers.DataframeManager import ColumnName
from util.managers.DataframeManager import DataframeManager

# Variables used across the notebook and not changed

In [2]:
# Identifier of the dataset processed by this notebook.
DATASET_ID = "T010"
# Project helper used by the cells below to load, reshape and export the dataframe.
dataframeManager = DataframeManager(DATASET_ID)
# Project helper used by the cells below for ISO-code and ITEM-region lookups.
countryCodeManager = CountryCodeManager()

In [3]:
# Root folder that holds one sub-folder of raw files per dataset id.
# It defaults to the original author's local folder; set the
# ITEM_RAW_DATA_DIR environment variable to run the notebook on another machine.
RAW_DATA_DIR = os.environ.get(
    "ITEM_RAW_DATA_DIR", "/Users/hlinero/Desktop/iTEM Material/raw dataset"
)

filename = "CV_Vehicles-in-use.csv"
# The sub-folder is derived from DATASET_ID so the path cannot drift from the
# dataset id given to the DataframeManager.
path = os.path.join(RAW_DATA_DIR, DATASET_ID, filename)
df = dataframeManager.get_dataframe_from_csv_file(path)
df

Unnamed: 0,REGIONS/COUNTRIES,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,AUSTRIA,367,374,382,391,398,406,417,426,434,445,454
1,BELGIUM,674,691,712,731,743,761,779,791,802,817,839
2,DENMARK,479,519,546,541,518,495,479,465,451,448,446
3,FINLAND,87,91,93,93,92,94,97,97,97,95,95
4,FRANCE,6198,6261,6333,6362,6388,6444,6517,6538,6550,6608,6652
...,...,...,...,...,...,...,...,...,...,...,...,...
136,TOGO,47,47,48,49,49,50,51,52,54,56,58
137,TUNISIA,122,126,220,390,400,411,414,417,439,450,460
138,UGANDA,171,176,212,219,259,265,270,280,300,320,340
139,ZAMBIA,73,74,80,83,108,110,110,110,113,120,120


# Transforming the view from UF (user friendly) to PF (programming friendly)

In [4]:
# Reshape the wide table (one column per year) into long form with a
# "Year" and a "Value" column, as the displayed output shows.
# NOTE(review): the second argument (1) is presumably the number of leading
# identifier columns to keep as-is — confirm against DataframeManager.
df = dataframeManager.transform_from_uf_to_pf_view(df, 1)
df

Unnamed: 0,REGIONS/COUNTRIES,Year,Value
0,AUSTRIA,2005,367
1,AUSTRIA,2006,374
2,AUSTRIA,2007,382
3,AUSTRIA,2008,391
4,AUSTRIA,2009,398
...,...,...,...
1546,ZIMBABWE,2011,100
1547,ZIMBABWE,2012,100
1548,ZIMBABWE,2013,103
1549,ZIMBABWE,2014,110


# Renaming the "Country" column

In [5]:
# Give the raw "REGIONS/COUNTRIES" header the template's country column name.
# The helper's return value is not used; the displayed dataframe already
# carries the new header.
dataframeManager.rename_column(
    df=df,
    current_name="REGIONS/COUNTRIES",
    new_name=ColumnName.COUNTRY.value,
)
df

Unnamed: 0,Country,Year,Value
0,AUSTRIA,2005,367
1,AUSTRIA,2006,374
2,AUSTRIA,2007,382
3,AUSTRIA,2008,391
4,AUSTRIA,2009,398
...,...,...,...
1546,ZIMBABWE,2011,100
1547,ZIMBABWE,2012,100
1548,ZIMBABWE,2013,103
1549,ZIMBABWE,2014,110


# Setting the "Source" column
    Rule: This dataset comes from "International Organization of Motor Vehicle Manufacturers"

In [6]:
# Tag every row with the organisation that published the dataset.
dataframeManager.simple_column_insert(
    dataframe=df,
    column_name=ColumnName.SOURCE.value,
    cell_value="International Organization of Motor Vehicle Manufacturers",
)
df

Unnamed: 0,Source,Country,Year,Value
0,International Organization of Motor Vehicle Ma...,AUSTRIA,2005,367
1,International Organization of Motor Vehicle Ma...,AUSTRIA,2006,374
2,International Organization of Motor Vehicle Ma...,AUSTRIA,2007,382
3,International Organization of Motor Vehicle Ma...,AUSTRIA,2008,391
4,International Organization of Motor Vehicle Ma...,AUSTRIA,2009,398
...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,ZIMBABWE,2011,100
1547,International Organization of Motor Vehicle Ma...,ZIMBABWE,2012,100
1548,International Organization of Motor Vehicle Ma...,ZIMBABWE,2013,103
1549,International Organization of Motor Vehicle Ma...,ZIMBABWE,2014,110


# Getting the ISO code for each country

## Determining which countries do not have an ISO code
    Rule: As seen below, 13 countries do not have an ISO code. The transformation that will be done for each country is as follows:
    ORIGINAL NAME --> NAME TO USE 
     RUSSIA --> Russian Federation
     SYRIA --> Syrian Arab Republic
     IRAN --> Iran, Islamic Republic of
     BOSNIA --> Bosnia and Herzegovina
     HONG-KONG --> Hong Kong
     IVORY COAST --> Côte d'Ivoire
     BRUNEI --> Brunei Darussalam
     MOLDAVIA --> Moldova, Republic of
     SOUTH KOREA --> Korea, Republic of
     CONGO KINSHASA --> Congo, The Democratic Republic of the
     PALESTINE --> Palestine, State of
     MACEDONIA --> North Macedonia
     AZERBAIDJAN --> ?? (no replacement name chosen yet; it keeps its original name and ends up with the ISO code 'N/A')

In [8]:
# Unique country names in first-appearance order. list(set(...)) orders
# strings differently from one kernel session to the next, which made this
# cell's input (and displayed output) change between otherwise identical runs.
list_of_countries = df[ColumnName.COUNTRY.value].drop_duplicates().tolist()
countries_with_no_iso_code = countryCodeManager.get_list_of_countries_with_no_iso_code(list_of_countries)
countries_with_no_iso_code

['PALESTINE',
 'AZERBAIDJAN',
 'SOUTH KOREA',
 'IVORY COAST',
 'BRUNEI',
 'IRAN',
 'BOSNIA',
 'RUSSIA',
 'SYRIA',
 'MOLDAVIA',
 'MACEDONIA',
 'HONG-KONG',
 'CONGO KINSHASA ']

In [10]:
# Dataset spelling -> name that the ISO-code lookup recognises.
# Keys are stored without surrounding whitespace; the raw file spells one of
# them with a trailing space ("CONGO KINSHASA "), which the lookup below handles.
# NOTE(review): AZERBAIDJAN has no replacement here, so it keeps its original
# name and receives no ISO code. "Azerbaijan" is presumably the intended
# name — confirm before adding it.
COUNTRY_NAME_FIXES = {
    "RUSSIA": "Russian Federation",
    "SYRIA": "Syrian Arab Republic",
    "IRAN": "Iran, Islamic Republic of",
    "BOSNIA": "Bosnia and Herzegovina",
    "HONG-KONG": "Hong Kong",
    "IVORY COAST": "Côte d'Ivoire",
    "BRUNEI": "Brunei Darussalam",
    "MOLDAVIA": "Moldova, Republic of",
    "SOUTH KOREA": "Korea, Republic of",
    "CONGO KINSHASA": "Congo, The Democratic Republic of the",
    "PALESTINE": "Palestine, State of",
    "MACEDONIA": "North Macedonia",
}


def normalize_country_name(raw_name):
    """Return the name to use for ``raw_name`` in the ISO-code lookup.

    Surrounding whitespace is removed first, so a stray space in the source
    file cannot make a known spelling miss its replacement. Names without an
    entry in ``COUNTRY_NAME_FIXES`` are returned stripped but otherwise
    unchanged; non-string values (e.g. missing cells) are returned as-is.
    """
    if not isinstance(raw_name, str):
        return raw_name
    stripped_name = raw_name.strip()
    return COUNTRY_NAME_FIXES.get(stripped_name, stripped_name)


# Original country column, one entry per dataframe row
dirty_list_of_all_countries = df[ColumnName.COUNTRY.value]

# Same rows, with the problematic spellings replaced
clean_list_of_all_countries = [
    normalize_country_name(country) for country in dirty_list_of_all_countries
]

# Ensuring the size of the clean list is equal to the old list
assert len(clean_list_of_all_countries) == len(dirty_list_of_all_countries)

# Getting the list of iso codes
list_of_iso_codes = countryCodeManager.get_list_of_iso_for_countries(clean_list_of_all_countries)

# Replacing the country column; dropping it first re-adds it as the last
# column, which keeps the column positions later cells insert at.
df.drop(columns=[ColumnName.COUNTRY.value], inplace=True)
df[ColumnName.COUNTRY.value] = clean_list_of_all_countries

# Adding the ISO code column to the dataframe
df[ColumnName.ISO_CODE.value] = list_of_iso_codes
df

Unnamed: 0,Source,Year,Value,Country,ISO Code
0,International Organization of Motor Vehicle Ma...,2005,367,AUSTRIA,AUT
1,International Organization of Motor Vehicle Ma...,2006,374,AUSTRIA,AUT
2,International Organization of Motor Vehicle Ma...,2007,382,AUSTRIA,AUT
3,International Organization of Motor Vehicle Ma...,2008,391,AUSTRIA,AUT
4,International Organization of Motor Vehicle Ma...,2009,398,AUSTRIA,AUT
...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,2011,100,ZIMBABWE,ZWE
1547,International Organization of Motor Vehicle Ma...,2012,100,ZIMBABWE,ZWE
1548,International Organization of Motor Vehicle Ma...,2013,103,ZIMBABWE,ZWE
1549,International Organization of Motor Vehicle Ma...,2014,110,ZIMBABWE,ZWE


# Getting the ITEM region for each ISO code

## Determining which countries do not have an ITEM region
    Rule: As seen below, every ISO code has a respective region except the placeholder 'N/A', which is not a real ISO code.

In [11]:
# Unique ISO codes in first-appearance order. list(set(...)) orders strings
# differently from one kernel session to the next, which made this cell's
# input (and displayed output) change between otherwise identical runs.
list_of_unique_iso_codes = df[ColumnName.ISO_CODE.value].drop_duplicates().tolist()
iso_codes_with_no_region = countryCodeManager.get_list_of_iso_codes_with_no_region(list_of_unique_iso_codes)
iso_codes_with_no_region

['N/A']

## Assigning the ITEM region
    Rule: AZERBAIDJAN is the only country for which no ISO code was found in the pycountry library, so its 'N/A' code cannot be matched to an ITEM region

In [12]:
# ISO code of every dataframe row, in row order
list_of_all_codes = df[ColumnName.ISO_CODE.value]

# Region lookup for each of those codes
item_regions = countryCodeManager.get_list_of_regions_for_iso_codes(list_of_all_codes)

# Exactly one region must come back per dataframe row
assert len(item_regions) == len(df)

# Place the region column at position 3 of the dataframe
df.insert(
    loc=3,
    column=ColumnName.ITEM_REGION.value,
    value=item_regions,
    allow_duplicates=True,
)
df

Unnamed: 0,Source,Year,Value,Region,Country,ISO Code
0,International Organization of Motor Vehicle Ma...,2005,367,EU-27,AUSTRIA,AUT
1,International Organization of Motor Vehicle Ma...,2006,374,EU-27,AUSTRIA,AUT
2,International Organization of Motor Vehicle Ma...,2007,382,EU-27,AUSTRIA,AUT
3,International Organization of Motor Vehicle Ma...,2008,391,EU-27,AUSTRIA,AUT
4,International Organization of Motor Vehicle Ma...,2009,398,EU-27,AUSTRIA,AUT
...,...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,2011,100,Africa,ZIMBABWE,ZWE
1547,International Organization of Motor Vehicle Ma...,2012,100,Africa,ZIMBABWE,ZWE
1548,International Organization of Motor Vehicle Ma...,2013,103,Africa,ZIMBABWE,ZWE
1549,International Organization of Motor Vehicle Ma...,2014,110,Africa,ZIMBABWE,ZWE


# Converting the country names from upper case to capitalized names

In [13]:
def to_display_case(name):
    """Return ``name`` with each word capitalised, if it is all upper case.

    ``str.capitalize()`` lowercases everything after the first character, so
    it turned the already-clean replacement names into e.g.
    "Russian federation" and multi-word upper-case names into e.g.
    "South africa". Only names written entirely in upper case are converted
    here (word by word); mixed-case names and non-string values are returned
    unchanged.
    """
    if isinstance(name, str) and name.isupper():
        return name.title()
    return name


# Current names and their display-case counterparts, one per dataframe row
old_names = list(df[ColumnName.COUNTRY.value])
new_names = [to_display_case(name) for name in old_names]

# Asserting that no data is missing
assert len(new_names) == len(df)

# Dropping the current column and re-adding it as the first column
df.drop(columns=[ColumnName.COUNTRY.value], inplace=True)
df.insert(0, ColumnName.COUNTRY.value, new_names, True)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,EU-27,AUT
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,EU-27,AUT
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,EU-27,AUT
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,EU-27,AUT
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,EU-27,AUT
...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Africa,ZWE
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Africa,ZWE
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Africa,ZWE
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Africa,ZWE


# Adding the 'Service' column
    Rule: The dataset contains commercial vehicle data; therefore, the "Service" is "Freight"

In [14]:
# Add a constant "Freight" service column to every row.
# NOTE(review): the last argument (6) appears to be the insert position,
# judging by where the column lands in the displayed output — confirm.
dataframeManager.simple_column_insert(df, ColumnName.SERVICE.value, "Freight", 6)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,EU-27,AUT,Freight
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,EU-27,AUT,Freight
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,EU-27,AUT,Freight
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,EU-27,AUT,Freight
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,EU-27,AUT,Freight
...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Africa,ZWE,Freight
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Africa,ZWE,Freight
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Africa,ZWE,Freight
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Africa,ZWE,Freight


# Adding the "Technology", "Fuel", and "Vehicle Type" columns
    Rule: Since the dataset does not provide information about technology, fuel, or vehicle type, we set all three as "All"

In [15]:
# Add three constant "All" columns: technology, vehicle type and fuel.
# NOTE(review): the last arguments (7, 8, 9) appear to be insert positions,
# judging by the column order in the displayed output — confirm.
dataframeManager.simple_column_insert(df, ColumnName.TECHNOLOGY.value, "All", 7)
dataframeManager.simple_column_insert(df, ColumnName.VEHICLE_TYPE.value, "All", 8)
dataframeManager.simple_column_insert(df, ColumnName.FUEL.value, "All", 9)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service,Technology,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,EU-27,AUT,Freight,All,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,EU-27,AUT,Freight,All,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,EU-27,AUT,Freight,All,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,EU-27,AUT,Freight,All,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,EU-27,AUT,Freight,All,All,All
...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Africa,ZWE,Freight,All,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Africa,ZWE,Freight,All,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Africa,ZWE,Freight,All,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Africa,ZWE,Freight,All,All,All


# Adding the "Mode" column
    Rule: This dataset is about vehicles, the mode is "Road"

In [16]:
# Add a constant "Road" mode column to every row.
# NOTE(review): the last argument (8) appears to be the insert position; in
# the displayed output the column lands between Technology and Vehicle Type.
dataframeManager.simple_column_insert(df, ColumnName.MODE.value, "Road", 8)
df

Unnamed: 0,Country,Source,Year,Value,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,EU-27,AUT,Freight,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,EU-27,AUT,Freight,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,EU-27,AUT,Freight,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,EU-27,AUT,Freight,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,EU-27,AUT,Freight,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Africa,ZWE,Freight,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Africa,ZWE,Freight,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Africa,ZWE,Freight,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Africa,ZWE,Freight,All,Road,All,All


# Adding the "Variable" and "Unit" columns
    Rule: This dataset contains stock data; therefore, the variable is "Stock". The unit of this data is "10^6 vehicle"

## Variable Column

In [17]:
# Add a constant "Stock" variable column to every row.
# NOTE(review): the last argument (4) appears to be the insert position; in
# the displayed output the column lands right after Value.
dataframeManager.simple_column_insert(df, ColumnName.VARIABLE.value, "Stock", 4)
df

Unnamed: 0,Country,Source,Year,Value,Variable,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,Stock,EU-27,AUT,Freight,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,Stock,EU-27,AUT,Freight,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,Stock,EU-27,AUT,Freight,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,Stock,EU-27,AUT,Freight,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,Stock,EU-27,AUT,Freight,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Stock,Africa,ZWE,Freight,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Stock,Africa,ZWE,Freight,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Stock,Africa,ZWE,Freight,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Stock,Africa,ZWE,Freight,All,Road,All,All


## Unit column

In [18]:
# Add a constant "10^6 vehicle" unit column to every row.
# NOTE(review): the last argument (5) appears to be the insert position; in
# the displayed output the column lands right after Variable.
dataframeManager.simple_column_insert(df, ColumnName.UNIT.value, "10^6 vehicle", 5)
df

Unnamed: 0,Country,Source,Year,Value,Variable,Unit,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel
0,Austria,International Organization of Motor Vehicle Ma...,2005,367,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All
1,Austria,International Organization of Motor Vehicle Ma...,2006,374,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All
2,Austria,International Organization of Motor Vehicle Ma...,2007,382,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All
3,Austria,International Organization of Motor Vehicle Ma...,2008,391,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All
4,Austria,International Organization of Motor Vehicle Ma...,2009,398,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,100,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,100,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,103,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,110,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All


# Value Column
    Rule: The dataset provides values in thousands, so we must convert them to millions

In [23]:
# Raw figures as currently stored, one per dataframe row
old_values = list(df["Value"])

# Strip any thousands separators, then divide by 1000
new_values = [
    float(str(raw_value).replace(",", "")) / float(1000)
    for raw_value in old_values
]

# Every row must have produced exactly one converted value
assert len(df) == len(new_values)

# Swap the column: remove the raw one, then append the converted one at the end
df.drop(columns=["Value"], inplace=True)
df[ColumnName.VALUE.value] = new_values
df

Unnamed: 0,Country,Source,Year,Variable,Unit,Region,ISO Code,Service,Technology,Mode,Vehicle Type,Fuel,Value
0,Austria,International Organization of Motor Vehicle Ma...,2005,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All,0.367
1,Austria,International Organization of Motor Vehicle Ma...,2006,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All,0.374
2,Austria,International Organization of Motor Vehicle Ma...,2007,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All,0.382
3,Austria,International Organization of Motor Vehicle Ma...,2008,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All,0.391
4,Austria,International Organization of Motor Vehicle Ma...,2009,Stock,10^6 vehicle,EU-27,AUT,Freight,All,Road,All,All,0.398
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,Zimbabwe,International Organization of Motor Vehicle Ma...,2011,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All,0.100
1547,Zimbabwe,International Organization of Motor Vehicle Ma...,2012,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All,0.100
1548,Zimbabwe,International Organization of Motor Vehicle Ma...,2013,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All,0.103
1549,Zimbabwe,International Organization of Motor Vehicle Ma...,2014,Stock,10^6 vehicle,Africa,ZWE,Freight,All,Road,All,All,0.110


# Reordering Columns
    Rule: To comply with the current template, the columns must be shown in a specific order. The dataframe manager specifies the order of the columns

In [24]:
# Put the columns in the order defined by the dataframe manager; the
# reordered dataframe is returned, so it is rebound to `df`.
df = dataframeManager.reorder_columns(df)
df

Unnamed: 0,Source,Country,ISO Code,Region,Variable,Unit,Service,Mode,Vehicle Type,Technology,Fuel,Value,Year
0,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Freight,Road,All,All,All,0.367,2005
1,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Freight,Road,All,All,All,0.374,2006
2,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Freight,Road,All,All,All,0.382,2007
3,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Freight,Road,All,All,All,0.391,2008
4,International Organization of Motor Vehicle Ma...,Austria,AUT,EU-27,Stock,10^6 vehicle,Freight,Road,All,All,All,0.398,2009
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1546,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Freight,Road,All,All,All,0.100,2011
1547,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Freight,Road,All,All,All,0.100,2012
1548,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Freight,Road,All,All,All,0.103,2013
1549,International Organization of Motor Vehicle Ma...,Zimbabwe,ZWE,Africa,Stock,10^6 vehicle,Freight,Road,All,All,All,0.110,2014


# Exporting Results

In [25]:
# Programming Friendly (PF) view: the helper saves the file and prints its location
dataframeManager.create_programming_friendly_file(df)

# User Friendly (UF) view: the helper saves the file and prints its location
dataframeManager.create_user_friendly_file(df)

> PF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
> UF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
