In [22]:
import pandas as pd
from pprint import pprint
from item.historical.scripts.util.managers.dataframe import DataframeManager
from item.historical.scripts.util.managers.dataframe import ColumnName
from item.historical.scripts.util.managers.country_code import CountryCodeManager
from item.common import paths

# Variables used all over the notebook and *not changed*

In [23]:
# Identifier of the dataset processed by this notebook; it is handed to the DataframeManager
DATASET_ID = "T004"
# Project helper used by later cells to insert/rename/reorder columns and to export the result
dataframeManager = DataframeManager(DATASET_ID)
# Project helper used by later cells to look up ISO codes and ITEM regions
countryCodeManager = CountryCodeManager()

# Opening the dataset and getting general info

In [24]:
# Location of the raw T004 input file, built from the project's `paths['data']` entry
path = paths['data']/'historical'/'input'/'T004_input.csv'

# The input file is semicolon-delimited. The separator is passed by keyword:
# pandas deprecated positional arguments after the file path in 1.3 and
# rejects them from 2.0 onwards, so `pd.read_csv(path, ";")` no longer runs there.
df = pd.read_csv(path, sep=";")
df

Unnamed: 0,Country,Date,Frequency,Fuel type,Type of vehicle,Value
0,Malta,2015,Annual,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,Malta,1998,Annual,Diesel,New passenger cars,2792.0
2,Malta,2002,Annual,Diesel,New passenger cars,5409.0
3,Malta,2002,Annual,Diesel,"New motor coaches, buses and trolley buses",37.0
4,Malta,2007,Annual,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,Austria,2016,Annual,Alternative (total),New light goods vehicles,567.0
16146,Austria,2007,Annual,Alternative (total),New road tractors,0.0
16147,Austria,2011,Annual,Alternative (total),New road tractors,0.0
16148,Austria,2013,Annual,Alternative (total),New road tractors,0.0


# Removing unnecessary columns
    Rule: To comply with the latest template, we are dropping unnecessary columns.

In [25]:
# The "Frequency" column is removed because its value is not part of the template
df.drop("Frequency", axis="columns", inplace=True)
df

Unnamed: 0,Country,Date,Fuel type,Type of vehicle,Value
0,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,Malta,1998,Diesel,New passenger cars,2792.0
2,Malta,2002,Diesel,New passenger cars,5409.0
3,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...
16145,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,Austria,2007,Alternative (total),New road tractors,0.0
16147,Austria,2011,Alternative (total),New road tractors,0.0
16148,Austria,2013,Alternative (total),New road tractors,0.0


# Adding the 'Source' column
    Rule: Add the same source to all rows since all data comes from the same source

In [26]:
# Add a "Source" column carrying the same value on every row. `df` is not
# reassigned here; the displayed frame below already contains the new column.
dataframeManager.simple_column_insert(df,ColumnName.SOURCE.value,"United Nations Economic Commission for Europe")
df

Unnamed: 0,Source,Country,Date,Fuel type,Type of vehicle,Value
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0


# Renaming the column "Date" to "Year"
    Rule: To comply with the current version of the template, the columns showing year values must be called "Year"

In [27]:
# Rename the "Date" column to the template's year column name (ColumnName.YEAR).
# `df` is not reassigned; the displayed frame below shows the renamed column.
dataframeManager.rename_column(df, "Date", ColumnName.YEAR.value)
df

Unnamed: 0,Source,Country,Year,Fuel type,Type of vehicle,Value
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0


# Getting the ISO Code for each Country
    Rule: For each country we have to assign their respective ISO code. 

## Determining which countries do not appear in the list of ISO codes

    As seen from the cell below, "The former Yugoslav Republic of Macedonia" does not appear in our list of ISO codes. However, it does appear under the following name:
    > The former Yugoslav Republic of Macedonia ---> North Macedonia

In [28]:
# Distinct country names, in order of first appearance.
# drop_duplicates() is used rather than set() because the iteration order of a
# set of strings changes between kernel sessions, which made the list passed to
# the lookup (and potentially the printed result) differ from run to run.
list_of_countries = df["Country"].drop_duplicates().tolist()

# Ask the country-code manager which of those names it has no ISO code for
countries_with_no_ISO_code = countryCodeManager.get_list_of_countries_with_no_iso_code(list_of_countries)

# Display the country names that still need a manual mapping
countries_with_no_ISO_code

['The former Yugoslav Republic of Macedonia']

## Adding the ISO column to the dataset

In [29]:
# Country names as they appear in the dataset, one entry per row
dirty_list_of_all_countries = df["Country"]

# Dataset names that must be swapped for the name the ISO lookup knows
country_name_fixes = {"The former Yugoslav Republic of Macedonia": "North Macedonia"}

# Same entries in the same order, with the outdated names replaced;
# every other name is passed through untouched
clean_list_of_all_countries = [
    country_name_fixes.get(name, name) for name in dirty_list_of_all_countries
]

# The cleaned list must still have exactly one entry per row
assert len(clean_list_of_all_countries) == len(dirty_list_of_all_countries)

# Look up the ISO code of every (cleaned) country name
list_of_iso_codes = countryCodeManager.get_list_of_iso_for_countries(clean_list_of_all_countries)

# Store the codes in a new column; the "Country" column itself keeps its original names
df[ColumnName.ISO_CODE.value] = list_of_iso_codes
df

Unnamed: 0,Source,Country,Year,Fuel type,Type of vehicle,Value,ISO Code
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0,MLT
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0,MLT
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0,MLT
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0,MLT
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0,MLT
...,...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0,AUT
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0,AUT
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0,AUT
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0,AUT


# Getting the ITEM Region for each country
    Rule: For each country, we need to assign an ITEM region

## Determining which countries are missing an ITEM region
    As seen from the cell below, every country has a corresponding ITEM region. Therefore, no further cleaning needs to be done to get the ITEM regions.

In [30]:
# Distinct ISO codes, in order of first appearance.
# The column is addressed through ColumnName.ISO_CODE (the same name it was
# created with) instead of a hard-coded string, and drop_duplicates() replaces
# set() so the order of the list is identical on every run.
list_of_iso_codes = df[ColumnName.ISO_CODE.value].drop_duplicates().tolist()

# Ask the country-code manager which of those codes have no ITEM region
iso_code_with_no_region = countryCodeManager.get_list_of_iso_codes_with_no_region(list_of_iso_codes)

# Display the ISO codes that have no region (an empty list means none are missing)
iso_code_with_no_region

[]

## Adding the ITEM region column to the dataset

In [31]:
# Getting the complete list of iso codes: the full column, one entry per row
# (not the de-duplicated list used for the missing-region check)
list_of_all_codes = df["ISO Code"]

# Look up the ITEM region for each code.
# NOTE(review): presumably returns one region per input code, in input order --
# the result is assigned directly as a column below; confirm against the helper.
item_region = countryCodeManager.get_list_of_regions_for_iso_codes(list_of_all_codes)

# Adding the column to the dataframe
df[ColumnName.ITEM_REGION.value] = item_region
df

Unnamed: 0,Source,Country,Year,Fuel type,Type of vehicle,Value,ISO Code,Region
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0,MLT,EU-27
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0,MLT,EU-27
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0,MLT,EU-27
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0,MLT,EU-27
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0,MLT,EU-27
...,...,...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0,AUT,EU-27
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0,AUT,EU-27
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0,AUT,EU-27
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0,AUT,EU-27


# Setting the "Variable" column
    Rule: Since all the data from this dataset is associated with sales of new vehicles, the variable is "Sales (New Vehicles)"

In [32]:
# Add a "Variable" column with the same value on every row. `df` is not
# reassigned; the displayed frame below already contains the new column.
dataframeManager.simple_column_insert(df, ColumnName.VARIABLE.value, "Sales (New Vehicles)")
df

Unnamed: 0,Variable,Source,Country,Year,Fuel type,Type of vehicle,Value,ISO Code,Region
0,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0,MLT,EU-27
1,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0,MLT,EU-27
2,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0,MLT,EU-27
3,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0,MLT,EU-27
4,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0,MLT,EU-27
...,...,...,...,...,...,...,...,...,...
16145,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0,AUT,EU-27
16146,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0,AUT,EU-27
16147,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0,AUT,EU-27
16148,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0,AUT,EU-27


# Setting the "Unit" column and setting the correct magnitude of the "Value" column
    Rule: The unit for "Sales (New Vehicles)" is 10^6 vehicles / yr. Therefore, we need to add the "Unit" column and divide every entry of the "Value" column by 10^6 so that it is expressed in that unit.

## Adding the column called "Unit"

In [33]:
# Add a "Unit" column with the same value on every row. Only the label is set
# here; the "Value" column is rescaled to this unit in the following cell.
dataframeManager.simple_column_insert(df, ColumnName.UNIT.value, "10^6 vehicle / yr")
df

Unnamed: 0,Unit,Variable,Source,Country,Year,Fuel type,Type of vehicle,Value,ISO Code,Region
0,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0,MLT,EU-27
1,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0,MLT,EU-27
2,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0,MLT,EU-27
3,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0,MLT,EU-27
4,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0,MLT,EU-27
...,...,...,...,...,...,...,...,...,...,...
16145,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0,AUT,EU-27
16146,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0,AUT,EU-27
16147,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0,AUT,EU-27
16148,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0,AUT,EU-27


## Setting the correct magnitude to the "Value" column

In [34]:
# The "Unit" column is "10^6 vehicle / yr", so every raw value is divided by
# one million (10^6) to express it in that unit.
VEHICLES_PER_UNIT = 10 ** 6

# Values before rescaling, one entry per row
current_magnitude = df["Value"]

# Rescaled values as a plain list of floats. The division is done on the whole
# column at once instead of looping over the rows in Python.
new_magnitude_value = (current_magnitude.astype(float) / VEHICLES_PER_UNIT).tolist()

# Assert that the len of the new list is equal to the old list
assert len(new_magnitude_value) == len(current_magnitude)

# Remove the old "Value" column and add the rescaled one (it is appended as the
# last column; the final column order is set later by the reorder step).
# NOTE: this cell rescales whatever is currently in "Value", so running it a
# second time without reloading the data divides the values by 10^6 again.
df.drop(columns=["Value"], inplace=True)
df[ColumnName.VALUE.value] = new_magnitude_value
df

Unnamed: 0,Unit,Variable,Source,Country,Year,Fuel type,Type of vehicle,ISO Code,Region,Value
0,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),MLT,EU-27,0.000348
1,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,MLT,EU-27,0.002792
2,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,MLT,EU-27,0.005409
3,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000037
4,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000048
...,...,...,...,...,...,...,...,...,...,...
16145,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,AUT,EU-27,0.000567
16146,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,AUT,EU-27,0.000000
16147,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,AUT,EU-27,0.000000
16148,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,AUT,EU-27,0.000000


# Adding the "Service" column
    Rule: The new vehicles sold are associated with either the "Freight" or the "Passenger" service. So depending on the type of vehicle sold, we will determine the service to be assigned. Below is the service to be assigned to each vehicle type
    
    > New lorries (vehicle wt over 3500 kg) --> Freight
    > New road tractors --> Freight
    > New passenger cars --> Passenger
    > New motor coaches, buses and trolley buses --> Freight
    > New light goods vehicles --> Freight

In [35]:
# Service assigned to each vehicle type reported by the dataset
service_by_vehicle_type = {
    "New lorries (vehicle wt over 3500 kg)": "Freight",
    "New road tractors": "Freight",
    "New passenger cars": "Passenger",
    # NOTE(review): coaches and buses are labelled "Freight", exactly as this
    # notebook's Service rule states -- confirm that this is really intended.
    "New motor coaches, buses and trolley buses": "Freight",
    "New light goods vehicles": "Freight",
}

# Vehicle type of every row
vehicle_types = df["Type of vehicle"]

# Fail loudly on a vehicle type the rule does not cover, instead of silently
# labelling it "Freight" (same error style as the fuel-type mapping cell)
unknown_vehicle_types = [
    name for name in vehicle_types.drop_duplicates() if name not in service_by_vehicle_type
]
if unknown_vehicle_types:
    raise Exception('{} does not belong to any service'.format(unknown_vehicle_types))

# Service of each row, in row order
service_per_row = [service_by_vehicle_type[name] for name in vehicle_types]

# Ensure the size of the service column is the same as the size of the dataframe
assert len(service_per_row) == len(df)

# Create the "Service" column
df[ColumnName.SERVICE.value] = service_per_row
df

Unnamed: 0,Unit,Variable,Source,Country,Year,Fuel type,Type of vehicle,ISO Code,Region,Value,Service
0,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),MLT,EU-27,0.000348,Freight
1,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,MLT,EU-27,0.002792,Passenger
2,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,MLT,EU-27,0.005409,Passenger
3,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000037,Freight
4,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000048,Freight
...,...,...,...,...,...,...,...,...,...,...,...
16145,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,AUT,EU-27,0.000567,Freight
16146,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight
16147,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight
16148,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight


# Creating the "Mode" column
    Rule: Since all the data is associated to road vehicles, the mode is "Road"

In [36]:
# Add a "Mode" column with the same value ("Road") on every row. `df` is not
# reassigned; the displayed frame below already contains the new column.
dataframeManager.simple_column_insert(df, ColumnName.MODE.value, "Road")
df

Unnamed: 0,Mode,Unit,Variable,Source,Country,Year,Fuel type,Type of vehicle,ISO Code,Region,Value,Service
0,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),MLT,EU-27,0.000348,Freight
1,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,MLT,EU-27,0.002792,Passenger
2,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,MLT,EU-27,0.005409,Passenger
3,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000037,Freight
4,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",MLT,EU-27,0.000048,Freight
...,...,...,...,...,...,...,...,...,...,...,...,...
16145,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,AUT,EU-27,0.000567,Freight
16146,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight
16147,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight
16148,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,AUT,EU-27,0.000000,Freight


# Creating the "Vehicle Type" column
    Rule: The dataset provides 5 different types of vehicles, and this is how each one is mapped to the ITEM standard names
    
    > ORIGINAL NAME --> STANDARD NAME
    > New lorries (vehicle wt over 3500 kg) --> Heavy Truck
    > New road tractors --> Medium Truck
    > New passenger cars --> LDV
    > New motor coaches, buses and trolley buses --> Bus
    > New light goods vehicles --> Light Truck

In [37]:
# ITEM standard vehicle type for each vehicle type reported by the dataset
item_vehicle_type_by_original_name = {
    "New lorries (vehicle wt over 3500 kg)": "Heavy Truck",
    "New road tractors": "Medium Truck",
    "New passenger cars": "LDV",
    "New motor coaches, buses and trolley buses": "Bus",
    "New light goods vehicles": "Light Truck",
}

# Vehicle type of every row, as named by the dataset
types_of_vehicle = df["Type of vehicle"]

# Report exactly which names have no mapping. Without this check an unmapped
# name would only surface as a length mismatch with no hint of the culprit.
unknown_types_of_vehicle = [
    name
    for name in types_of_vehicle.drop_duplicates()
    if name not in item_vehicle_type_by_original_name
]
if unknown_types_of_vehicle:
    raise Exception('{} has no ITEM vehicle type'.format(unknown_types_of_vehicle))

# ITEM vehicle type of each row, in row order
vehicle_type_with_item_names = [
    item_vehicle_type_by_original_name[name] for name in types_of_vehicle
]

# Assert the final list is the same size as the size of the dataframe
assert len(vehicle_type_with_item_names) == len(df)

# Removing the column "Type of vehicle"; it is replaced by the standardised column
df.drop(columns=["Type of vehicle"], inplace=True)

# Creating the column
df[ColumnName.VEHICLE_TYPE.value] = vehicle_type_with_item_names
df

Unnamed: 0,Mode,Unit,Variable,Source,Country,Year,Fuel type,ISO Code,Region,Value,Service,Vehicle Type
0,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,Diesel,MLT,EU-27,0.000348,Freight,Heavy Truck
1,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,Diesel,MLT,EU-27,0.002792,Passenger,LDV
2,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,MLT,EU-27,0.005409,Passenger,LDV
3,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,Diesel,MLT,EU-27,0.000037,Freight,Bus
4,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,Diesel,MLT,EU-27,0.000048,Freight,Bus
...,...,...,...,...,...,...,...,...,...,...,...,...
16145,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,Alternative (total),AUT,EU-27,0.000567,Freight,Light Truck
16146,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,Alternative (total),AUT,EU-27,0.000000,Freight,Medium Truck
16147,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,Alternative (total),AUT,EU-27,0.000000,Freight,Medium Truck
16148,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,Alternative (total),AUT,EU-27,0.000000,Freight,Medium Truck


# Setting the "Technology" and "Fuel Type" columns
    Rule: The mapping from the fuel type given to ITEM Technology and Fuel Type is as follows:
    
    > ORIGINAL --> TECHNOLOGY --> FUEL TYPE
    ---------------------------------------
    > LPG --> Natural Gas Vehicle --> Natural Gas
    > Compressed natural gas (CNG) --> Natural Gas Vehicle --> Natural Gas
    > Liquefied natural gas (LNG) --> Natural Gas Vehicle --> Natural Gas
    
    > Bioethanol --> Conventional --> Liquid-Bio
    > Bi-fuel vehicles --> Conventional --> Liquid-Bio
    > Biodiesel --> Conventional --> Liquid-Bio
    
    > Diesel (excluding hybrids) --> Conventional --> Liquid - Fossil
    > Hybrid electric-diesel --> Conventional --> Liquid - Fossil
    > Hybrid electric-petrol --> Conventional --> Liquid - Fossil
    > Diesel --> Conventional --> Liquid - Fossil
    > Petrol --> Conventional --> Liquid - Fossil
    > Petrol (excluding hybrids) --> Conventional --> Liquid - Fossil
    
    > Plug-in hybrid diesel-electric --> PHEV --> Electricity
    > Plug-in hybrid petrol-electric --> PHEV --> Electricity
    
    > Hydrogen and fuel cells --> Fuel Cell --> Hydrogen
    > Electricity --> BEV --> Electricity
    > Total --> All --> All
    > Alternative (total) --> Alternative --> Alternative

In [38]:
# Per-row results, filled in row order
technology_type = []
fuel_type = []

# Original fuel types that share one (technology, fuel) pair
group_one = ["LPG", "Compressed natural gas (CNG)", "Liquefied natural gas (LNG)"]
group_two = ["Bioethanol", "Bi-fuel vehicles", "Biodiesel"]
group_three = ["Diesel (excluding hybrids)", "Hybrid electric-diesel", "Hybrid electric-petrol", "Diesel", "Petrol", "Petrol (excluding hybrids)"]
group_four = ["Plug-in hybrid diesel-electric", "Plug-in hybrid petrol-electric"]

# Lookup table: original fuel type -> (technology, fuel)
technology_and_fuel = {}
for members, pair in (
    (group_one, ("Natural Gas Vehicle", "Natural Gas")),
    (group_two, ("Conventional", "Liquid-Bio")),
    (group_three, ("Conventional", "Liquid - Fossil")),
    (group_four, ("PHEV", "Electricity")),
):
    for member in members:
        technology_and_fuel[member] = pair
technology_and_fuel["Hydrogen and fuel cells"] = ("Fuel Cell", "Hydrogen")
technology_and_fuel["Electricity"] = ("BEV", "Electricity")
technology_and_fuel["Total"] = ("All", "All")
technology_and_fuel["Alternative (total)"] = ("Alternative", "Alternative")

# Map every row; a fuel type missing from the table stops the cell with an error
for raw_fuel_type in df["Fuel type"]:
    # Some labels contain a "- " marker, which is removed before the lookup
    original_fuel_type = raw_fuel_type.replace("- ", "")
    if original_fuel_type not in technology_and_fuel:
        raise Exception('{} does not belong to any group'.format(original_fuel_type))
    technology, fuel = technology_and_fuel[original_fuel_type]
    technology_type.append(technology)
    fuel_type.append(fuel)

# Both result lists must have one entry per row of the dataframe
assert len(technology_type) == len(df)
assert len(fuel_type) == len(df)

# Removing the old column about fuel type
df.drop(columns=["Fuel type"], inplace=True)

# Adding the two standardised columns to the dataframe
df[ColumnName.TECHNOLOGY.value] = technology_type
df[ColumnName.FUEL.value] = fuel_type
df

Unnamed: 0,Mode,Unit,Variable,Source,Country,Year,ISO Code,Region,Value,Service,Vehicle Type,Technology,Fuel
0,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2015,MLT,EU-27,0.000348,Freight,Heavy Truck,Conventional,Liquid - Fossil
1,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,1998,MLT,EU-27,0.002792,Passenger,LDV,Conventional,Liquid - Fossil
2,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,MLT,EU-27,0.005409,Passenger,LDV,Conventional,Liquid - Fossil
3,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2002,MLT,EU-27,0.000037,Freight,Bus,Conventional,Liquid - Fossil
4,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Malta,2007,MLT,EU-27,0.000048,Freight,Bus,Conventional,Liquid - Fossil
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16145,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2016,AUT,EU-27,0.000567,Freight,Light Truck,Alternative,Alternative
16146,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2007,AUT,EU-27,0.000000,Freight,Medium Truck,Alternative,Alternative
16147,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2011,AUT,EU-27,0.000000,Freight,Medium Truck,Alternative,Alternative
16148,Road,10^6 vehicle / yr,Sales (New Vehicles),United Nations Economic Commission for Europe,Austria,2013,AUT,EU-27,0.000000,Freight,Medium Truck,Alternative,Alternative


# Reordering the columns
    Rule: To comply with the current template, the columns must be shown in a specific order. The dataframe manager specifies the order of the columns

In [39]:
# Put the columns in the order defined by the dataframe manager. Unlike the
# insert/rename helpers used earlier, the result is returned, so `df` is rebound.
df = dataframeManager.reorder_columns(df)
df

Unnamed: 0,Source,Country,ISO Code,Region,Variable,Unit,Service,Mode,Vehicle Type,Technology,Fuel,Value,Year
0,United Nations Economic Commission for Europe,Malta,MLT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Heavy Truck,Conventional,Liquid - Fossil,0.000348,2015
1,United Nations Economic Commission for Europe,Malta,MLT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Passenger,Road,LDV,Conventional,Liquid - Fossil,0.002792,1998
2,United Nations Economic Commission for Europe,Malta,MLT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Passenger,Road,LDV,Conventional,Liquid - Fossil,0.005409,2002
3,United Nations Economic Commission for Europe,Malta,MLT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Bus,Conventional,Liquid - Fossil,0.000037,2002
4,United Nations Economic Commission for Europe,Malta,MLT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Bus,Conventional,Liquid - Fossil,0.000048,2007
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,AUT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Light Truck,Alternative,Alternative,0.000567,2016
16146,United Nations Economic Commission for Europe,Austria,AUT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Medium Truck,Alternative,Alternative,0.000000,2007
16147,United Nations Economic Commission for Europe,Austria,AUT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Medium Truck,Alternative,Alternative,0.000000,2011
16148,United Nations Economic Commission for Europe,Austria,AUT,EU-27,Sales (New Vehicles),10^6 vehicle / yr,Freight,Road,Medium Truck,Alternative,Alternative,0.000000,2013


# Exporting the Programming Friendly and User Friendly versions of the data

In [38]:
# Programming Friendly View
# Writes the "PF" file; the helper prints the location it saved to.
dataframeManager.create_programming_friendly_file(df)

# User Friendly View
# Writes the "UF" file from the same dataframe; the helper prints the location it saved to.
dataframeManager.create_user_friendly_file(df)

> PF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
> UF File saved at: /Users/hlinero/Documents/database/item/historical/scripts
