In [1]:
import pandas as pd
from pprint import pprint
from util.DatasetManager import DatasetManager
from util.DataframeManager import DataframeManager
from util.CountryCodeManager import CountryCodeManager

# Variables used all over the notebook and *not changed*

In [2]:
DATASET_ID = "T004"
datasetManager = DatasetManager(DATASET_ID)
countryCodeManager = CountryCodeManager()

# Opening the dataset and getting general info

In [3]:
filename = "T004_new-road-vehicle-registrations-by-vehicle-category-and-fuel-type.csv"
path = "/Users/hlinero/Desktop/iTEM Material/raw dataset/T004/{}".format(filename)
df = datasetManager.get_dataframe_from_csv_file(path, ";")
df

Unnamed: 0,Country,Date,Frequency,Fuel type,Type of vehicle,Value
0,Malta,2015,Annual,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,Malta,1998,Annual,Diesel,New passenger cars,2792.0
2,Malta,2002,Annual,Diesel,New passenger cars,5409.0
3,Malta,2002,Annual,Diesel,"New motor coaches, buses and trolley buses",37.0
4,Malta,2007,Annual,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,Austria,2016,Annual,Alternative (total),New light goods vehicles,567.0
16146,Austria,2007,Annual,Alternative (total),New road tractors,0.0
16147,Austria,2011,Annual,Alternative (total),New road tractors,0.0
16148,Austria,2013,Annual,Alternative (total),New road tractors,0.0


# Removing unnecessary columns
    Rule: To comply with the latest template, we are dropping unnecessary columns.

In [4]:
# We are dropping the "Fruequncy" column because its value is not part of the template
df.drop(columns=["Frequency"], inplace=True)
df

Unnamed: 0,Country,Date,Fuel type,Type of vehicle,Value
0,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,Malta,1998,Diesel,New passenger cars,2792.0
2,Malta,2002,Diesel,New passenger cars,5409.0
3,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...
16145,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,Austria,2007,Alternative (total),New road tractors,0.0
16147,Austria,2011,Alternative (total),New road tractors,0.0
16148,Austria,2013,Alternative (total),New road tractors,0.0


# Adding the 'Source' column
    Rule: Add the same source to all rows since all data comes from the same source

In [5]:
DataframeManager.simple_column_insert(df,"Source","United Nations Economic Commission for Europe")
df

Unnamed: 0,Source,Country,Date,Fuel type,Type of vehicle,Value
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0


# Renaming the column "Date" to "Year"
    Rule: To comply with the current version of the template, the columns showing year values must be called "Year"

In [6]:
DataframeManager.rename_column(df, "Date", "Year")
df

Unnamed: 0,Source,Country,Year,Fuel type,Type of vehicle,Value
0,United Nations Economic Commission for Europe,Malta,2015,Diesel,New lorries (vehicle wt over 3500 kg),348.0
1,United Nations Economic Commission for Europe,Malta,1998,Diesel,New passenger cars,2792.0
2,United Nations Economic Commission for Europe,Malta,2002,Diesel,New passenger cars,5409.0
3,United Nations Economic Commission for Europe,Malta,2002,Diesel,"New motor coaches, buses and trolley buses",37.0
4,United Nations Economic Commission for Europe,Malta,2007,Diesel,"New motor coaches, buses and trolley buses",48.0
...,...,...,...,...,...,...
16145,United Nations Economic Commission for Europe,Austria,2016,Alternative (total),New light goods vehicles,567.0
16146,United Nations Economic Commission for Europe,Austria,2007,Alternative (total),New road tractors,0.0
16147,United Nations Economic Commission for Europe,Austria,2011,Alternative (total),New road tractors,0.0
16148,United Nations Economic Commission for Europe,Austria,2013,Alternative (total),New road tractors,0.0


# Determining the vehicle types available in the dataset


In [7]:
list(set(df["Type of vehicle"]))

['New passenger cars',
 'New lorries (vehicle wt over 3500 kg)',
 'New road tractors',
 'New light goods vehicles',
 'New motor coaches, buses and trolley buses']

# Determining the available fuel types in the dataset

In [8]:
list(set(df["Fuel type"]))

['- Biodiesel',
 'Total',
 '- Electricity',
 '- Hybrid electric-diesel',
 '- Hydrogen and fuel cells',
 '- LPG',
 '- Compressed natural gas (CNG)',
 '- Diesel (excluding hybrids)',
 'Diesel',
 '- Plug-in hybrid petrol-electric',
 '- Liquefied natural gas (LNG)',
 '- Hybrid electric-petrol',
 'Alternative (total)',
 '- Petrol (excluding hybrids)',
 'Petrol',
 '- Plug-in hybrid diesel-electric',
 '- Bioethanol',
 '- Bi-fuel vehicles']