# Creating a CSV file holding all appropriate data for Comune di Milano
#### Initialising the dataframe

In [85]:
import pandas as pd

date_range = pd.date_range(start="2017-01-01", end="2024-12-01", freq="MS") 

df = pd.DataFrame({
    "row_id": range(len(date_range)),
    "date": date_range.strftime("%Y-%m")
})

df.head()

Unnamed: 0,row_id,date
0,0,2017-01
1,1,2017-02
2,2,2017-03
3,3,2017-04
4,4,2017-05


#### Adding data for the pollutants

In [86]:
pollutants = ["C6H6", "CO_8h", "NO2", "O3", "PM10", "PM25", "SO2"]

for pol in pollutants:
    df[pol] = pd.NA

for i in range(1, 9):
    filename = f"../original-datasets/ds5-{i}.csv"
    temp = pd.read_csv(filename, sep=";")

    temp["inquinante"] = temp["inquinante"].str.strip()
    temp["data"] = pd.to_datetime(temp["data"], format="%Y-%m-%d")
    temp["month"] = temp["data"].dt.strftime("%Y-%m")

    monthly_means = temp.groupby(["month", "inquinante"])["valore"].mean().reset_index()
    pivoted = monthly_means.pivot(index="month", columns="inquinante", values="valore").reset_index()

    df_merged = df.merge(pivoted, how="left", left_on="date", right_on="month", suffixes=('', '_new'))

    for pol in pollutants:
        new_col = f"{pol}_new"
        if new_col in df_merged.columns:
            df_merged[pol] = df_merged[pol].combine_first(df_merged[new_col])

    df = df_merged.drop(columns=[col for col in df_merged.columns if col.endswith('_new') or col == "month"])

df.head()


  df_merged[pol] = df_merged[pol].combine_first(df_merged[new_col])


Unnamed: 0,row_id,date,C6H6,CO_8h,NO2,O3,PM10,PM25,SO2
0,0,2017-01,3.746429,1.930882,108.92437,30.117647,68.744681,53.870968,3.96875
1,1,2017-02,3.363889,1.644737,83.210884,26.035714,70.789474,52.131579,5.9
2,2,2017-03,1.93012,1.231765,95.21875,74.424242,46.333333,30.973684,6.6
3,3,2017-04,0.85098,0.885294,71.869231,94.098039,28.604167,22.0,5.0
4,4,2017-05,0.872727,0.992208,69.894737,97.846154,20.238095,12.404762,4.416667


#### Low emission bus data

In [87]:
bus_data = pd.read_csv("../original-datasets/ds6.csv", sep=";")
bus_data.columns = bus_data.columns.str.strip()

cols_to_add = [
    "Autobus a basse emissioni - indicatore per 100 autobus",
    "Autobus a basse emissioni - valore assoluto"
]

for col in cols_to_add:
    df[col] = pd.NA

months_per_year = 12

for idx, row in bus_data.iterrows():
    start_idx = idx * months_per_year
    end_idx = start_idx + months_per_year
    for col in cols_to_add:
        df.loc[start_idx:end_idx - 1, col] = row[col]

df.head()


Unnamed: 0,row_id,date,C6H6,CO_8h,NO2,O3,PM10,PM25,SO2,Autobus a basse emissioni - indicatore per 100 autobus,Autobus a basse emissioni - valore assoluto
0,0,2017-01,3.746429,1.930882,108.92437,30.117647,68.744681,53.870968,3.96875,0.4,5.0
1,1,2017-02,3.363889,1.644737,83.210884,26.035714,70.789474,52.131579,5.9,0.4,5.0
2,2,2017-03,1.93012,1.231765,95.21875,74.424242,46.333333,30.973684,6.6,0.4,5.0
3,3,2017-04,0.85098,0.885294,71.869231,94.098039,28.604167,22.0,5.0,0.4,5.0
4,4,2017-05,0.872727,0.992208,69.894737,97.846154,20.238095,12.404762,4.416667,0.4,5.0


#### Public bus emission classes

In [88]:
ds7 = pd.read_csv("../original-datasets/ds7.csv", sep=";")
ds7.columns = ds7.columns.str.strip()

emission_classes = ["Bus Euro 4 o inferiore", "Bus Euro 5", "Bus Euro 6"]

for col in emission_classes:
    df[col] = pd.NA

months_per_year = 12

for idx, row in ds7.iterrows():
    year = row["Anno"]
    emission_class = "Bus " + row["Classe di emissione"].strip()
    value = row["Percentuale autobus utilizzati"]

    start_idx = (year - 2017) * months_per_year
    end_idx = start_idx + months_per_year

    df.loc[start_idx:end_idx - 1, emission_class] = value

df.loc[0:11, ["date"] + emission_classes]


Unnamed: 0,date,Bus Euro 4 o inferiore,Bus Euro 5,Bus Euro 6
0,2017-01,50.2,31.8,18.0
1,2017-02,50.2,31.8,18.0
2,2017-03,50.2,31.8,18.0
3,2017-04,50.2,31.8,18.0
4,2017-05,50.2,31.8,18.0
5,2017-06,50.2,31.8,18.0
6,2017-07,50.2,31.8,18.0
7,2017-08,50.2,31.8,18.0
8,2017-09,50.2,31.8,18.0
9,2017-10,50.2,31.8,18.0


#### Annual public transport passengers

In [89]:
ds8 = pd.read_csv("../original-datasets/ds8.csv", sep=";")
ds8.columns = ds8.columns.str.strip()

tpl_column = "Domanda di TPL - Passeggeri annui per abitante"

df[tpl_column] = pd.NA

months_per_year = 12

for idx, row in ds8.iterrows():
    year = row["Anno"]
    value = row[tpl_column]

    start_idx = (year - 2017) * months_per_year
    end_idx = start_idx + months_per_year

    df.loc[start_idx:end_idx - 1, tpl_column] = value

df.loc[0:11, ["date", tpl_column]]


Unnamed: 0,date,Domanda di TPL - Passeggeri annui per abitante
0,2017-01,480.1
1,2017-02,480.1
2,2017-03,480.1
3,2017-04,480.1
4,2017-05,480.1
5,2017-06,480.1
6,2017-07,480.1
7,2017-08,480.1
8,2017-09,480.1
9,2017-10,480.1


#### Number of vehicles in the city per type

In [90]:
ds9 = pd.read_csv("../original-datasets/ds9.csv", sep=";")
ds9.columns = ds9.columns.str.strip()

vehicle_columns = [col for col in ds9.columns if col != "Anno"]

for col in vehicle_columns:
    df[col] = pd.NA

months_per_year = 12

for idx, row in ds9.iterrows():
    year = row["Anno"]
    start_idx = (year - 2017) * months_per_year
    end_idx = start_idx + months_per_year

    if end_idx > len(df):
        end_idx = len(df)

    for col in vehicle_columns:
        df.loc[start_idx:end_idx - 1, col] = row[col]

df.loc[0:11, ["date"] + vehicle_columns[:3]]


Unnamed: 0,date,AUTOBUS,AUTOCARRI TRASPORTO MERCI,AUTOVEICOLI SPECIALI - SPECIFICI
0,2017-01,2630,63212,11111
1,2017-02,2630,63212,11111
2,2017-03,2630,63212,11111
3,2017-04,2630,63212,11111
4,2017-05,2630,63212,11111
5,2017-06,2630,63212,11111
6,2017-07,2630,63212,11111
7,2017-08,2630,63212,11111
8,2017-09,2630,63212,11111
9,2017-10,2630,63212,11111


#### Display all data for checking the correctness

In [91]:
import numpy as np
df = df.applymap(lambda x: np.nan if pd.isna(x) else x)
df.head(96)

  df = df.applymap(lambda x: np.nan if pd.isna(x) else x)


Unnamed: 0,row_id,date,C6H6,CO_8h,NO2,O3,PM10,PM25,SO2,Autobus a basse emissioni - indicatore per 100 autobus,...,AUTOCARRI TRASPORTO MERCI,AUTOVEICOLI SPECIALI - SPECIFICI,AUTOVETTURE,MOTOCARRI E QUADRICICLI TRASPORTO MERCI,MOTOCICLI,MOTOVEICOLI E QUADRICICLI SPECIALI - SPECIFICI,RIMORCHI E SEMIRIMORCHI SPECIALI - SPECIFICI,RIMORCHI E SEMIRIMORCHI TRASPORTO MERCI,TRATTORI STRADALI O MOTRICI,ALTRI VEICOLI
0,0,2017-01,3.746429,1.930882,108.924370,30.117647,68.744681,53.870968,3.968750,0.4,...,63212.0,11111.0,700723.0,1111.0,166029.0,1464.0,2536.0,4433.0,2872.0,0.0
1,1,2017-02,3.363889,1.644737,83.210884,26.035714,70.789474,52.131579,5.900000,0.4,...,63212.0,11111.0,700723.0,1111.0,166029.0,1464.0,2536.0,4433.0,2872.0,0.0
2,2,2017-03,1.930120,1.231765,95.218750,74.424242,46.333333,30.973684,6.600000,0.4,...,63212.0,11111.0,700723.0,1111.0,166029.0,1464.0,2536.0,4433.0,2872.0,0.0
3,3,2017-04,0.850980,0.885294,71.869231,94.098039,28.604167,22.000000,5.000000,0.4,...,63212.0,11111.0,700723.0,1111.0,166029.0,1464.0,2536.0,4433.0,2872.0,0.0
4,4,2017-05,0.872727,0.992208,69.894737,97.846154,20.238095,12.404762,4.416667,0.4,...,63212.0,11111.0,700723.0,1111.0,166029.0,1464.0,2536.0,4433.0,2872.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,91,2024-08,,,,,,,,,...,,,,,,,,,,
92,92,2024-09,,,,,,,,,...,,,,,,,,,,
93,93,2024-10,1.206061,0.855682,48.172727,46.568182,25.486111,15.174603,3.954545,,...,,,,,,,,,,
94,94,2024-11,2.166667,1.408333,61.533333,27.416667,44.424242,30.196078,6.055556,,...,,,,,,,,,,


#### Export to CSV

In [92]:
df.to_csv("../mashup-datasets/MD2.csv", index=False)