# Modules

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pandasql import sqldf

# Variables

In [None]:
# Input dataset: national emissions reported to the Convention on Long-range
# Transboundary Air Pollution (LRTAP Convention), 2023.
# Catalogue entry:
# https://sdi.eea.europa.eu/catalogue/srv/eng/catalog.search#/metadata/2999364f-be52-4012-b4fd-f98e2cc8fab6
ds_LRTAPemissions = (
    '01.Data/eea_t_lrtap-convention_p_1990-2021_v01_r00/'
    'CLRTAP_NVFR19_V23_1_GF_csv.csv'
)

In [None]:
# Paths of the transformed-data CSV files (both live in '02.Transformed data')
out_EU27_emissions = (
    '02.Transformed data/'
    '01.EU27_emissions.csv'
)
out_EU27_emissions_Country_Year = (
    '02.Transformed data/'
    '01.EU27_emissions_Country_Year.csv'
)

# Parameters

In [None]:
# Two-letter country codes of the European 27 countries (27 entries)
countryEU27 = [
    'BE', 'BG', 'CZ', 'DK', 'DE', 'EE', 'IE', 'GR', 'ES',
    'FR', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT',
    'NL', 'AT', 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE',
]

# Load data

In [None]:
# Read the tab-separated LRTAP file; the first line is the header and every
# column is loaded as text (dtype='unicode').
df_LRTAPemissions = pd.read_csv(
    ds_LRTAPemissions,
    header=0,
    delimiter="\t",
    dtype='unicode',
)

# Manipulate data

## National emissions

### Perimeter of analysis

In [None]:
# Aviation sectors that form the perimeter of the analysis
sectors_of_interest = [
    'International aviation LTO (civil)',
    'Domestic aviation LTO (civil)',
    'International aviation cruise (civil)',
    'Domestic aviation cruise (civil)',
]

# Keep only the rows whose sector is in the perimeter
df_emissions = df_LRTAPemissions.loc[
    df_LRTAPemissions['Sector_name'].isin(sectors_of_interest)
]

# Country-level view: every row except the 'EU27' and 'EEA32' ones
df_emissions_Country = df_emissions.loc[
    ~df_emissions['Country_Code'].isin(['EU27', 'EEA32'])
]

# From here on df_emissions holds only the 'EU27' rows
df_emissions = df_emissions.loc[df_emissions['Country_Code'].eq('EU27')]

### Management of NaN values

In [None]:
# Subset of the EU27 rows whose pollutant is 'PCB'
df_emissionsPCB = df_emissions.loc[df_emissions['Pollutant_name'].eq('PCB')]

For `Domestic aviation cruise (civil)` and `International aviation cruise (civil)` we don't have any information, so we can't fill the rows that have NaN values; we proceed with the analysis of only the `International aviation LTO (civil)` rows.

In [None]:
# PCB rows of the 'International aviation LTO (civil)' sector only.
# .copy() yields an independent frame, so the column assignment below cannot
# raise SettingWithCopyWarning or write through to df_emissionsPCB.
df_emissionsPCB_LTO = df_emissionsPCB[
    df_emissionsPCB['Sector_name'] == 'International aviation LTO (civil)'
].copy()

# ffill copies the value of the previous row, so the rows are first put in
# Year order (stable sort keeps the original order within a year).
df_emissionsPCB_LTO = df_emissionsPCB_LTO.sort_values('Year', kind='stable')

# Fill only the 'Emissions' column: a frame-wide ffill would also overwrite
# NaN in every other column with values taken from a different row.
df_emissionsPCB_LTO['Emissions'] = df_emissionsPCB_LTO['Emissions'].ffill()

In [None]:
# Drop the rows with Pollutant_name == 'PCB' AND
# Sector_name == 'International aviation LTO (civil)'.
# Both conditions are evaluated on df_emissions itself, so the boolean mask
# has exactly the index of the frame being filtered (no reliance on implicit
# index alignment with another DataFrame).
df_emissions = df_emissions[
    ~(
        (df_emissions['Pollutant_name'] == 'PCB')
        & (df_emissions['Sector_name'] == 'International aviation LTO (civil)')
    )
]

# Put the forward-filled rows (df_emissionsPCB_LTO) back in place of the dropped ones
df_emissions = pd.concat([df_emissions, df_emissionsPCB_LTO], axis=0)

# Refresh the PCB subset and show, per sector, how many 'Emissions' values are still missing
df_emissionsPCB = df_emissions[df_emissions['Pollutant_name'] == 'PCB']
df_emissionsPCB.groupby(['Sector_name']).agg({'Emissions': lambda x: x.isnull().sum()})

### Pivot data

In [None]:
def transform_df(df):
    """Pivot long-format emissions into one row per year, one column per pollutant.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Year', 'Pollutant_name' and 'Emissions'.
        'Emissions' may hold numbers or numeric text (the source file is
        loaded with every column as text); missing values are allowed.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A 'Year' column followed by one column per pollutant containing the
        sum of 'Emissions' for that year. (Year, pollutant) pairs that do not
        occur in ``df`` are NaN.

    Raises
    ------
    ValueError
        If 'Emissions' contains text that cannot be parsed as a number.
    """
    # Summing a text column concatenates the strings instead of adding the
    # values, so make sure 'Emissions' is numeric before aggregating.
    numeric_df = df.assign(Emissions=pd.to_numeric(df['Emissions']))

    # Total emissions per (Year, Pollutant_name)
    grouped = numeric_df.groupby(['Year', 'Pollutant_name'])['Emissions'].sum()

    # One column per pollutant, indexed by Year
    pivoted_df = grouped.unstack(level='Pollutant_name')

    # Turn the 'Year' index back into a regular column
    return pivoted_df.reset_index()

In [None]:
# Year x pollutant table built from the EU27 aviation emissions
df_emissionsPV = df_emissions.pipe(transform_df)

### Moving average

In [None]:
# Number of consecutive rows (one row per Year) averaged by the moving mean
rolling_window = 5

# Snapshot the pollutant columns before adding anything to the frame.
# Columns already prefixed with 'rolling_' are skipped so that re-running this
# cell does not create 'rolling_rolling_*' columns or list them as pollutants.
pollutants = [
    column
    for column in df_emissionsPV.columns
    if column != 'Year' and not str(column).startswith('rolling_')
]

# Add one 'rolling_<pollutant>' column per pollutant; the first
# rolling_window - 1 rows are NaN because the window is not yet full.
for column in pollutants:
    df_emissionsPV['rolling_' + column] = (
        df_emissionsPV[column].rolling(rolling_window).mean()
    )