# Modules

In [1]:
import pandas as pd

# Variables

In [2]:
# Source dataset: national emissions reported to the Convention on Long-range
# Transboundary Air Pollution (LRTAP Convention), 2023.
# https://sdi.eea.europa.eu/catalogue/srv/eng/catalog.search#/metadata/2999364f-be52-4012-b4fd-f98e2cc8fab6
# Relative path to the CSV file of that dataset.
ds_NatEmissions='01.Data/eea_t_lrtap-convention_p_1990-2021_v01_r00/CLRTAP_NVFR19_V23_1_GF_csv.csv'

In [3]:
# Source dataset: national emissions reported to the UNFCCC and to the
# EU Greenhouse Gas Monitoring Mechanism, October 2023.
# https://sdi.eea.europa.eu/catalogue/srv/eng/catalog.search#/metadata/e2e7dd1e-0d67-4b20-a0d4-b22c53a59d24
# Relative path to the CSV file of that dataset.
ds_UNFCCEmissions='01.Data/eea_t_national-emissions-reported_p_2023_v01_r01/UNFCCC_v26.csv'

# Load data

In [4]:
# Read the tab-separated LRTAP file (header on the first row).
# dtype='unicode' loads every column as text, so nothing is parsed as a number.
df_NatEmissions = pd.read_csv(
    ds_NatEmissions,
    sep="\t",
    header=0,
    dtype='unicode',
)

In [5]:
# Preview the first rows to check how the columns were read.
df_NatEmissions.head()

Unnamed: 0,Country_Code,Country,Pollutant_name,Format_name,Sector_code,Year,Emissions,Unit,Notation,VersionId,Parent_sector_code,Sector_name
0,AT,Austria,As,NFR 2014-1,1A1a,1990,,Mg,,1,NATIONAL TOTAL,Public electricity and heat production
1,AT,Austria,As,NFR 2014-1,1A1b,1990,,Mg,,1,NATIONAL TOTAL,Petroleum refining
2,AT,Austria,As,NFR 2014-1,1A1c,1990,,Mg,,1,NATIONAL TOTAL,Manufacture of solid fuels and other energy in...
3,AT,Austria,As,NFR 2014-1,1A2a,1990,,Mg,,1,NATIONAL TOTAL,Stationary combustion in manufacturing industr...
4,AT,Austria,As,NFR 2014-1,1A2b,1990,,Mg,,1,NATIONAL TOTAL,Stationary combustion in manufacturing industr...


Let's keep only the emissions from domestic and international aviation.

In [15]:
# Distinct values of the Sector_name column.
col = df_NatEmissions.Sector_name.unique()

# Print only the sector names that mention aviation.
for sector_name in col:
    # The type check skips any non-string entry (e.g. NaN).
    if type(sector_name) is not str:
        continue
    if 'aviation' in sector_name:
        print(sector_name)

International aviation LTO (civil)
Domestic aviation LTO (civil)
International aviation cruise (civil)
Domestic aviation cruise (civil)


In [18]:
# Civil-aviation sector names to keep (domestic and international, LTO and cruise).
aviation_sectors = [
    'International aviation LTO (civil)',
    'Domestic aviation LTO (civil)',
    'International aviation cruise (civil)',
    'Domestic aviation cruise (civil)',
]

# Keep only the rows whose sector is one of the names above.
df_NatEmissions = df_NatEmissions[df_NatEmissions.Sector_name.isin(aviation_sectors)]

# Check which sector names remain after filtering.
df_NatEmissions.Sector_name.unique()

array(['International aviation LTO (civil)',
       'Domestic aviation LTO (civil)',
       'International aviation cruise (civil)',
       'Domestic aviation cruise (civil)'], dtype=object)

In [20]:
# (rows, columns) of the current frame.
df_NatEmissions.shape

(109824, 12)

In [29]:
def get_var_category(series):
    """Classify a pandas Series into a coarse variable category.

    Parameters
    ----------
    series : pandas.Series
        Column to classify.

    Returns
    -------
    str
        'Numerical' for numeric dtypes; 'Date' for datetime64 dtypes, whether
        timezone-naive or timezone-aware; 'Text (Unique)' when the number of
        distinct values (NaN counted as a value) equals the length of the
        Series; 'Categorical' otherwise.

    Notes
    -----
    An empty non-numeric, non-date Series is reported as 'Text (Unique)'
    because its distinct count (0) equals its length (0).
    """
    if pd.api.types.is_numeric_dtype(series):
        return 'Numerical'
    # is_datetime64_any_dtype also matches timezone-aware columns, which
    # is_datetime64_dtype does not recognise.
    if pd.api.types.is_datetime64_any_dtype(series):
        return 'Date'
    # The distinct count is only needed for non-numeric, non-date columns.
    if series.nunique(dropna=False) == len(series):
        return 'Text (Unique)'
    return 'Categorical'

def print_categories(df):
    """Print one line per column of ``df``: the column name and its category.

    The category of each column is obtained from ``get_var_category``.
    """
    for label in df.columns:
        category = get_var_category(df[label])
        print(label, ": ", category)

In [30]:
# Report the category inferred for every column.
# Every column comes out as 'Categorical' because the file was loaded with
# dtype='unicode', so even Year and Emissions are stored as text.
print_categories(df_NatEmissions)

Country_Code :  Categorical
Country :  Categorical
Pollutant_name :  Categorical
Format_name :  Categorical
Sector_code :  Categorical
Year :  Categorical
Emissions :  Categorical
Unit :  Categorical
Notation :  Categorical
VersionId :  Categorical
Parent_sector_code :  Categorical
Sector_name :  Categorical


In [23]:
# Show the non-null count and dtype of each column.
df_NatEmissions.info()

<class 'pandas.core.frame.DataFrame'>
Index: 109824 entries, 11 to 3816372
Data columns (total 12 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   Country_Code        109824 non-null  object
 1   Country             109824 non-null  object
 2   Pollutant_name      109824 non-null  object
 3   Format_name         109824 non-null  object
 4   Sector_code         109824 non-null  object
 5   Year                109824 non-null  object
 6   Emissions           65639 non-null   object
 7   Unit                109824 non-null  object
 8   Notation            0 non-null       object
 9   VersionId           109824 non-null  object
 10  Parent_sector_code  55744 non-null   object
 11  Sector_name         109824 non-null  object
dtypes: object(12)
memory usage: 10.9+ MB


We can see that the `Emissions` column has some null values; let's focus on this point.

In [28]:
# Select the rows whose Emissions value is missing.
# NOTE(review): the output recorded for this cell (array([nan], dtype=object)) is not
# a DataFrame display - it looks stale from an earlier version of the cell; re-run to confirm.
df_NatEmissions[df_NatEmissions.Emissions.isnull()]

array([nan], dtype=object)

In [24]:
# Summary per column; since all columns are text, this reports count/unique/top/freq.
df_NatEmissions.describe()

Unnamed: 0,Country_Code,Country,Pollutant_name,Format_name,Sector_code,Year,Emissions,Unit,Notation,VersionId,Parent_sector_code,Sector_name
count,109824,109824,109824,109824,109824,109824,65639,109824,0.0,109824,55744,109824
unique,33,34,26,1,4,32,39595,4,0.0,1,1,4
top,AT,Austria,As,NFR 2014-1,1A3ai(i),1990,0,Mg,,1,NATIONAL TOTAL,International aviation LTO (civil)
freq,3328,3328,4224,109824,27456,3432,17129,59136,,109824,55744,27456


In [25]:
# List the distinct pollutant names.
df_NatEmissions.Pollutant_name.unique()

array(['As', 'BC', 'benzo(a)', 'benzo(b)', 'benzo(k)', 'Cd', 'CO', 'Cr',
       'Cu', 'dioxin', 'HCB', 'Hg', 'Indeno', 'NH3', 'Ni', 'NMVOC', 'NOx',
       'Pb', 'PCB', 'PM10', 'PM2.5', 'Se', 'SOx', 'total PAH', 'TSP',
       'Zn'], dtype=object)

In [30]:
# Rows reporting the pollutant 'CO'.
df_NatEmissions1 = df_NatEmissions.loc[df_NatEmissions['Pollutant_name'] == 'CO']

In [36]:
# AND: rows that are both pollutant 'CO' and sector 'International aviation LTO (civil)'.
df_NatEmissions2 = df_NatEmissions.loc[
    (df_NatEmissions['Pollutant_name'] == 'CO')
    & (df_NatEmissions['Sector_name'] == 'International aviation LTO (civil)')
]

# OR: rows that are pollutant 'CO', or sector 'International aviation LTO (civil)', or both.
df_NatEmissions3 = df_NatEmissions.loc[
    (df_NatEmissions['Pollutant_name'] == 'CO')
    | (df_NatEmissions['Sector_name'] == 'International aviation LTO (civil)')
]

In [37]:
# Distinct sector names kept by the OR selection.
# NOTE(review): the recorded output lists non-aviation sectors, so this cell was
# apparently executed against the unfiltered frame; re-run the notebook in order to confirm.
df_NatEmissions3.Sector_name.unique()

array(['International aviation LTO (civil)',
       'Public electricity and heat production', 'Petroleum refining',
       'Manufacture of solid fuels and other energy industries',
       'Stationary combustion in manufacturing industries and construction: Iron and steel',
       'Stationary combustion in manufacturing industries and construction: Non-ferrous metals',
       'Stationary combustion in manufacturing industries and construction: Chemicals',
       'Stationary combustion in manufacturing industries and construction: Pulp, Paper and Print',
       'Stationary combustion in manufacturing industries and construction: Food processing, beverages and tobacco',
       'Stationary combustion in manufacturing industries and construction: Non-metallic minerals',
       'Mobile Combustion in manufacturing industries and construction',
       'Stationary combustion in manufacturing industries and construction: Other',
       'Domestic aviation LTO (civil)', 'Road transport: Passenger 

In [21]:
# Read the UNFCCC file (default comma separator, header on the first row).
# dtype='unicode' loads every column as text.
# NOTE(review): the output recorded for this cell echoes a read_csv call without
# dtype - it looks like a leftover warning from an earlier run; re-run to confirm.
input_data = pd.read_csv(
    ds_UNFCCEmissions,
    header=0,
    dtype='unicode',
)

  input_data=pd.read_csv(ds_UNFCCEmissions,header=0)


In [22]:
# Preview the first rows of the UNFCCC data.
input_data.head()

Unnamed: 0,Country_code,Country,Format_name,Pollutant_name,Sector_code,Sector_name,Parent_sector_code,Unit,Year,emissions,Notation,PublicationDate,DataSource
0,AT,Austria,IPCC Common Reporting Format,All greenhouse gases - (CO2 equivalent),-,- 4(IV) Indirect N2O Emissions from Managed S...,4,Gg CO2 equivalent,1994,12.62568,,20231006,EEA
1,AT,Austria,IPCC Common Reporting Format,All greenhouse gases - (CO2 equivalent),-,- 4(IV) Indirect N2O Emissions from Managed S...,4,Gg CO2 equivalent,1999,11.86116,,20231006,EEA
2,AT,Austria,IPCC Common Reporting Format,All greenhouse gases - (CO2 equivalent),-,- 4(IV) Indirect N2O Emissions from Managed S...,4,Gg CO2 equivalent,2010,12.66384,,20231006,EEA
3,AT,Austria,IPCC Common Reporting Format,All greenhouse gases - (CO2 equivalent),-,- 4(IV) Indirect N2O Emissions from Managed S...,4,Gg CO2 equivalent,2011,12.59482,,20231006,EEA
4,AT,Austria,IPCC Common Reporting Format,All greenhouse gases - (CO2 equivalent),1,1 - Energy,Sectors/Totals_incl_incl,Gg CO2 equivalent,1993,52087.32124,,20231006,EEA


In [23]:
# Total number of cells (rows x columns); .shape would give the two counts separately.
input_data.size

8726068