In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the CSV input, relative to the notebook's working directory.
file_path = "data/Indicator_1_1_annual_6562429754166382300.csv"

# Read the whole file into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=file_path)

In [6]:
# Structural overview of the loaded frame: dimensions, column labels,
# per-column null counts, and the number of distinct values in three columns.
print("Shape:", df.shape)
print("\nColumns:", df.columns.to_list())

null_counts = df.isna().sum()  # isna() is an alias of isnull()
print("\nMissing values:\n", null_counts)

# Same report line for each column of interest; headings are printed verbatim.
for heading, col_name in (
    ("\nUnique countries:", "Country"),
    ("\nUnique industries:", "Industry"),
    ("\nUnique gas types:", "Gas Type"),
):
    print(heading, df[col_name].nunique())

Shape: (1194, 28)

Columns: ['ObjectId2', 'Country', 'ISO2', 'ISO3', 'Indicator', 'Unit', 'Source', 'CTS Code', 'CTS Name', 'CTS Full Descriptor', 'Industry', 'Gas Type', 'Seasonal Adjustment', 'Scale', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']

Missing values:
 ObjectId2                 0
Country                   0
ISO2                   1194
ISO3                      0
Indicator                 0
Unit                      0
Source                    0
CTS Code                  0
CTS Name                  0
CTS Full Descriptor       0
Industry                  0
Gas Type                  0
Seasonal Adjustment       0
Scale                     0
2010                      0
2011                      0
2012                      0
2013                      0
2014                      0
2015                      0
2016                      0
2017                      0
2018                      0
2019                   

In [7]:
# Preview the first three rows; the bare expression is rendered as the cell output.
df.head(n=3)

Unnamed: 0,ObjectId2,Country,ISO2,ISO3,Indicator,Unit,Source,CTS Code,CTS Name,CTS Full Descriptor,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,1,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Greenhouse Gas Em...",...,173.567963,177.091399,176.675451,178.385861,267.731429,264.950269,259.339703,264.885635,263.950294,261.488111
1,2,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Greenhouse Gas Em...",...,1.167994,1.216524,1.177738,1.202901,1.078726,1.057855,1.056469,1.017692,0.986116,0.951482
2,3,Advanced Economies,,AETMP,Annual greenhouse gas (GHG) air emissions acco...,Million metric tons of CO2 equivalent,Organisation for Economic Co-operation and Dev...,ECNGA,Greenhouse Gas Emissions (GHG); Air Emissions ...,"Environment, Climate Change, Greenhouse Gas Em...",...,1366.936222,1376.562102,1364.670233,1371.16541,1508.188585,1473.062889,1442.419245,1453.734142,1433.944153,1426.65117


In [14]:
# Show the column labels as a pandas Index (the bare trailing expression is
# rendered as the cell output).
# NOTE(review): the same labels are already printed by the overview cell via
# `df.columns.tolist()`, and this cell's execution count (14) is out of
# sequence with its neighbours (7 and 8) — confirm the notebook still works
# under Restart Kernel -> Run All.
df.columns

Index(['ObjectId2', 'Country', 'ISO2', 'ISO3', 'Indicator', 'Unit', 'Source',
       'CTS Code', 'CTS Name', 'CTS Full Descriptor', 'Industry', 'Gas Type',
       'Seasonal Adjustment', 'Scale', '2010', '2011', '2012', '2013', '2014',
       '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023'],
      dtype='object')

In [8]:
# Distinct values of the "Country" column.
# NOTE(review): the recorded output lists regional / economic aggregates
# (e.g. 'Africa', 'G20', 'World'), so "countries" here means reporting groups.
countries = df.loc[:, "Country"].unique()

print("Number of countries:", len(countries))
print(countries)

Number of countries: 25
['Advanced Economies' 'Africa' 'Americas' 'Asia'
 'Australia and New Zealand' 'Central Asia' 'Eastern Asia'
 'Eastern Europe' 'Emerging and Developing Economies' 'Europe' 'G20' 'G7'
 'Latin America and the Caribbean' 'Northern Africa' 'Northern America'
 'Northern Europe' 'Oceania' 'Other Oceania sub-regions'
 'South-eastern Asia' 'Southern Asia' 'Southern Europe'
 'Sub-Saharan Africa' 'Western Asia' 'Western Europe' 'World']


In [9]:
# Number of rows per "Country" value as a two-column frame
# (Country, Row_Count), in value_counts' default descending-count order.
#
# The label column is named via rename_axis() and the counts column via
# reset_index(name=...) rather than renaming "index"/"Country" afterwards.
# On pandas >= 2.0 value_counts() already names its index "Country" and its
# values "count", so the old rename mapping would relabel the *label* column
# as "Row_Count" and leave the counts as "count". This form yields the same
# columns on both pandas 1.x and 2.x.
#
# NOTE(review): `counts` is reassigned by several cells in this notebook; the
# name is kept unchanged here in case later cells refer to it.
counts = (
    df["Country"]
    .value_counts()
    .rename_axis("Country")
    .reset_index(name="Row_Count")
)

print("Total unique regions:", len(counts))
counts

Total unique regions: 25


Unnamed: 0,Country,Row_Count
0,Advanced Economies,50
1,Europe,50
2,Western Europe,50
3,Western Asia,50
4,Southern Europe,50
5,Oceania,50
6,Northern Europe,50
7,G7,50
8,G20,50
9,World,50


In [10]:
# Number of rows per "ISO3" code as a two-column frame (ISO3, Row_Count),
# in value_counts' default descending-count order.
#
# The label column is named via rename_axis() and the counts column via
# reset_index(name=...) rather than renaming "index"/"ISO3" afterwards.
# On pandas >= 2.0 value_counts() already names its index "ISO3" and its
# values "count", so the old rename mapping would relabel the *label* column
# as "Row_Count" and leave the counts as "count". This form yields the same
# columns on both pandas 1.x and 2.x.
#
# NOTE(review): `counts` is reassigned by several cells in this notebook; the
# name is kept unchanged here in case later cells refer to it.
counts = (
    df["ISO3"]
    .value_counts()
    .rename_axis("ISO3")
    .reset_index(name="Row_Count")
)

print("Total unique regions:", len(counts))
counts

Total unique regions: 25


Unnamed: 0,ISO3,Row_Count
0,AETMP,50
1,EURTMP,50
2,NAWE,50
3,NAWA,50
4,NASE,50
5,OCETMP,50
6,NANE,50
7,NA119,50
8,NA120,50
9,WLD,50


In [11]:
# Print the distinct values of three descriptive columns. `max_items` caps how
# many values are shown; None means "show all" (slicing with [:None] keeps
# every element).
for heading, col_name, max_items in (
    ("Indicators:", "Indicator", 3),
    ("Gas types:", "Gas Type", None),
    ("Industries:", "Industry", 5),
):
    print(heading, df[col_name].unique()[:max_items])

Indicators: ['Annual greenhouse gas (GHG) air emissions accounts']
Gas types: ['Carbon dioxide' 'Fluorinated gases' 'Greenhouse gas' 'Methane'
 'Nitrous oxide']
Industries: ['Agriculture, Forestry and Fishing' 'Construction'
 'Electricity, Gas, Steam and Air Conditioning Supply' 'Manufacturing'
 'Mining']


In [12]:
# Number of rows per "Industry" value as a two-column frame
# (Industry, Row_Count), in value_counts' default descending-count order.
#
# The label column is named via rename_axis() and the counts column via
# reset_index(name=...) rather than renaming "index"/"Industry" afterwards.
# On pandas >= 2.0 value_counts() already names its index "Industry" and its
# values "count", so the old rename mapping would relabel the *label* column
# as "Row_Count" and leave the counts as "count". This form yields the same
# columns on both pandas 1.x and 2.x.
#
# NOTE(review): `counts` is reassigned by several cells in this notebook; the
# name is kept unchanged here in case later cells refer to it.
counts = (
    df["Industry"]
    .value_counts()
    .rename_axis("Industry")
    .reset_index(name="Row_Count")
)

# The summary line previously said "regions", but this cell counts industries.
print("Total unique industries:", len(counts))
counts

Total unique regions: 10


Unnamed: 0,Industry,Row_Count
0,Manufacturing,125
1,Other Services Industries,125
2,Total Industry and Households,125
3,Transportation and Storage,120
4,Total Households,119
5,"Agriculture, Forestry and Fishing",116
6,Construction,116
7,"Electricity, Gas, Steam and Air Conditioning S...",116
8,Mining,116
9,"Water supply; sewerage, waste management and r...",116


In [13]:
# Number of rows per "Gas Type" value as a two-column frame
# (Gas Type, Row_Count), in value_counts' default descending-count order.
#
# The label column is named via rename_axis() and the counts column via
# reset_index(name=...) rather than renaming "index"/"Gas Type" afterwards.
# On pandas >= 2.0 value_counts() already names its index "Gas Type" and its
# values "count", so the old rename mapping would relabel the *label* column
# as "Row_Count" and leave the counts as "count". This form yields the same
# columns on both pandas 1.x and 2.x.
#
# NOTE(review): `counts` is reassigned by several cells in this notebook; the
# name is kept unchanged here in case later cells refer to it.
counts = (
    df["Gas Type"]
    .value_counts()
    .rename_axis("Gas Type")
    .reset_index(name="Row_Count")
)

# The summary line previously said "regions", but this cell counts gas types.
print("Total unique gas types:", len(counts))
counts


Total unique regions: 5


Unnamed: 0,Gas Type,Row_Count
0,Carbon dioxide,250
1,Greenhouse gas,250
2,Methane,250
3,Nitrous oxide,250
4,Fluorinated gases,194
