# CO2 Emission Data Exploration
In this notebook, we will explore the CO2 emissions dataset to get a better understanding of its structure, missing values, and general trends. 

In [56]:
# Import necessary libraries
import plotly.graph_objs as go

import pandas as pd
%matplotlib inline

## Load the Dataset

In [57]:
# Load the dataset
data_path = '../data/cleaned/co2_emissions_cleaned.csv'
df = pd.read_csv(data_path, skiprows=4)
df.head()

Unnamed: 0,Africa Western and Central,AFW,CO2 emissions (kt),EN.ATM.CO2E.KT,97190.345,110559.9338,121628.003,114995.869,106198.491,116198.614,...,165498.4949,170350.67,186486.66,196343.63,194363.64,201399.22,200550.9,210618.89,222990.35,215915.61
0,Angola,AGO,CO2 emissions (kt),EN.ATM.CO2E.KT,6564.2,6674.5,6877.3,9269.7,11298.23,12719.48,...,23865.8,23868.0,26958.7,29610.5,31648.9,29520.7,25064.8,23637.4,24382.9,19814.5
1,Albania,ALB,CO2 emissions (kt),EN.ATM.CO2E.KT,6060.5,4119.6,2239.3,2078.4,2084.7,1951.1,...,5136.7,4541.8,4795.4,5188.0,4797.0,4573.2,5403.7,5316.1,4993.3,4383.2
2,Andorra,AND,CO2 emissions (kt),EN.ATM.CO2E.KT,406.704,406.704,406.704,410.368,406.704,425.024,...,490.976,487.312,476.32,461.664,465.328,468.992,465.328,494.64,479.984,448.8844
3,Arab World,ARB,CO2 emissions (kt),EN.ATM.CO2E.KT,632472.87,634670.889,688969.38,741817.84,786209.54,810180.27,...,1567405.8,1674146.7,1706820.79,1766583.07,1809147.15,1827587.97,1843231.7,1826351.9,1845846.37,1765053.0
4,United Arab Emirates,ARE,CO2 emissions (kt),EN.ATM.CO2E.KT,55210.4,61564.1,59702.8,63620.2,70779.6,75166.7,...,166631.5,175687.9,184960.8,186639.8,195409.4,200398.5,191935.0,174220.3,185645.7,188088.7


## Check for Missing Values

In [58]:
# Check for missing values
df.isnull().sum()

Africa Western and Central    0
AFW                           0
CO2 emissions (kt)            0
EN.ATM.CO2E.KT                0
97190.345                     0
110559.9338                   0
121628.003                    0
114995.869                    0
106198.491                    0
116198.614                    0
134252.745                    0
134320.486                    0
129455.595                    0
131960.884                    0
140653.453                    0
147879.42                     0
138656.59                     0
148185.467                    0
149751.265                    0
152041.539                    0
144266.808                    0
139839.036                    0
147214.73                     0
141983.85                     0
159573.957                    0
165498.4949                   0
170350.67                     0
186486.66                     0
196343.63                     0
194363.64                     0
201399.22                     0
200550.9

## Summary Statistics

In [59]:
# Display summary statistics of the dataset
df.describe()

Unnamed: 0,97190.345,110559.9338,121628.003,114995.869,106198.491,116198.614,134252.745,134320.486,129455.595,131960.884,...,165498.4949,170350.67,186486.66,196343.63,194363.64,201399.22,200550.9,210618.89,222990.35,215915.61
count,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0,...,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0,262.0
mean,809931.3,775227.7,774316.7,778195.3,780481.8,803959.8,819983.5,833298.4,838184.2,841271.0,...,1229094.0,1248205.0,1274341.0,1279529.0,1270300.0,1273744.0,1295802.0,1331537.0,1331785.0,1274737.0
std,2489040.0,2470884.0,2465210.0,2476488.0,2486337.0,2553740.0,2605215.0,2649285.0,2663061.0,2673592.0,...,3901674.0,3963306.0,4057108.0,4075074.0,4041290.0,4049179.0,4123389.0,4253303.0,4271267.0,4126008.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.2,9.8,8.7,8.5,7.7,7.5,7.7,8.8,8.9,6.6
25%,1793.041,1317.048,1482.633,1487.875,1663.275,1793.443,1901.95,1779.428,2017.6,2020.56,...,3111.6,3437.601,3776.3,4170.275,4281.0,4257.3,4761.9,5035.725,4905.7,4421.875
50%,21497.8,20344.75,17657.4,16618.5,16078.1,16526.29,17152.65,17995.85,18733.5,19242.8,...,26367.5,27932.55,28213.45,26677.1,26162.95,27176.4,26241.3,27037.95,26307.8,24327.8
75%,178831.9,161052.6,161165.9,158997.8,158158.9,167139.5,175101.4,177704.8,168643.1,174870.9,...,281374.3,275402.8,268871.8,271824.6,282208.5,291830.9,301361.7,301746.1,299158.3,267154.7
max,21284040.0,21440490.0,21390040.0,21531830.0,21676890.0,22299200.0,22778980.0,23202620.0,23365550.0,23530420.0,...,33079720.0,33460090.0,34119890.0,34261370.0,34070180.0,34145650.0,34687840.0,35560560.0,35477250.0,33566430.0


## Visualize Trends Over Time

In [60]:
df.columns


Index(['Africa Western and Central', 'AFW', 'CO2 emissions (kt)',
       'EN.ATM.CO2E.KT', '97190.345', '110559.9338', '121628.003',
       '114995.869', '106198.491', '116198.614', '134252.745', '134320.486',
       '129455.595', '131960.884', '140653.453', '147879.42', '138656.59',
       '148185.467', '149751.265', '152041.539', '144266.808', '139839.036',
       '147214.73', '141983.85', '159573.957', '165498.4949', '170350.67',
       '186486.66', '196343.63', '194363.64', '201399.22', '200550.9',
       '210618.89', '222990.35', '215915.61'],
      dtype='object')

## Further Exploration Ideas
- Investigate emissions for other countries.
- Analyze the countries with the highest and lowest emissions.
- Check if there's a trend in global emissions over time.