In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
emission_data = pd.read_csv('raw_data/Emissions_Agriculture_Agriculture_total_E_All_Data_(Norm).csv', sep = ',', encoding = 'latin-1')
emission_data

Unnamed: 0,Country Code,Country,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,Afghanistan,5058,Enteric Fermentation,7231,Emissions (CO2eq),1961,1961,Gigagrams,5054.3459,A
1,2,Afghanistan,5058,Enteric Fermentation,7231,Emissions (CO2eq),1962,1962,Gigagrams,5151.5228,A
2,2,Afghanistan,5058,Enteric Fermentation,7231,Emissions (CO2eq),1963,1963,Gigagrams,5372.3989,A
3,2,Afghanistan,5058,Enteric Fermentation,7231,Emissions (CO2eq),1964,1964,Gigagrams,5440.3650,A
4,2,Afghanistan,5058,Enteric Fermentation,7231,Emissions (CO2eq),1965,1965,Gigagrams,5577.5580,A
...,...,...,...,...,...,...,...,...,...,...,...
326177,5873,OECD,1709,Agricultural Soils,7243,Emissions (CO2eq) from N2O,2012,2012,Gigagrams,507201.5669,A
326178,5873,OECD,1709,Agricultural Soils,7243,Emissions (CO2eq) from N2O,2013,2013,Gigagrams,515750.7919,A
326179,5873,OECD,1709,Agricultural Soils,7243,Emissions (CO2eq) from N2O,2014,2014,Gigagrams,526015.4357,A
326180,5873,OECD,1709,Agricultural Soils,7243,Emissions (CO2eq) from N2O,2030,2030,Gigagrams,525722.7988,A


## Explanation of agriculture emissions datafile

The emissions data in our dataset covers all emissions from agriculture. That is, both the emissions from crops and livestock production. There are different categories where the emissions come from but we are only interested in the total in this project. We will therefor clean out so that we only have the total emissions for agriculture. We are also only interested in CO2 emissions for this project and will therefore clean out the other types of emissions as well. Since we are studying emission on a continents level we will also only keep the data for the different continents.

## Data cleaning

In [5]:
emission_data.Item.unique()

array(['Enteric Fermentation', 'Manure Management', 'Rice Cultivation',
       'Synthetic Fertilizers', 'Manure applied to Soils',
       'Manure left on Pasture', 'Crop Residues',
       'Cultivation of Organic Soils', 'Burning - Crop residues',
       'Burning - Savanna', 'Agriculture total', 'Agricultural Soils'],
      dtype=object)

In [10]:
emissions = emission_data[emission_data.Item.str.contains('Agriculture total')] # Keep only agriculture total emissions

In [18]:
# Keep only continent data
continents = ['Africa', 'Northern America', 'South America', 'Asia', 'Oceania', 'Europe']
emission_continent = emissions[emissions.Country.isin(continents)].rename({'Country': 'Area'}, axis=1)

emission_continent

Unnamed: 0,Country Code,Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
277052,5100,Africa,1711,Agriculture total,7231,Emissions (CO2eq),1961,1961,Gigagrams,233438.4994,A
277053,5100,Africa,1711,Agriculture total,7231,Emissions (CO2eq),1962,1962,Gigagrams,237662.8732,A
277054,5100,Africa,1711,Agriculture total,7231,Emissions (CO2eq),1963,1963,Gigagrams,241883.7192,A
277055,5100,Africa,1711,Agriculture total,7231,Emissions (CO2eq),1964,1964,Gigagrams,248689.6524,A
277056,5100,Africa,1711,Agriculture total,7231,Emissions (CO2eq),1965,1965,Gigagrams,255912.4287,A
...,...,...,...,...,...,...,...,...,...,...,...
307686,5500,Oceania,1711,Agriculture total,7243,Emissions (CO2eq) from N2O,2012,2012,Gigagrams,116863.3514,A
307687,5500,Oceania,1711,Agriculture total,7243,Emissions (CO2eq) from N2O,2013,2013,Gigagrams,78615.5455,A
307688,5500,Oceania,1711,Agriculture total,7243,Emissions (CO2eq) from N2O,2014,2014,Gigagrams,89613.8889,A
307689,5500,Oceania,1711,Agriculture total,7243,Emissions (CO2eq) from N2O,2030,2030,Gigagrams,100990.5540,A


In [19]:
# Is any information missing?
print("Missing information in categorized dataset: \n", emission_continent.isna().sum())

Missing information in categorized dataset: 
 Country Code    0
Area            0
Item Code       0
Item            0
Element Code    0
Element         0
Year Code       0
Year            0
Unit            0
Value           0
Flag            0
dtype: int64


In [21]:
# Save to pickle
emission_continent.to_pickle('./data/pickles/agriculture_emissions_continents.pkl')

We also have another emissions file with some categories to easier be able to determine differences in emission between crops and livestock production. This dataset already has countries filtered out and only contains the different continents.

In [3]:
categorized_data = pd.read_csv('raw_data/Emission_data.csv', sep = ',', encoding = 'latin-1')
categorized_data

Unnamed: 0,Domain Code,Domain,Area Code,Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,EI,Emissions intensities,5100,Africa,7231,Emissions (CO2eq),1718,Cereals excluding rice,1961,1961,gigagrams,6404.6437,A,"Aggregate, may include official, semi-official..."
1,EI,Emissions intensities,5100,Africa,7231,Emissions (CO2eq),1718,Cereals excluding rice,1962,1962,gigagrams,6841.1636,A,"Aggregate, may include official, semi-official..."
2,EI,Emissions intensities,5100,Africa,7231,Emissions (CO2eq),1718,Cereals excluding rice,1963,1963,gigagrams,7246.6931,A,"Aggregate, may include official, semi-official..."
3,EI,Emissions intensities,5100,Africa,7231,Emissions (CO2eq),1718,Cereals excluding rice,1964,1964,gigagrams,7400.0959,A,"Aggregate, may include official, semi-official..."
4,EI,Emissions intensities,5100,Africa,7231,Emissions (CO2eq),1718,Cereals excluding rice,1965,1965,gigagrams,7668.5256,A,"Aggregate, may include official, semi-official..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8155,EI,Emissions intensities,5500,Oceania,5510,Production,1035,"Meat, pig",2012,2012,tonnes,500824.0000,A,"Aggregate, may include official, semi-official..."
8156,EI,Emissions intensities,5500,Oceania,5510,Production,1035,"Meat, pig",2013,2013,tonnes,503040.0000,A,"Aggregate, may include official, semi-official..."
8157,EI,Emissions intensities,5500,Oceania,5510,Production,1035,"Meat, pig",2014,2014,tonnes,506860.0000,A,"Aggregate, may include official, semi-official..."
8158,EI,Emissions intensities,5500,Oceania,5510,Production,1035,"Meat, pig",2015,2015,tonnes,514315.0000,A,"Aggregate, may include official, semi-official..."


In [6]:
categorized_data['Item'].unique()

array(['Cereals excluding rice', 'Rice, paddy', 'Meat, cattle',
       'Milk, whole fresh cow', 'Meat, goat', 'Milk, whole fresh goat',
       'Meat, buffalo', 'Milk, whole fresh buffalo', 'Meat, sheep',
       'Milk, whole fresh sheep', 'Milk, whole fresh camel',
       'Meat, chicken', 'Eggs, hen, in shell', 'Meat, pig'], dtype=object)

In [20]:
sheep_prod = categorized_data[categorized_data.Item.str.contains('sheep')]

sheep_data = sheep_prod.groupby(['Area','Element','Year','Unit']).agg({'Value':'sum'})
sheep_data['Item'] = 'Sheep'

In [21]:
sheep_data.reset_index()

Unnamed: 0,Area,Element,Year,Unit,Value,Item
0,Africa,Emissions (CO2eq),1961,gigagrams,26107.8934,Sheep
1,Africa,Emissions (CO2eq),1962,gigagrams,25742.8884,Sheep
2,Africa,Emissions (CO2eq),1963,gigagrams,25928.7068,Sheep
3,Africa,Emissions (CO2eq),1964,gigagrams,26531.4870,Sheep
4,Africa,Emissions (CO2eq),1965,gigagrams,27693.8435,Sheep
...,...,...,...,...,...,...
667,South America,Production,2012,tonnes,290741.0000,Sheep
668,South America,Production,2013,tonnes,283684.0000,Sheep
669,South America,Production,2014,tonnes,276685.0000,Sheep
670,South America,Production,2015,tonnes,264212.0000,Sheep
