In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px

%store -r years

%matplotlib inline

In [2]:
# Load the per-product emissions key table and preview its first rows.
# get_emissions() later looks rows up by the 'Food product' column and reads
# 'Total Emissions per Tonne' from them.
em_key_df = pd.read_csv('Data/total_emissions_key.csv')
em_key_df.head()

Unnamed: 0,Food product,Total_emissions,Total Emissions per Tonne
0,Wheat & Rye (Bread),1.4,1400.0
1,Maize (Meal),1.1,1100.0
2,Barley (Beer),1.1,1100.0
3,Oatmeal,1.6,1600.0
4,Rice,4.0,4000.0


# Link Databases

In [3]:
# Maps each emissions category (key) to the list of food_df 'Item' names that
# get_item_list() assigns to it. The keys are what get_emissions() looks up in
# em_key_df['Food product']. Categories with an empty list have no items
# assigned to them here.
comp_dict = {'Wheat & Rye (Bread)' : ['Wheat and products', 'Rye and products'],
            'Maize (Meal)' : ['Maize and products'],
            'Barley (Beer)' : ['Barley and products', 'Beer'],
            'Oatmeal' : ['Oats'],
            'Rice' : ['Rice (Milled Equivalent)', 'Ricebran Oil'],
            'Potatoes' : ['Potatoes and products', 'Sweet potatoes', 'Yams'],
            'Cassava' : ['Cassava and products'],
            'Cane Sugar' : ['Sugar cane', 'Sugar (Raw Equivalent)', 'Sugar & Sweeteners', 'Sugar non-centrifugal'],
            'Beet Sugar' : ['Sugar beet', 'Sweeteners, Other', 'Sugar Crops'],
            'Other Pulses' : ['Pulses, Other and products', 'Pulses', 'Beans', 'Millet and products', 'Cereals, Other'],
            'Peas' : ['Peas'],
            'Nuts' : ['Nuts and products', 'Treenuts'],
            'Groundnuts' : ['Groundnut Oil', 'Groundnuts (Shelled Eq)'],
            'Soymilk' : [],
            'Tofu' : [],
            'Soybean Oil' : ['Soyabean Oil'],
            'Palm Oil' : ['Palm Oil', 'Palm kernels', 'Palmkernel Oil'],
            'Sunflower Oil' : ['Sunflowerseed Oil'],
            'Rapeseed Oil' : ['Rape and Mustard Oil'],
            'Olive Oil' : ['Olive Oil'],
            'Tomatoes' : ['Tomatoes and products'],
            'Onions & Leeks' : ['Onions'],
            'Root Vegetables' : ['Starchy Roots', 'Roots, Other'],
            'Brassicas' : ['Rape and Mustardseed'],
            'Other Vegetables' : ['Vegetables, Other', 'Vegetables'],
            'Citrus Fruit' : ['Oranges, Mandarines', 'Citrus, Other', 'Lemons, Limes and products', 'Grapefruit and products'],
            'Bananas' : ['Bananas', 'Plantains'],
            'Apples' : ['Apples and products'],
            'Berries & Grapes' : ['Grapes and products (excl wine)'],
            'Wine' : ['Wine', 'Beverages, Alcoholic', 'Alcoholic Beverages', 'Beverages, Fermented'],
            'Other Fruit' : ['Fruits, Other', 'Fruits - Excluding Wine', 'Pineapples and products', 'Dates'],
            'Coffee' : ['Coffee and products'],
            'Dark Chocolate' : ['Cocoa Beans and products'],
            'Beef (beef herd)' : ['Bovine Meat'],
            'Beef (dairy herd)' : [],
            'Lamb & Mutton' : ['Mutton & Goat Meat'],
            'Pig Meat' : ['Pigmeat'],
            'Poultry Meat' : ['Poultry Meat'],
            'Milk' : ['Butter, Ghee', 'Cream', 'Milk - Excluding Butter'],
            'Cheese' : [],
            'Eggs' : ['Eggs'],
            'Fish (farmed)' : ['Freshwater Fish'],
            'Shrimps (farmed)' : ['Crustaceans']}

In [4]:
# 'Item' names whose rows drop_other_items() removes from food_df before the
# remaining items are assigned an emissions category.
other_items = ['Honey', 'Coconuts - Incl Copra', 'Sesame seed', 'Olives (including preserved)', 'Cottonseed Oil', 
               'Sesameseed Oil', 'Oilcrops Oil, Other', 'Tea (including mate)', 'Pepper', 'Spices, Other', 
               'Meat, Other', 'Offals, Edible', 'Fats, Animals, Raw', 'Infant food', 'Cereals - Excluding Beer',
               'Oilcrops', 'Vegetable Oils', 'Stimulants', 'Spices', 'Meat', 'Offals', 'Animal fats', 
               'Fish, Seafood', 'Miscellaneous', 'Sorghum and products', 'Soyabeans', 'Oilcrops, Other', 'Maize Germ Oil', 
               'Pimento', 'Fish, Body Oil', 'Fish, Liver Oil', 'Demersal Fish', 'Pelagic Fish', 'Marine Fish, Other', 
               'Cephalopods', 'Molluscs, Other', 'Aquatic Animals, Others', 'Aquatic Plants', 'Aquatic Products, Other', 
               'Coconut Oil', 'Cloves', 'Sunflower seed', 'Cottonseed', 'Meat, Aquatic Mammals']

In [5]:
# Read the food dataset with every missing value replaced by 0.0, then show
# its (rows, columns) size.
food_df = pd.read_csv(
    'Data/who_eats_food_we_grow.csv',
    encoding="ISO-8859-1",
).fillna(0.0)
food_df.shape

(21477, 63)

In [6]:
def drop_other_items(df=None, items=None):
    """Remove, in place, every row whose ``Item`` is in the exclusion list.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to prune; it must have an ``Item`` column. Defaults to the
        notebook-level ``food_df``.
    items : iterable of str, optional
        Item names to remove (pass a list/tuple/set, not a bare string).
        Defaults to the notebook-level ``other_items``.

    Returns
    -------
    None
        The frame is mutated in place and the number of dropped rows is
        printed.
    """
    target = food_df if df is None else df
    excluded = other_items if items is None else items

    # DataFrame.drop() takes index *labels*. Collecting enumerate() positions
    # only works while the index is a fresh 0..n-1 range; selecting the labels
    # through a boolean mask removes the right rows for any index.
    # NOTE: assumes index labels are unique, since drop() removes every row
    # sharing a label.
    labels_to_drop = target.index[target['Item'].isin(list(excluded))]

    print("Number of rows dropped: ", len(labels_to_drop))
    target.drop(labels_to_drop, inplace=True)

In [7]:
# Remove the rows of food_df whose 'Item' is listed in other_items (mutates food_df in place).
drop_other_items()

Number of rows dropped:  8017


In [8]:
def get_item_list(items=None, mapping=None):
    """Return the emissions category for each item, one entry per item.

    Parameters
    ----------
    items : iterable of str, optional
        Item names to classify, in row order. Defaults to the notebook-level
        ``food_df['Item']``.
    mapping : dict of str -> list of str, optional
        Category name -> item names belonging to it. Defaults to the
        notebook-level ``comp_dict``.

    Returns
    -------
    list of str
        Category names aligned position-by-position with ``items``.

    Raises
    ------
    ValueError
        If any item belongs to no category. Skipping such items silently
        would return a list shorter than, and misaligned with, the rows it
        is meant to label.
    """
    item_names = list(food_df['Item'] if items is None else items)
    categories = comp_dict if mapping is None else mapping

    # Invert the mapping once so each row is a single dict lookup instead of
    # a scan over every category list. setdefault keeps the first category
    # (in dict order) if an item is listed under several, so every item
    # yields exactly one entry.
    key_by_item = {}
    for key, members in categories.items():
        for member in members:
            key_by_item.setdefault(member, key)

    unmapped = sorted({item for item in item_names if item not in key_by_item}, key=str)
    if unmapped:
        raise ValueError("Items with no emissions category: {}".format(unmapped))

    return [key_by_item[item] for item in item_names]

In [9]:
# Label every remaining row with its emissions category, then discard the
# code/unit/coordinate columns listed below.
# NOTE: both steps mutate food_df in place; re-running this cell after it has
# succeeded fails because the dropped columns no longer exist.
food_df['Enviro Key'] = get_item_list()
food_df.drop(columns=['Area Code', 'Item Code', 'Element Code', 'Unit', 'latitude', 'longitude'], 
             inplace=True)

In [10]:
def get_emissions(keys=None, key_table=None):
    """Look up the rounded 'Total Emissions per Tonne' for each category key.

    Parameters
    ----------
    keys : iterable of str, optional
        Category names to look up, in row order. Defaults to the
        notebook-level ``emissions_df['Enviro Key']``.
    key_table : pandas.DataFrame, optional
        Table with ``'Food product'`` and ``'Total Emissions per Tonne'``
        columns. Defaults to the notebook-level ``em_key_df``.

    Returns
    -------
    list
        One rounded rate per entry of ``keys``, in the same order.

    Raises
    ------
    IndexError
        If a key has no row in the table. The exception type is unchanged
        from the previous ``.values[0]`` lookup; the message now names the
        missing key.
    """
    enviro_keys = emissions_df['Enviro Key'] if keys is None else keys
    table = em_key_df if key_table is None else key_table

    # Build the product -> rate lookup once instead of scanning the whole
    # table for every row. setdefault keeps the first row for a repeated
    # product, matching the earlier `.values[0]` behaviour.
    rate_by_product = {}
    products = table['Food product']
    rates = table['Total Emissions per Tonne'].to_numpy()
    for product, rate in zip(products, rates):
        rate_by_product.setdefault(product, rate)

    emissions = []
    for item in enviro_keys:
        if item not in rate_by_product:
            raise IndexError(
                "No 'Food product' row in the emissions key for {!r}".format(item)
            )
        emissions.append(round(rate_by_product[item]))
    return emissions
        

In [11]:
# Work on a copy so food_df itself is left as it is.
emissions_df = food_df.copy()
# get_emissions() reads the notebook-level emissions_df by default, so the
# copy above must exist before this call.
emissions_df['Total Emissions per Tonne'] = get_emissions()
# Renumber the rows 0..n-1 and discard the old index labels.
emissions_df = emissions_df.reset_index(drop=True)
emissions_df.head()

Unnamed: 0,Area Abbreviation,Area,Item,Element,Y1961,Y1962,Y1963,Y1964,Y1965,Y1966,...,Y2006,Y2007,Y2008,Y2009,Y2010,Y2011,Y2012,Y2013,Enviro Key,Total Emissions per Tonne
0,AFG,Afghanistan,Wheat and products,Food,1928.0,1904.0,1666.0,1950.0,2001.0,1808.0,...,3704.0,4164.0,4252.0,4538.0,4605.0,4711.0,4810,4895,Wheat & Rye (Bread),1400.0
1,AFG,Afghanistan,Rice (Milled Equivalent),Food,183.0,183.0,182.0,220.0,220.0,195.0,...,546.0,455.0,490.0,415.0,442.0,476.0,425,422,Rice,4000.0
2,AFG,Afghanistan,Barley and products,Feed,76.0,76.0,76.0,76.0,76.0,75.0,...,262.0,263.0,230.0,379.0,315.0,203.0,367,360,Barley (Beer),1100.0
3,AFG,Afghanistan,Barley and products,Food,237.0,237.0,237.0,238.0,238.0,237.0,...,44.0,48.0,62.0,55.0,60.0,72.0,78,89,Barley (Beer),1100.0
4,AFG,Afghanistan,Maize and products,Feed,210.0,210.0,214.0,216.0,216.0,216.0,...,233.0,249.0,247.0,195.0,178.0,191.0,200,200,Maize (Meal),1100.0


In [12]:
# Sanity check: multiply the first two rows' 1961 and 2007 values by their
# per-tonne rates by hand, for comparison with the computed emissions table.
afghanistan = emissions_df.iloc[0:2]
emissions = afghanistan['Total Emissions per Tonne'].values
print("1961(0):", (afghanistan['Y1961'][0])*emissions[0])
print("1961(1):", (afghanistan['Y1961'][1])*emissions[1])
print()
print("2007(0):", (afghanistan['Y2007'][0])*emissions[0])
print("2007(1):", (afghanistan['Y2007'][1])*emissions[1])

1961(0): 2699200.0
1961(1): 732000.0

2007(0): 5829600.0
2007(1): 1820000.0


In [13]:
def get_calc_emissions(df=None, year_columns=None):
    """Multiply every year column by the row's 'Total Emissions per Tonne'.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame holding a ``'Total Emissions per Tonne'`` column plus one
        column per year. Defaults to the notebook-level ``emissions_df``.
    year_columns : iterable of str, optional
        Names of the year columns to convert. Defaults to the notebook-level
        ``years`` (restored with ``%store -r years``).

    Returns
    -------
    pandas.DataFrame
        One column per year, rows in the same order as ``df`` and indexed
        0..n-1.
    """
    source = emissions_df if df is None else df
    columns = years if year_columns is None else year_columns

    # Multiply whole columns positionally. The earlier per-cell loop used
    # `emissions_df[year][index]`, which looks the positional counter up as an
    # index *label*, so it only worked on a freshly reset index, and it paid
    # for a scalar Series lookup on every cell.
    rate = source['Total Emissions per Tonne'].to_numpy()
    new_df_dict = {year: source[year].to_numpy() * rate for year in columns}
    return pd.DataFrame.from_dict(new_df_dict)

In [14]:
# Per-year emissions, one column per year, row-aligned with emissions_df.
temp_em_df = get_calc_emissions()
# Descriptive columns carried over unchanged, placed ahead of the year columns.
cols = [
    'Area Abbreviation',
    'Area',
    'Item',
    'Element',
    'Enviro Key',
    'Total Emissions per Tonne',
]
new_em_df = emissions_df[cols].join(temp_em_df)

In [15]:
# Inspect the assembled emissions table (descriptive columns followed by the year columns).
new_em_df

Unnamed: 0,Area Abbreviation,Area,Item,Element,Enviro Key,Total Emissions per Tonne,Y1961,Y1962,Y1963,Y1964,...,Y2004,Y2005,Y2006,Y2007,Y2008,Y2009,Y2010,Y2011,Y2012,Y2013
0,AFG,Afghanistan,Wheat and products,Food,Wheat & Rye (Bread),1400.0,2699200.0,2665600.0,2332400.0,2730000.0,...,4548600.0,4880400.0,5185600.0,5829600.0,5952800.0,6353200.0,6447000.0,6595400.0,6734000.0,6853000.0
1,AFG,Afghanistan,Rice (Milled Equivalent),Food,Rice,4000.0,732000.0,732000.0,728000.0,880000.0,...,1676000.0,1780000.0,2184000.0,1820000.0,1960000.0,1660000.0,1768000.0,1904000.0,1700000.0,1688000.0
2,AFG,Afghanistan,Barley and products,Feed,Barley (Beer),1100.0,83600.0,83600.0,83600.0,83600.0,...,63800.0,259600.0,288200.0,289300.0,253000.0,416900.0,346500.0,223300.0,403700.0,396000.0
3,AFG,Afghanistan,Barley and products,Food,Barley (Beer),1100.0,260700.0,260700.0,260700.0,261800.0,...,203500.0,47300.0,48400.0,52800.0,68200.0,60500.0,66000.0,79200.0,85800.0,97900.0
4,AFG,Afghanistan,Maize and products,Feed,Maize (Meal),1100.0,231000.0,231000.0,235400.0,237600.0,...,132000.0,228800.0,256300.0,273900.0,271700.0,214500.0,195800.0,210100.0,220000.0,220000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13455,ZWE,Zimbabwe,Fruits - Excluding Wine,Food,Other Fruit,700.0,37100.0,39900.0,42000.0,42700.0,...,133700.0,93800.0,116900.0,123900.0,129500.0,128800.0,147700.0,161000.0,172200.0,151900.0
13456,ZWE,Zimbabwe,Alcoholic Beverages,Food,Wine,1400.0,294000.0,362600.0,334600.0,338800.0,...,411600.0,406000.0,442400.0,497000.0,557200.0,611800.0,627200.0,666400.0,735000.0,722400.0
13457,ZWE,Zimbabwe,Eggs,Food,Eggs,4500.0,27000.0,27000.0,27000.0,27000.0,...,67500.0,81000.0,81000.0,94500.0,99000.0,121500.0,121500.0,108000.0,108000.0,112500.0
13458,ZWE,Zimbabwe,Milk - Excluding Butter,Feed,Milk,2800.0,33600.0,33600.0,33600.0,30800.0,...,58800.0,58800.0,58800.0,58800.0,58800.0,64400.0,70000.0,70000.0,84000.0,86800.0


In [16]:
# Sanity check: recompute the Y2009 value for row 13458 by hand (per-tonne
# rate times that year's quantity) to compare with the same cell of new_em_df.
this_em = emissions_df['Total Emissions per Tonne'][13458]
this_year = emissions_df['Y2009'][13458]
this_em*this_year

64400.0

In [17]:
# Write the emissions table to CSV without the row index.
new_em_df.to_csv('Data/emissions_df.csv', index=False)