# 2D DDW

## Data Extraction and Cleaning

### Extract food supply by year (y-value)

Names of files: "DDW_Food Supply.csv", "DDW_Min Cal Intake.csv"

This code keeps only the rows where Item == "Grand Total", extracts them by year, and converts each year's data to a dataframe/csv file.


Each dataframe is named df{year}_food_supply.

Ratio = Food_supply/Minimum_energy_requirement

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the food supply table and keep only the rows whose Item is "Grand Total".
df_food_supply = pd.read_csv("DDW_Food Supply.csv")
is_grand_total = df_food_supply["Item"].eq("Grand Total")
df_grand_total = df_food_supply[is_grand_total]

# Load the minimum calorie intake table, discarding rows that have no Value.
df_min_cal_intake = pd.read_csv("DDW_Min Cal Intake.csv").dropna(subset=["Value"])

In [3]:
# Countries that appear in both the Grand Total food supply data and the
# minimum calorie intake data, kept in food-supply order.
df_food_supply_countries = df_grand_total["Area"].unique()
df_min_cal_intake_countries = df_min_cal_intake["Area"].unique()
y_countries_list = [
    area
    for area in df_food_supply_countries
    if area in df_min_cal_intake_countries
]

In [4]:
# extract food supply by year
def _food_supply_for(year):
    """Return the "Area" and "Value" columns of the Grand Total rows for `year`."""
    in_year = df_grand_total["Year"] == year
    return df_grand_total.loc[in_year, ["Area", "Value"]]


# Only the 2019 frame has its "Area" column renamed to "Country";
# every other year keeps the "Area" column name.
df2019_food_supply = _food_supply_for(2019).rename(columns={"Area": "Country"})

df2018_food_supply = _food_supply_for(2018)
df2017_food_supply = _food_supply_for(2017)
df2016_food_supply = _food_supply_for(2016)
df2015_food_supply = _food_supply_for(2015)
df2014_food_supply = _food_supply_for(2014)
df2013_food_supply = _food_supply_for(2013)
df2012_food_supply = _food_supply_for(2012)
df2011_food_supply = _food_supply_for(2011)
df2010_food_supply = _food_supply_for(2010)

In [5]:
# Minimum calorie intake for 2018, reduced to the Area, Value and Year columns.
min_cal_is_2018 = df_min_cal_intake["Year"] == 2018
df2018_min_cal = df_min_cal_intake.loc[min_cal_is_2018, ["Area", "Value", "Year"]]
df2018_min_cal

Unnamed: 0,Area,Value,Year
8,Afghanistan,1681.0,2018
20,Albania,1910.0,2018
32,Algeria,1778.0,2018
68,Angola,1662.0,2018
80,Antigua and Barbuda,1888.0,2018
...,...,...,...
2384,Vanuatu,1696.0,2018
2396,Venezuela (Bolivarian Republic of),1819.0,2018
2408,Viet Nam,1784.0,2018
2420,Yemen,1707.0,2018


In [6]:
# Restrict both 2018 frames to the countries listed in y_countries_list.
food_in_common = df2018_food_supply["Area"].isin(y_countries_list)
df2018_food_supply = df2018_food_supply[food_in_common]

min_cal_in_common = df2018_min_cal["Area"].isin(y_countries_list)
df2018_min_cal = df2018_min_cal[min_cal_in_common]


In [17]:
# Build the 2018 y-value table: food supply, minimum energy requirement and
# their ratio, one row per country.
food_supply_col = "Food_supply(kcal/capita/day)"
min_energy_col = "Minimum_energy_requirement(kcal/capita/day)"

# Join on "Area" instead of pasting the minimum-calorie values in by position:
# a positional paste is only correct when both frames hold exactly the same
# countries in exactly the same row order, and raises when the lengths differ.
df_2018_y_val = (
    df2018_food_supply
    .rename(columns={"Value": food_supply_col})
    .merge(
        df2018_min_cal[["Area", "Value"]].rename(columns={"Value": min_energy_col}),
        on="Area",
        how="inner",
        validate="one_to_one",  # fail loudly if a country appears twice on either side
    )
    .reset_index(drop=True)
)
df_2018_y_val["Food_supply/Minimum_energy_requirement"] = (
    df_2018_y_val[food_supply_col] / df_2018_y_val[min_energy_col]
)
df_2018_y_val

Unnamed: 0,Area,Food_supply(kcal/capita/day),Minimum_energy_requirement(kcal/capita/day),Food_supply/Minimum_energy_requirement
0,Afghanistan,2270,1681.0,1.350387
1,Albania,3296,1910.0,1.725654
2,Algeria,3382,1778.0,1.902137
3,Angola,2451,1662.0,1.474729
4,Antigua and Barbuda,2470,1888.0,1.308263
...,...,...,...,...
169,Vanuatu,2584,1696.0,1.523585
170,Venezuela (Bolivarian Republic of),2225,1819.0,1.223200
171,Viet Nam,2916,1784.0,1.634529
172,Yemen,2009,1707.0,1.176919


### Extract GDP per capita

In [5]:
# Load GDP per capita and discard the rows whose "2012" entry equals "..".
# NOTE(review): only the "2012" column is checked; whether other year columns
# can also hold ".." is not visible here — confirm.
dfGDP = pd.read_csv("DDW_GDP per capita.csv")
has_2012_value = dfGDP["2012"] != ".."
dfGDP = dfGDP[has_2012_value]

In [6]:
# Distinct country names remaining in the GDP table.
dfGDP_countries = dfGDP["Country"].unique()

In [7]:
# Year 2018: country plus that year's column, relabelled as "GDP".
df2018_GDP = dfGDP[["Country", "2018"]].rename(columns={"2018": "GDP"})
df2018_GDP

Unnamed: 0,Country,GDP
0,Afghanistan,1735
1,Albania,12306
2,Algeria,13886
3,Angola,5725
4,Antigua and Barbuda,23768
...,...,...
204,Sub-Saharan Africa,3552
205,Least Developed Countries,2688
206,Small Island Developing States,18086
207,Organization for Economic Co-operation and Dev...,40492


In [8]:
# Year 2017: country plus that year's column, relabelled as "GDP".
df2017_GDP = dfGDP[["Country", "2017"]].rename(columns={"2017": "GDP"})

In [9]:
# Year 2016: country plus that year's column, relabelled as "GDP".
df2016_GDP = dfGDP[["Country", "2016"]].rename(columns={"2016": "GDP"})

In [10]:
# Year 2015: country plus that year's column, relabelled as "GDP".
df2015_GDP = dfGDP[["Country", "2015"]].rename(columns={"2015": "GDP"})

In [11]:
# Year 2014: country plus that year's column, relabelled as "GDP".
df2014_GDP = dfGDP[["Country", "2014"]].rename(columns={"2014": "GDP"})

In [12]:
# Year 2013: country plus that year's column, relabelled as "GDP".
df2013_GDP = dfGDP[["Country", "2013"]].rename(columns={"2013": "GDP"})

In [13]:
# Year 2012: country plus that year's column, relabelled as "GDP".
df2012_GDP = dfGDP[["Country", "2012"]].rename(columns={"2012": "GDP"})

In [14]:
# Year 2011: country plus that year's column, relabelled as "GDP".
df2011_GDP = dfGDP[["Country", "2011"]].rename(columns={"2011": "GDP"})

In [15]:
# Year 2010: country plus that year's column, relabelled as "GDP".
df2010_GDP = dfGDP[["Country", "2010"]].rename(columns={"2010": "GDP"})

### Extract percentage of land for agriculture

In [16]:
# Load the agricultural land table, dropping every row that has a missing
# value (NaN) in any column, then collect the distinct country names.
df_agriculture = pd.read_csv("DDW_Agricultural Land.csv").dropna()
df_agriculture_countries = df_agriculture["Country"].unique()

In [17]:
# Year 2010 - 2020: one two-column frame (Country, <year>) per year.
# The years are generated newest-first to line up with the names on the left.
(
    df2020_agri,
    df2019_agri,
    df2018_agri,
    df2017_agri,
    df2016_agri,
    df2015_agri,
    df2014_agri,
    df2013_agri,
    df2012_agri,
    df2011_agri,
    df2010_agri,
) = (df_agriculture[["Country", str(year)]] for year in range(2020, 2009, -1))

In [18]:
# Year 2010
# NOTE(review): the two lines below are disabled code, so this cell does nothing.
# They select a "Country Name" column, whereas the active code in this notebook
# selects "Country" from df_agriculture — confirm whether this cell can be removed.
#df2010_agri = df_agriculture[["Country Name", "2010"]]
#df2010_agri = df2010_agri.rename(columns = {"Country Name": "Country", "2010":"Agriculture"})

### Extract Employment in Agriculture (value in 1000s of persons)

In [19]:
# Load the employment table and collect its distinct Area names.
df_employment = pd.read_csv("DDW_Employment In Agriculture.csv")
df_employment_countries = df_employment["Area"].unique()
df_employment

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Indicator Code,Indicator,Sex Code,Sex,Year Code,Year,Source Code,Source,Unit,Value,Flag,Flag Description,Note
0,OEA,Employment Indicators: Agriculture,4,Afghanistan,21066,"Employment in agriculture, forestry and fishin...",13,Total,2012,2012,3021,Household income and expenditure survey,1000 persons,2477.079,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
1,OEA,Employment Indicators: Agriculture,4,Afghanistan,21066,"Employment in agriculture, forestry and fishin...",13,Total,2014,2014,3021,Household income and expenditure survey,1000 persons,2824.353,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
2,OEA,Employment Indicators: Agriculture,4,Afghanistan,21066,"Employment in agriculture, forestry and fishin...",13,Total,2017,2017,3021,Household income and expenditure survey,1000 persons,2740.235,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
3,OEA,Employment Indicators: Agriculture,4,Afghanistan,21066,"Employment in agriculture, forestry and fishin...",13,Total,2020,2020,3023,Labour force survey,1000 persons,2714.301,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
4,OEA,Employment Indicators: Agriculture,8,Albania,21066,"Employment in agriculture, forestry and fishin...",13,Total,2009,2009,3023,Labour force survey,1000 persons,511.223,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1299,OEA,Employment Indicators: Agriculture,894,Zambia,21066,"Employment in agriculture, forestry and fishin...",13,Total,2018,2018,3023,Labour force survey,1000 persons,673.139,X,Figure from international organizations,Break in series Break in series: Methodology r...
1300,OEA,Employment Indicators: Agriculture,894,Zambia,21066,"Employment in agriculture, forestry and fishin...",13,Total,2019,2019,3023,Labour force survey,1000 persons,667.582,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...
1301,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21066,"Employment in agriculture, forestry and fishin...",13,Total,2011,2011,3023,Labour force survey,1000 persons,3577.023,X,Figure from international organizations,Break in series Break in series: Methodology r...
1302,OEA,Employment Indicators: Agriculture,716,Zimbabwe,21066,"Employment in agriculture, forestry and fishin...",13,Total,2014,2014,3023,Labour force survey,1000 persons,4212.176,X,Figure from international organizations,Repository: ILO-STATISTICS - Micro data proces...


In [20]:
# Employment rows for 2019, reduced to the Area and Value columns.
# Kept under its own name so that the 2019 food supply frame
# (df2019_food_supply) built earlier in the notebook is not overwritten
# with employment figures.
is_employment_2019 = df_employment["Year"] == 2019
df2019_employment = df_employment.loc[is_employment_2019, ["Area", "Value"]]

### Extract CO2

In [21]:
# Load the CO2 dataset from "DDW_CO2.csv" and display it.
df_co2 = pd.read_csv("DDW_CO2.csv")
df_co2

Unnamed: 0,country,year,iso_code,population,gdp,cement_co2,cement_co2_per_capita,co2,co2_growth_abs,co2_growth_prct,...,share_global_cumulative_oil_co2,share_global_cumulative_other_co2,share_global_flaring_co2,share_global_gas_co2,share_global_oil_co2,share_global_other_co2,total_ghg,total_ghg_excluding_lucf,trade_co2,trade_co2_share
0,Afghanistan,1949,AFG,7624058.0,,,,0.015,,,...,,,,,,,,,,
1,Afghanistan,1950,AFG,7752117.0,9.421400e+09,,,0.084,0.070,475.00,...,0.00,,,,0.00,,,,,
2,Afghanistan,1951,AFG,7840151.0,9.692280e+09,,,0.092,0.007,8.70,...,0.00,,,,0.00,,,,,
3,Afghanistan,1952,AFG,7935996.0,1.001733e+10,,,0.092,0.000,0.00,...,0.00,,,,0.00,,,,,
4,Afghanistan,1953,AFG,8039684.0,1.063052e+10,,,0.106,0.015,16.00,...,0.00,,,,0.00,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26003,Zimbabwe,2016,ZWE,14030338.0,2.096179e+10,0.639,0.046,10.738,-1.488,-12.17,...,0.02,,,,0.03,,115.92,28.53,1.415,13.18
26004,Zimbabwe,2017,ZWE,14236599.0,2.194784e+10,0.678,0.048,9.582,-1.156,-10.77,...,0.02,,,,0.03,,115.59,28.30,1.666,17.39
26005,Zimbabwe,2018,ZWE,14438812.0,2.271535e+10,0.697,0.048,11.854,2.273,23.72,...,0.02,,,,0.03,,118.22,30.83,1.308,11.04
26006,Zimbabwe,2019,ZWE,14645473.0,,0.697,0.048,10.949,-0.905,-7.64,...,0.02,,,,0.03,,117.96,30.53,1.473,13.45


### Create list of common countries

In [22]:
# Countries from the food supply data that also appear in the GDP,
# agricultural land and employment datasets, kept in food-supply order.
other_country_sets = (
    dfGDP_countries,
    df_agriculture_countries,
    df_employment_countries,
)
countries = [
    name
    for name in df_food_supply_countries
    if all(name in known for known in other_country_sets)
]
print(countries)
print(len(countries))

['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Chad', 'Chile', 'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Dominican Republic', 'Ecuador', 'El Salvador', 'Estonia', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Lithuania', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Mauritania', 'Mauritius', 'Mexico', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nepal', 'Netherlands', 'N