# 2D DDW

## Data Extraction and Cleaning

### Extract food supply by year (y-value)

Name of file: "DDW_Food supply.csv", "DDW_Min Cal Intake.csv"

This code extracts data by year and item == grand total and converts it to a dataframe/csv file.


Dataframe is named as df{year}_food_supply

Ratio = Food_supply/Minimum_energy_requirement

In [950]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [951]:
df_food_supply = pd.read_csv("DDW_Food Supply.csv")
df_grand_total = df_food_supply[(df_food_supply["Item"] == "Grand Total")]

df_min_cal_intake = pd.read_csv("DDW_Min Cal Intake.csv")
df_min_cal_intake = df_min_cal_intake[df_min_cal_intake["Value"].notna()]

In [952]:
# create list of common countries for df_min_cal_intake and df_grand_total datasets
df_food_supply_countries = df_grand_total.Area.unique()
df_min_cal_intake_countries = df_min_cal_intake.Area.unique()
y_countries_list = []
for i in df_food_supply_countries:
    if i in df_min_cal_intake_countries:
        y_countries_list.append(i)

In [953]:
# extract food supply by year
# Year 2019
df2019_food_supply = df_grand_total[(df_grand_total["Year"] == 2019)]
df2019_food_supply = df2019_food_supply[["Area", "Value"]]
df2019_food_supply = df2019_food_supply.rename(columns = {"Area":"Country"})

# Year 2018
df2018_food_supply = df_grand_total[(df_grand_total["Year"] == 2018)]
df2018_food_supply = df2018_food_supply[["Area", "Value"]]

# Year 2017
df2017_food_supply = df_grand_total[(df_grand_total["Year"] == 2017)]
df2017_food_supply = df2017_food_supply[["Area", "Value"]]

# Year 2016
df2016_food_supply = df_grand_total[(df_grand_total["Year"] == 2016)]
df2016_food_supply = df2016_food_supply[["Area", "Value"]]

# Year 2015
df2015_food_supply = df_grand_total[(df_grand_total["Year"] == 2015)]
df2015_food_supply = df2015_food_supply[["Area", "Value"]]

# Year 2014
df2014_food_supply = df_grand_total[(df_grand_total["Year"] == 2014)]
df2014_food_supply = df2014_food_supply[["Area", "Value"]]

# Year 2013
df2013_food_supply = df_grand_total[(df_grand_total["Year"] == 2013)]
df2013_food_supply = df2013_food_supply[["Area", "Value"]]

# Year 2012
df2012_food_supply = df_grand_total[(df_grand_total["Year"] == 2012)]
df2012_food_supply = df2012_food_supply[["Area", "Value"]]

# Year 2011
df2011_food_supply = df_grand_total[(df_grand_total["Year"] == 2011)]
df2011_food_supply = df2011_food_supply[["Area", "Value"]]

# Year 2010
df2010_food_supply = df_grand_total[(df_grand_total["Year"] == 2010)]
df2010_food_supply = df2010_food_supply[["Area", "Value"]]

In [954]:
# extract minimum calorie intake by year

# Year 2018
df2018_min_cal = df_min_cal_intake[(df_min_cal_intake["Year"] == 2018)]
df2018_min_cal = df2018_min_cal[["Area", "Value", "Year"]]
df2018_min_cal

Unnamed: 0,Area,Value,Year
8,Afghanistan,1681.0,2018
20,Albania,1910.0,2018
32,Algeria,1778.0,2018
68,Angola,1662.0,2018
80,Antigua and Barbuda,1888.0,2018
...,...,...,...
2384,Vanuatu,1696.0,2018
2396,Venezuela (Bolivarian Republic of),1819.0,2018
2408,Viet Nam,1784.0,2018
2420,Yemen,1707.0,2018


In [955]:
# filter df2018_food_supply and 
df2018_food_supply = df2018_food_supply[df2018_food_supply["Area"].isin(y_countries_list)]
df2018_min_cal = df2018_min_cal[df2018_min_cal["Area"].isin(y_countries_list)]


In [956]:
df_2018_y_val = df2018_food_supply.copy()
toadd = list(df2018_min_cal["Value"])
df_2018_y_val["Minimum_energy_requirement(kcal/capita/day)"] = toadd
df_2018_y_val = df_2018_y_val.rename(columns = {"Value":"Food_supply(kcal/capita/day)"})
df_2018_y_val = df_2018_y_val.reset_index(drop=True)
df_2018_y_val["Food_supply/Minimum_energy_requirement"] = df_2018_y_val["Food_supply(kcal/capita/day)"]/df_2018_y_val["Minimum_energy_requirement(kcal/capita/day)"]
df_2018_y_val

Unnamed: 0,Area,Food_supply(kcal/capita/day),Minimum_energy_requirement(kcal/capita/day),Food_supply/Minimum_energy_requirement
0,Afghanistan,2270,1681.0,1.350387
1,Albania,3296,1910.0,1.725654
2,Algeria,3382,1778.0,1.902137
3,Angola,2451,1662.0,1.474729
4,Antigua and Barbuda,2470,1888.0,1.308263
...,...,...,...,...
169,Vanuatu,2584,1696.0,1.523585
170,Venezuela (Bolivarian Republic of),2225,1819.0,1.223200
171,Viet Nam,2916,1784.0,1.634529
172,Yemen,2009,1707.0,1.176919


### Extract GDP per capita

In [957]:
dfGDP = pd.read_csv("DDW_GDP per capita adjusted.csv")
dfGDP = dfGDP[(dfGDP["Value"].notna())]

In [958]:
dfGDP_2018 = dfGDP[(dfGDP["Year"])==2018]
dfGDP_2018 = dfGDP_2018[["Year","Area","Value"]]
dfGDP_2018

Unnamed: 0,Year,Area,Value
6,2018,Afghanistan,2033.8
15,2018,Albania,13317.1
25,2018,Algeria,11630.7
35,2018,Angola,6982.1
45,2018,Antigua and Barbuda,21017.3
...,...,...,...
1812,2018,Uzbekistan,7082.9
1822,2018,Vanuatu,3093.1
1832,2018,Viet Nam,9548.7
1842,2018,Zambia,3521.5


#### List of unique countries for GDP per capita data

In [959]:
dfGDP_countries = dfGDP.Area.unique()

### Extracting Population

#### Population for only 2018

In [979]:
df_pop = pd.read_csv("DDW_Population.csv")
# df_pop = df_pop.dropna()
df_pop_2018 = df_pop[(df_pop["year"]==2018)]
df_pop_2018 = df_pop_2018[df_pop_2018["population"].notna()]
df_pop_2018

Unnamed: 0,country,year,population
69,Afghanistan,2018,3.717192e+07
206,Africa,2018,1.275921e+09
294,Albania,2018,2.882735e+06
399,Algeria,2018,4.222842e+07
430,Andorra,2018,7.700800e+04
...,...,...,...
25474,Wallis and Futuna,2018,1.165300e+04
25745,World,2018,7.631081e+09
25816,Yemen,2018,2.849868e+07
25887,Zambia,2018,1.735171e+07


In [980]:
df_pop_coutries = df_pop_2018.country.unique()

### Extract percentage of land for agriculture

In [981]:
df_agriculture = pd.read_csv("DDW_Agricultural Land.csv")
# remove rows with no value (NaN)
df_agriculture_2018 = df_agriculture[["Country Name","2018"]]
df_agriculture_2018 = df_agriculture_2018[df_agriculture_2018["2018"].notna()]
df_agriculture_2018


Unnamed: 0,Country Name,2018
0,Aruba,20.00
1,Africa Eastern and Southern,6556857.12
2,Afghanistan,380100.00
3,Africa Western and Central,3591824.60
4,Angola,569524.90
...,...,...
260,Samoa,551.70
262,"Yemen, Rep.",234520.00
263,South Africa,963410.00
264,Zambia,238360.00


#### Getting unique countries for Agriculture data

In [982]:
df_agriculture_countries = df_agriculture.Country.unique()

AttributeError: 'DataFrame' object has no attribute 'Country'

#### Getting Agriculture land per capita (move down)

In [None]:
df_agriculture = df_agriculture.apply(lambda x:x/(df_pop_2018))

ValueError: If using all scalar values, you must pass an index

In [None]:
# # Year 2010 - 2020
# df2020_agri = df_agriculture[["Country", "2020"]]
# df2019_agri = df_agriculture[["Country", "2019"]]
# df2018_agri = df_agriculture[["Country", "2018"]]
# df2017_agri = df_agriculture[["Country", "2017"]]
# df2016_agri = df_agriculture[["Country", "2016"]]
# df2015_agri = df_agriculture[["Country", "2015"]]
# df2014_agri = df_agriculture[["Country", "2014"]]
# df2013_agri = df_agriculture[["Country", "2013"]]
# df2012_agri = df_agriculture[["Country", "2012"]]
# df2011_agri = df_agriculture[["Country", "2011"]]
# df2010_agri = df_agriculture[["Country", "2010"]]

In [None]:
# Year 2010
#df2010_agri = df_agriculture[["Country Name", "2010"]]
#df2010_agri = df2010_agri.rename(columns = {"Country Name": "Country", "2010":"Agriculture"})

### Extract CO2 (million metric tonnes)

In [None]:
df_co2 = pd.read_csv("DDW_CO2.csv")
df_co2_filter = df_co2[(df_co2["year"])==2018]
df_co2_filter = df_co2_filter[df_co2_filter["co2"].notna()]
df_co2_filter = df_co2_filter[["year","country","co2"]]
df_co2_filter

Unnamed: 0,year,country,co2
69,2018,Afghanistan,8.345
206,2018,Africa,1385.645
294,2018,Albania,4.734
399,2018,Algeria,161.487
430,2018,Andorra,0.495
...,...,...,...
25474,2018,Wallis and Futuna,0.026
25745,2018,World,36646.141
25816,2018,Yemen,9.984
25887,2018,Zambia,7.313


#### Getting unique countries for $CO_{2}$

In [None]:
df_co2_countries = df_co2_filter.country.unique()

### Extract Basic Water Drinking Services

In [None]:
df_water = pd.read_csv("DDW_Basic Water Drinking Services.csv")
df_water = df_water[df_water["Value"].notna()]


In [None]:
df2018_water = df_water[(df_water["Year"] == 2018)]
df2018_water = df2018_water[["Year", "Area", "Value"]]
df2018_water["Value"] = df2018_water["Value"]/100
df2018_water

Unnamed: 0,Year,Area,Value
18,2018,Afghanistan,0.696
39,2018,Albania,0.944
60,2018,Algeria,0.940
81,2018,American Samoa,0.990
102,2018,Andorra,0.990
...,...,...,...
4878,2018,Small Island Developing States,0.831
4899,2018,Low income economies,0.574
4920,2018,Lower-middle-income economies,0.870
4941,2018,High-income economies,0.990


#### Getting unique countries for basic water drinking services

In [None]:
df_water_countries = df2018_water.Area.unique()

### Extract Eating Disorder

In [None]:
df_eating_disorder = pd.read_csv("DDW_Eating Disorder.csv")
df_eating_disorder = df_eating_disorder[df_eating_disorder["Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent)"].notna()]
df_eating_disorder = df_eating_disorder.rename(columns = {"Entity": "Country", "Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent)":"Prevalence"})
df_eating_disorder_2018 = df_eating_disorder[(df_eating_disorder["Year"] == 2018)]
df_eating_disorder_2018 = df_eating_disorder_2018[["Year", "Country", "Prevalence"]]
df_eating_disorder_2018

Unnamed: 0,Year,Country,Prevalence
28,2018,Afghanistan,0.12
58,2018,African Region (WHO),0.11
88,2018,Albania,0.14
118,2018,Algeria,0.22
148,2018,American Samoa,0.13
...,...,...,...
6718,2018,World Bank Lower Middle Income,0.13
6748,2018,World Bank Upper Middle Income,0.17
6778,2018,Yemen,0.13
6808,2018,Zambia,0.12


#### Getting unique countries for eating disorder


In [None]:
df_eating_disorder_countries = df_eating_disorder_2018.Country.unique()

### Extract Population Employed in Agriculture

In [None]:
df_employment = pd.read_csv("DDW_Employment In Agriculture.csv")
df_employment = df_employment[df_employment["Value"].notna()]

#### Getting unique countries for population employed

In [None]:
df_employment_countries = df_employment.Area.unique()

#### Move down Extract population employed in agri per capita

In [None]:
df2018_employement = df_employment[(df_employment["Year"] == 2018)]
df2018_percentage_employed = df2018_population[["year", "country", "population"]]

df2018_percentage_employed


Unnamed: 0,year,country,population
69,2018,Afghanistan,3.717192e+07
206,2018,Africa,1.275921e+09
294,2018,Albania,2.882735e+06
399,2018,Algeria,4.222842e+07
430,2018,Andorra,7.700800e+04
...,...,...,...
25474,2018,Wallis and Futuna,1.165300e+04
25745,2018,World,7.631081e+09
25816,2018,Yemen,2.849868e+07
25887,2018,Zambia,1.735171e+07


### Create list of common countries

In [None]:
countries = []
for i in df_food_supply_countries:
    if i in dfGDP_countries and i in df_agriculture_countries and i in df_employment_countries:
        countries.append(i)
print(countries)
print(len(countries))

['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Chad', 'Chile', 'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'El Salvador', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Lithuania', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Mauritania', 'Mauritius', 'Mexico', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'N