In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import json

from sqlalchemy import create_engine

In [2]:
# Read in fert data
csv_path = "Full_Fertilizer_Data.csv"
fertilizer_df = pd.read_csv(csv_path)

# Read in GDP data
csv_file = "gdp_per_capita.csv"
gdp_df = pd.read_csv(csv_file);

# Output File (CSV)
output_data_file = "../Project-2/clean_merged_data.csv"

In [3]:
# Filter required columns
fertilizer_df = fertilizer_df[["Area","Item","Year","Value"]]

# Rename nutrients in Item column
fertilizer_df['Item'] = fertilizer_df['Item'].replace(['Nutrient nitrogen N (total)'],'Nitrogen')
fertilizer_df['Item'] = fertilizer_df['Item'].replace(['Nutrient potash K2O (total)'],'Potash')
fertilizer_df['Item'] = fertilizer_df['Item'].replace(['Nutrient phosphate P2O5 (total)'],'Phosphate')

# Rename column headers
fertilizer_df = fertilizer_df.rename(columns={"Area": "Country/Area", "Item": "Type_of_Fertilizer" , "Value": "Value(kg/ha)"})

fertilizer_df

Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha)
0,Afghanistan,Nitrogen,1961,0.13
1,Afghanistan,Nitrogen,1962,0.13
2,Afghanistan,Nitrogen,1963,0.13
3,Afghanistan,Nitrogen,1964,0.13
4,Afghanistan,Nitrogen,1965,0.13
...,...,...,...,...
30558,OECD,Potash,2015,22.39
30559,OECD,Potash,2016,22.72
30560,OECD,Potash,2017,23.55
30561,OECD,Potash,2018,23.35


In [4]:
# Filter required columns
gdp_df = gdp_df[['Entity','Code','Year','GDP per capita, PPP (constant 2017 international $)']]

# Rename column headers
gdp_df = gdp_df.rename(columns={"Entity": "Country/Area",
                                "GDP per capita, PPP (constant 2017 international $)": "GDP_per_capita"})

gdp_df

Unnamed: 0,Country/Area,Code,Year,GDP_per_capita
0,Abkhazia,OWID_ABK,2015,
1,Afghanistan,AFG,2002,1189.784668
2,Afghanistan,AFG,2003,1235.810059
3,Afghanistan,AFG,2004,1200.277954
4,Afghanistan,AFG,2005,1286.793701
...,...,...,...,...
6912,Zimbabwe,ZWE,2001,4400.328125
6913,Zimbabwe,ZWE,2018,3923.030518
6914,Zimbabwe,ZWE,2019,3630.033936
6915,Zimbabwe,ZWE,2020,3353.411377


In [5]:
fert_gdp_merge_df = fertilizer_df.merge(gdp_df, on=["Country/Area","Year"])

fert_gdp_merge_df


Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha),Code,GDP_per_capita
0,Afghanistan,Nitrogen,2002,3.16,AFG,1189.784668
1,Afghanistan,Phosphate,2002,0.00,AFG,1189.784668
2,Afghanistan,Potash,2002,0.00,AFG,1189.784668
3,Afghanistan,Nitrogen,2003,2.58,AFG,1235.810059
4,Afghanistan,Phosphate,2003,0.84,AFG,1235.810059
...,...,...,...,...,...,...
13344,OECD,Phosphate,2016,25.89,,
13345,OECD,Potash,2016,22.72,,
13346,OECD,Nitrogen,2017,77.11,,
13347,OECD,Phosphate,2017,26.95,,


In [6]:
nitrogen_df = fert_gdp_merge_df.loc[(fert_gdp_merge_df['Type_of_Fertilizer'] == 'Nitrogen')]
nitrogen_df

Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha),Code,GDP_per_capita
0,Afghanistan,Nitrogen,2002,3.16,AFG,1189.784668
3,Afghanistan,Nitrogen,2003,2.58,AFG,1235.810059
6,Afghanistan,Nitrogen,2004,2.82,AFG,1200.277954
9,Afghanistan,Nitrogen,2005,2.59,AFG,1286.793701
12,Afghanistan,Nitrogen,2006,2.59,AFG,1315.789063
...,...,...,...,...,...,...
13334,OECD,Nitrogen,2013,76.85,,
13337,OECD,Nitrogen,2014,75.23,,
13340,OECD,Nitrogen,2015,75.45,,
13343,OECD,Nitrogen,2016,77.65,,


In [7]:
potash_df = fert_gdp_merge_df.loc[(fert_gdp_merge_df['Type_of_Fertilizer'] == 'Potash')]
potash_df

Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha),Code,GDP_per_capita
2,Afghanistan,Potash,2002,0.00,AFG,1189.784668
5,Afghanistan,Potash,2003,0.00,AFG,1235.810059
8,Afghanistan,Potash,2004,0.00,AFG,1200.277954
11,Afghanistan,Potash,2005,0.01,AFG,1286.793701
14,Afghanistan,Potash,2006,0.00,AFG,1315.789063
...,...,...,...,...,...,...
13336,OECD,Potash,2013,23.09,,
13339,OECD,Potash,2014,22.10,,
13342,OECD,Potash,2015,22.39,,
13345,OECD,Potash,2016,22.72,,


In [8]:
phosphate_df = fert_gdp_merge_df.loc[(fert_gdp_merge_df['Type_of_Fertilizer'] == 'Phosphate')]
phosphate_df

Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha),Code,GDP_per_capita
1,Afghanistan,Phosphate,2002,0.00,AFG,1189.784668
4,Afghanistan,Phosphate,2003,0.84,AFG,1235.810059
7,Afghanistan,Phosphate,2004,1.36,AFG,1200.277954
10,Afghanistan,Phosphate,2005,1.16,AFG,1286.793701
13,Afghanistan,Phosphate,2006,0.56,AFG,1315.789063
...,...,...,...,...,...,...
13335,OECD,Phosphate,2013,25.33,,
13338,OECD,Phosphate,2014,24.76,,
13341,OECD,Phosphate,2015,25.43,,
13344,OECD,Phosphate,2016,25.89,,


In [24]:
fert_merge_df = nitrogen_df.merge(potash_df, on=["Country/Area","Year"])

fert_merge_df = fert_merge_df.merge(phosphate_df, on=["Country/Area","Year"])

# Remove unrequired columns
fert_merge_df = fert_merge_df[['Country/Area',
                            'Code',
                            'Year',
                            'Value(kg/ha)_x',
                            'Value(kg/ha)_y',
                            'Value(kg/ha)',
                            'GDP_per_capita']]

# Rename column headers
fert_merge_df = fert_merge_df.rename(columns={'Value(kg/ha)_x': "Nitrogen(kg/ha)",
                                'Value(kg/ha)_y': "Potash(kg/ha)",
                                'Value(kg/ha)': "Phosphate(kg/ha)"})

# Remove rows with no values
fert_merge_df = fert_merge_df.dropna()

# Remove World
fert_merge_df = fert_merge_df.loc[fert_merge_df["Country/Area"] != "World"]

fert_merge_df

Unnamed: 0,Country/Area,Code,Year,Nitrogen(kg/ha),Potash(kg/ha),Phosphate(kg/ha),GDP_per_capita
0,Afghanistan,AFG,2002,3.16,0.00,0.00,1189.784668
1,Afghanistan,AFG,2003,2.58,0.00,0.84,1235.810059
2,Afghanistan,AFG,2004,2.82,0.00,1.36,1200.277954
3,Afghanistan,AFG,2005,2.59,0.01,1.16,1286.793701
4,Afghanistan,AFG,2006,2.59,0.00,0.56,1315.789063
...,...,...,...,...,...,...,...
3886,Zimbabwe,ZWE,2015,10.39,3.17,5.61,3707.622559
3887,Zimbabwe,ZWE,2016,14.17,8.10,9.51,3678.217041
3888,Zimbabwe,ZWE,2017,15.85,9.76,10.10,3795.642334
3889,Zimbabwe,ZWE,2018,12.22,9.00,11.17,3923.030518


In [25]:
# Output review in csv
fert_merge_df.to_csv(output_data_file)

In [60]:
rds_connection_string = "postgres:postgres@localhost:5432/Fertilizer_DB"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [61]:
engine.table_names()

  engine.table_names()


['Fertilizer_DB']

In [62]:
fertilizer_df.to_sql(name='Fertilizer_DB', con=engine, if_exists='append', index=False)

In [63]:
pd.read_sql_query('select * from "Fertilizer_DB"', con=engine).head()

Unnamed: 0,Country/Area,Type_of_Fertilizer,Year,Value(kg/ha)
0,Afghanistan,Nutrient nitrogen N (total),1961,0.13
1,Afghanistan,Nutrient nitrogen N (total),1962,0.13
2,Afghanistan,Nutrient nitrogen N (total),1963,0.13
3,Afghanistan,Nutrient nitrogen N (total),1964,0.13
4,Afghanistan,Nutrient nitrogen N (total),1965,0.13
