In [194]:
# import dependencies
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [231]:
filter_years = [2008, 2013]
data_input_folder = "crop_yield_data/"
data_output_folder = "crop_yield_data_cleaned/"

In [89]:
#load in datasets
# Yields
yield_df = pd.read_csv('yield.csv')

# Average Temperature (C)
temp_df = pd.read_csv(f"{data_input_folder}temp.csv")

# Nutrients in fertilizer used for Agricultural Use
# Item Codes 3102 = Nitrogen, 3103 = Phosphate, 3104 = Potash
nutrients_df = pd.read_csv(f"{data_input_folder}nutrients_au.csv")

# Pesticides
pesticides_df = pd.read_csv(f"{data_input_folder}pesticides.csv")

# Land Use
# Item Codes 6610 = Agricultural Land, 6621 = Arable Land
land_df = pd.read_csv(f"{data_input_folder}land.csv")

In [210]:
# Count the number of values for different years to select a year range for forecasting

yield_counts = pd.DataFrame(yield_df.drop_duplicates(['Year Code', 'Area']) \
                           .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))
temp_counts = pd.DataFrame(temp_df.drop_duplicates(['year', 'country']) \
                           .dropna()['year'].value_counts().sort_index(ascending=False).head(15))
nitrogen_counts = pd.DataFrame(nutrients_df.loc[nutrients_df['Item Code'] == 3102].drop_duplicates(['Year Code', 'Area']) \
                               .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))
phosphate_counts = pd.DataFrame(nutrients_df.loc[nutrients_df['Item Code'] == 3103].drop_duplicates(['Year Code', 'Area']) \
                               .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))
potash_counts = pd.DataFrame(nutrients_df.loc[nutrients_df['Item Code'] == 3104].drop_duplicates(['Year Code', 'Area']) \
                               .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))
pesticides_counts = pd.DataFrame(pesticides_df.drop_duplicates(['Year', 'Area']) \
                           .dropna()['Year'].value_counts().sort_index(ascending=False).head(15))
agri_counts = pd.DataFrame(land_df.loc[land_df['Item Code'] == 6610].drop_duplicates(['Year Code', 'Area']) \
                               .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))
arable_counts = pd.DataFrame(land_df.loc[land_df['Item Code'] == 6621].drop_duplicates(['Year Code', 'Area']) \
                               .dropna()['Year Code'].value_counts().sort_index(ascending=False).head(15))

In [211]:
merged = yield_counts.merge(temp_counts, "outer", left_index = True, right_index = True)
merged = merged.merge(nitrogen_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_n"])
merged = merged.merge(phosphate_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_ph"])
merged = merged.merge(potash_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_po"])
merged = merged.merge(pesticides_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_pes"])
merged = merged.merge(agri_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_ag"])
merged = merged.merge(arable_counts, "outer", left_index = True, right_index = True, suffixes = [None, "_ar"])

merged.sort_index(ascending=False)

Unnamed: 0,Year Code,year,Year Code_n,Year Code_ph,Year Code_po,Year,Year Code_ag,Year Code_ar
2020,,,168.0,167.0,167.0,,227.0,221.0
2019,,,168.0,168.0,168.0,,227.0,221.0
2018,,,168.0,168.0,168.0,,227.0,221.0
2017,,,168.0,168.0,168.0,,226.0,220.0
2016,204.0,,168.0,168.0,168.0,163.0,227.0,221.0
2015,204.0,,166.0,167.0,166.0,163.0,227.0,221.0
2014,204.0,,168.0,168.0,168.0,163.0,226.0,221.0
2013,204.0,137.0,168.0,168.0,168.0,163.0,226.0,220.0
2012,204.0,137.0,167.0,167.0,167.0,163.0,227.0,221.0
2011,203.0,137.0,165.0,162.0,164.0,163.0,226.0,220.0


In [220]:
# years = [2008, 2013]
def clean_dataset(df, years, dup_col_list, col_name, num_years, keep=None):
    
    year_name = dup_col_list[0]
    country_name = dup_col_list[1]
    
    # Drop duplicates and NaNs, reset the index
    df2 = df.drop_duplicates(dup_col_list).dropna().reset_index(drop=True)
    
    #Filter by year
    df2 = df2.loc[(df2[year_name] >= years[0]) & (df2[year_name] <= years[1])]
    
    df2 = df2[keep]
    
    new_cols = []
    for i in range(num_years):
        new_cols.append(f"{col_name}_{i+1}")
        df2[new_cols[i-1]] = np.nan

    for index, row in df2.iterrows():
        year = row[year_name]
        country = row[country_name]
        year_list = df.loc[df[country_name] == country][year_name].to_list()
        for j in range(len(new_cols)):
            if (year - j - 1) in year_list:
                loc_bool = (df[year_name] == (year - j - 1)) & (df[country_name] == country)
                df2.loc[index, new_cols[j]] = df.loc[loc_bool][col_name].values[0]
    
    return df2

In [233]:
yield_df_clean = clean_dataset(yield_df, [2008, 2013], ['Year', 'Area', 'Item'], 'Value', 5, ['Year', 'Area', 'Item', 'Value'])
yield_df_clean.head(20)

Unnamed: 0,Year,Area,Item,Value,Value_1,Value_2,Value_3,Value_4,Value_5
47,2008,Afghanistan,Maize,26277,26277.0,26204.0,12069.0,16000.0,8400.0
48,2009,Afghanistan,Maize,21429,26277.0,26277.0,26204.0,12069.0,16000.0
49,2010,Afghanistan,Maize,16448,21429.0,26277.0,26277.0,26204.0,12069.0
50,2011,Afghanistan,Maize,16400,16448.0,21429.0,26277.0,26277.0,26204.0
51,2012,Afghanistan,Maize,21986,16400.0,16448.0,21429.0,26277.0,26277.0
52,2013,Afghanistan,Maize,21972,21986.0,16400.0,16448.0,21429.0,26277.0
103,2008,Afghanistan,Potatoes,140000,26277.0,26204.0,12069.0,16000.0,8400.0
104,2009,Afghanistan,Potatoes,140000,26277.0,26277.0,26204.0,12069.0,16000.0
105,2010,Afghanistan,Potatoes,120000,21429.0,26277.0,26277.0,26204.0,12069.0
106,2011,Afghanistan,Potatoes,100000,16448.0,21429.0,26277.0,26277.0,26204.0


In [221]:
temp_df_clean = clean_dataset(temp_df, [2008, 2013], ['year', 'country'], 'avg_temp', 5, ['year', 'country', 'avg_temp'])
temp_df_clean.head(20)

Unnamed: 0,year,country,avg_temp,avg_temp_1,avg_temp_2,avg_temp_3,avg_temp_4,avg_temp_5
145,2008,Côte D'Ivoire,26.94,27.01,26.99,26.98,26.99,27.05
146,2009,Côte D'Ivoire,26.98,26.94,27.01,26.99,26.98,26.99
147,2010,Côte D'Ivoire,27.45,26.98,26.94,27.01,26.99,26.98
148,2011,Côte D'Ivoire,27.02,27.45,26.98,26.94,27.01,26.99
149,2012,Côte D'Ivoire,26.77,27.02,27.45,26.98,26.94,27.01
150,2013,Côte D'Ivoire,27.23,26.77,27.02,27.45,26.98,26.94
304,2008,United Arab Emirates,27.59,28.08,27.94,27.9,28.07,28.12
305,2009,United Arab Emirates,28.04,27.59,28.08,27.94,27.9,28.07
306,2010,United Arab Emirates,28.69,28.04,27.59,28.08,27.94,27.9
307,2011,United Arab Emirates,28.25,28.69,28.04,27.59,28.08,27.94


In [222]:
nitrogen_df_clean = clean_dataset(nutrients_df.loc[nutrients_df['Item Code'] == 3102], \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 3, ['Year', 'Area', 'Value'])
nitrogen_df_clean.head(20)

Unnamed: 0,Year,Area,Value,Value_1,Value_2,Value_3
45,2008,Afghanistan,14432.03,16377.29,20463.22,20466.15
46,2009,Afghanistan,14155.88,14432.03,16377.29,20463.22
47,2010,Afghanistan,12929.01,14155.88,14432.03,16377.29
48,2011,Afghanistan,22970.63,12929.01,14155.88,14432.03
49,2012,Afghanistan,17065.25,22970.63,12929.01,14155.88
50,2013,Afghanistan,34903.79,17065.25,22970.63,12929.01
105,2008,Albania,28744.8,31483.82,30333.88,37210.64
106,2009,Albania,33940.18,28744.8,31483.82,30333.88
107,2010,Albania,31292.44,33940.18,28744.8,31483.82
108,2011,Albania,36892.7,31292.44,33940.18,28744.8


In [223]:
phosphate_df_clean = clean_dataset(nutrients_df.loc[nutrients_df['Item Code'] == 3103], \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 3, ['Year', 'Area', 'Value'])
phosphate_df_clean.head(20)

Unnamed: 0,Year,Area,Value,Value_1,Value_2,Value_3
39,2008,Afghanistan,0.0,0.0,4406.3,9202.64
40,2009,Afghanistan,556.23,0.0,0.0,4406.3
41,2010,Afghanistan,925.85,556.23,0.0,0.0
42,2011,Afghanistan,6845.57,925.85,556.23,0.0
43,2012,Afghanistan,1261.44,6845.57,925.85,556.23
44,2013,Afghanistan,3020.03,1261.44,6845.57,925.85
99,2008,Albania,17990.54,19297.12,19632.71,23135.83
100,2009,Albania,20852.74,17990.54,19297.12,19632.71
101,2010,Albania,21633.44,20852.74,17990.54,19297.12
102,2011,Albania,23341.22,21633.44,20852.74,17990.54


In [224]:
potash_df_clean = clean_dataset(nutrients_df.loc[nutrients_df['Item Code'] == 3104], \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 3, ['Year', 'Area', 'Value'])
potash_df_clean.head(20)

Unnamed: 0,Year,Area,Value,Value_1,Value_2,Value_3
11,2008,Afghanistan,0.0,0.0,0.0,105.6
12,2009,Afghanistan,0.0,0.0,0.0,0.0
13,2010,Afghanistan,0.0,0.0,0.0,0.0
14,2011,Afghanistan,0.0,0.0,0.0,0.0
15,2012,Afghanistan,196.78,0.0,0.0,0.0
16,2013,Afghanistan,88.11,196.78,0.0,0.0
66,2008,Albania,423.6,325.8,142.05,111.45
67,2009,Albania,247.5,423.6,325.8,142.05
68,2010,Albania,299.1,247.5,423.6,325.8
69,2011,Albania,370.2,299.1,247.5,423.6


In [225]:
pesticides_df_clean = clean_dataset(pesticides_df, \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 3, ['Year', 'Area', 'Value'])
pesticides_df_clean.head(20)

Unnamed: 0,Year,Area,Value,Value_1,Value_2,Value_3
18,2008,Albania,1069.54,1006.57,943.61,880.64
19,2009,Albania,1132.5,1069.54,1006.57,943.61
20,2010,Albania,1311.17,1132.5,1069.54,1006.57
21,2011,Albania,1302.63,1311.17,1132.5,1069.54
22,2012,Albania,766.25,1302.63,1311.17,1132.5
23,2013,Albania,982.32,766.25,1302.63,1311.17
45,2008,Algeria,11556.12,5390.79,4110.72,3662.87
46,2009,Algeria,3867.99,11556.12,5390.79,4110.72
47,2010,Algeria,1295.88,3867.99,11556.12,5390.79
48,2011,Algeria,13861.76,1295.88,3867.99,11556.12


In [229]:
agri_df_clean = clean_dataset(land_df.loc[land_df['Item Code'] == 6610], \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 0, ['Year', 'Area', 'Value'])
agri_df_clean.head(20)

Unnamed: 0,Year,Area,Value
47,2008,Afghanistan,37910.0
48,2009,Afghanistan,37910.0
49,2010,Afghanistan,37911.0
50,2011,Afghanistan,37910.0
51,2012,Afghanistan,37910.0
52,2013,Afghanistan,37910.0
107,2008,Albania,1181.0
108,2009,Albania,1201.3
109,2010,Albania,1201.3
110,2011,Albania,1201.0


In [230]:
arable_df_clean = clean_dataset(land_df.loc[land_df['Item Code'] == 6621], \
                                  [2008, 2013], ['Year', 'Area'], 'Value', 0, ['Year', 'Area', 'Value'])
arable_df_clean.head(20)

Unnamed: 0,Year,Area,Value
47,2008,Afghanistan,7794.0
48,2009,Afghanistan,7793.0
49,2010,Afghanistan,7793.0
50,2011,Afghanistan,7791.0
51,2012,Afghanistan,7790.0
52,2013,Afghanistan,7785.0
107,2008,Albania,610.0
108,2009,Albania,609.0
109,2010,Albania,626.0
110,2011,Albania,622.0


In [235]:
#load in datasets
# Yields
yield_df_clean.to_csv(f"{data_output_folder}yield_clean.csv", index=False)

# Average Temperature (C)
temp_df_clean.to_csv(f"{data_output_folder}temp_clean.csv", index=False)

# Nutrients in fertilizer used for Agricultural Use
# Item Codes 3102 = Nitrogen, 3103 = Phosphate, 3104 = Potash
nitrogen_df_clean.to_csv(f"{data_output_folder}nitrogen_clean.csv", index=False)
phosphate_df_clean.to_csv(f"{data_output_folder}phosphate_clean.csv", index=False)
potash_df_clean.to_csv(f"{data_output_folder}potash_clean.csv", index=False)

# Pesticides
pesticides_df.to_csv(f"{data_output_folder}pesticides_clean.csv", index=False)

# Land Use
# Item Codes 6610 = Agricultural Land, 6621 = Arable Land
agri_df_clean.to_csv(f"{data_output_folder}agri_clean.csv", index=False)
arable_df_clean.to_csv(f"{data_output_folder}arable_clean.csv", index=False)

In [10]:
# check to see if the countries in the DF dont include 
#many of the countries in other dfs or if they are just not in alphabetical order
temp_df['country'].unique()

array(["Côte D'Ivoire", 'United Arab Emirates', 'Nigeria', 'Ghana',
       'Turkey', 'Australia', 'India', 'United States', 'Egypt',
       'Algeria', 'Kazakhstan', 'Netherlands', 'China', 'Madagascar',
       'Eritrea', 'Greece', 'Iraq', 'Azerbaijan', 'Mali', 'Indonesia',
       'Thailand', 'Central African Republic', 'Spain', 'Venezuela',
       'Colombia', 'Lebanon', 'United Kingdom', 'Serbia', 'Brazil',
       'Libya', 'Germany', 'Switzerland', 'Guinea Bissau', 'Slovakia',
       'Congo', 'Belgium', 'Romania', 'Hungary', 'Burundi', 'Morocco',
       'Russia', 'Moldova', 'Sri Lanka', 'Guinea', 'Denmark', 'Argentina',
       'Senegal', 'Syria', 'Tanzania', 'Bangladesh', 'Qatar', 'Cameroon',
       'Ireland', 'South Africa', 'Tajikistan', 'Mexico', 'Pakistan',
       'Sierra Leone', 'Botswana', 'Guyana', 'Guatemala', 'Ecuador',
       'Vietnam', 'Zimbabwe', 'Finland', 'Japan', 'Sudan', 'Afghanistan',
       'Uganda', 'Taiwan', 'Nepal', 'Ukraine', 'Rwanda', 'Canada',
       'Jamaica', 

In [None]:
# load in fertilizer data included data for nitrogren and phosphate
phosphate_df

In [None]:
#drop unecessary columns and rename value column to avoid confusion when tables are merged
phosphate_df = phosphate_df.drop(['Domain Code', 'Domain', 'Area Code (FAO)', 'Element Code', 'Flag', \
                                      'Element', 'Item Code', 'Year Code', 'Unit','Flag Description'], axis=1)
phosphate_df = phosphate_df.rename(columns={'Value':'Tons of Nutrient'})
phosphate_df

In [None]:
phosphate_df.isnull().value_counts()

In [None]:
phosphate_df2000 = phosphate_df.loc[phosphate_df['Year'] >1999]
phosphate_df2000

In [82]:
# load in fertilizer data included data for nitrogren and phosphate
nitrogen_df

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1961,1961,kg,3.926858e+08,A,"Aggregate, may include official, semi-official..."
1,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1962,1962,kg,3.871064e+08,A,"Aggregate, may include official, semi-official..."
2,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1963,1963,kg,3.976333e+08,A,"Aggregate, may include official, semi-official..."
3,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1964,1964,kg,4.034528e+08,A,"Aggregate, may include official, semi-official..."
4,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1965,1965,kg,4.135377e+08,A,"Aggregate, may include official, semi-official..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99841,EMN,Livestock Manure,181,Zimbabwe,723812,Manure applied to soils that leaches (N content),1755,All Animals,2015,2015,kg,4.562177e+06,A,"Aggregate, may include official, semi-official..."
99842,EMN,Livestock Manure,181,Zimbabwe,723812,Manure applied to soils that leaches (N content),1755,All Animals,2016,2016,kg,4.967021e+06,A,"Aggregate, may include official, semi-official..."
99843,EMN,Livestock Manure,181,Zimbabwe,723812,Manure applied to soils that leaches (N content),1755,All Animals,2017,2017,kg,4.186705e+06,A,"Aggregate, may include official, semi-official..."
99844,EMN,Livestock Manure,181,Zimbabwe,723812,Manure applied to soils that leaches (N content),1755,All Animals,2018,2018,kg,4.073850e+06,A,"Aggregate, may include official, semi-official..."


In [85]:
nitrogen_drop = nitrogen_df.drop_duplicates(['Year', 'Area']).dropna().reset_index()
nitrogen_drop

Unnamed: 0,index,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,0,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1961,1961,kg,3.926858e+08,A,"Aggregate, may include official, semi-official..."
1,1,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1962,1962,kg,3.871064e+08,A,"Aggregate, may include official, semi-official..."
2,2,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1963,1963,kg,3.976333e+08,A,"Aggregate, may include official, semi-official..."
3,3,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1964,1964,kg,4.034528e+08,A,"Aggregate, may include official, semi-official..."
4,4,EMN,Livestock Manure,2,Afghanistan,72538,Amount excreted in manure (N content),1755,All Animals,1965,1965,kg,4.135377e+08,A,"Aggregate, may include official, semi-official..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11089,99369,EMN,Livestock Manure,181,Zimbabwe,72538,Amount excreted in manure (N content),1755,All Animals,2015,2015,kg,3.263882e+08,A,"Aggregate, may include official, semi-official..."
11090,99370,EMN,Livestock Manure,181,Zimbabwe,72538,Amount excreted in manure (N content),1755,All Animals,2016,2016,kg,3.225094e+08,A,"Aggregate, may include official, semi-official..."
11091,99371,EMN,Livestock Manure,181,Zimbabwe,72538,Amount excreted in manure (N content),1755,All Animals,2017,2017,kg,3.284803e+08,A,"Aggregate, may include official, semi-official..."
11092,99372,EMN,Livestock Manure,181,Zimbabwe,72538,Amount excreted in manure (N content),1755,All Animals,2018,2018,kg,3.335923e+08,A,"Aggregate, may include official, semi-official..."


In [86]:
for index, val in nitrogen_drop['Year'].value_counts().sort_index().iteritems():
    print(index, val)

1961 177
1962 177
1963 177
1964 177
1965 177
1966 177
1967 177
1968 177
1969 177
1970 177
1971 177
1972 177
1973 177
1974 177
1975 177
1976 177
1977 177
1978 177
1979 177
1980 177
1981 177
1982 177
1983 177
1984 177
1985 177
1986 177
1987 177
1988 177
1989 177
1990 177
1991 177
1992 195
1993 197
1994 198
1995 199
1996 199
1997 199
1998 199
1999 199
2000 200
2001 200
2002 200
2003 200
2004 200
2005 200
2006 201
2007 201
2008 201
2009 201
2010 201
2011 201
2012 202
2013 202
2014 202
2015 202
2016 202
2017 202
2018 202
2019 202


In [None]:
#drop unecessary columns and rename value column to avoid confusion when tables are merged
nitrogen_df = nitrogen_df.drop(['Domain Code', 'Domain', 'Area Code (FAO)', 'Element Code', 'Flag', \
                                      'Element', 'Item Code', 'Year Code', 'Unit','Flag Description'], axis=1)
nitrogen_df = nitrogen_df.rename(columns={'Value':'Tons of Nutrient'})
nitrogen_df

In [None]:
# load in pesticide data
pesticide_df = pd.read_csv('crop_yield_data/pesticides.csv')
pesticide_df

In [None]:
# drop unecessary columns and rename value column so that column is not confusing when tables are merged
pesticide_df = pesticide_df.drop(['Domain', 'Element', 'Item', 'Unit'], axis=1)
pesticide_df = pesticide_df.rename(columns={'Value': 'Pesticides Amount(tons)'})
pesticide_df

In [None]:
# checking for null values
pesticide_df.isnull().value_counts()

In [None]:
# load in the employee data, I chose mean weekly hours worked per employee 
# but there were many other variables to choose from
employee_df = pd.read_csv('crop_yield_data/employee_hours.csv')
employee_df

In [None]:
#Drop unnecessary columns
employee_df = employee_df.drop(['Domain Code', 'Domain', 'Flag', 'Area Code (FAO)', 'Indicator Code', \
                                      'Sex Code', 'Sex', 'Year Code', 'Source', 'Indicator',\
                                'Source Code', 'Note', 'Flag Description', 'Unit'], axis=1)
employee_df = employee_df.rename(columns={'Value':'Mean Hours Worked/employee'})
employee_df

In [None]:
# filter table to include only Maize, this could be any crop of our choosing
yield_df = yield_df.loc[yield_df['Item']== 'Maize']
yield_df

In [None]:
#drop unecessary columns
yield_df = yield_df.drop(['Domain Code', 'Domain', 'Area Code', 'Element Code', 'Item Code', 'Unit', 'Element', \
                          'Year Code'], axis=1)
yield_df

In [None]:
# rename value column. yield will be our target variable
yield_df = yield_df.rename(columns={'Value': 'Yield (hg/ha)'})
yield_df

## Connect to SQL Database

In [None]:
# load tables into a SQL database. we will need to join at least some of the 
# tables in SQL to satisfy the project requirements
# create database in postgres 
# store db password in .gitignore (do we want to use a config.py or .env?)
# from config import db_password
# "postgresql://[user]:[password]@[location]:[port]/[database]"

In [None]:
# connection string for local server
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/[name of database]"

In [None]:
# create engine
engine = create_engine(db_string)

In [None]:
# IS THERE DATA LARGE ENOUGH to REQUIRE CHUNKS??
#if not continue below
#import data to SQL tables using to_sql()
yield_df.to_sql(name='yield', con=engine)
fertilizers_df.to_sql(name='fertilizers', con=engine)
employee_df.to_sql(name='employee', con=engine)
temp_df.to_sql(name='temperature', con=engine)
pesticide_df.to_sql(name='pesticides', con=engine)