In [None]:
# Import dependencies
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import sqlalchemy
from sqlalchemy import inspect, create_engine, select

In [None]:
# bring in countries by continent from url

url = "https://statisticstimes.com/geography/countries-by-continents.php"

countries = pd.read_html(url)
countries_df = countries[2].copy()
countries_df.fillna("")

### Import Crops

In [None]:
crops = pd.read_csv("../project_2_whats_for_breakfast/data/crop1.csv")
crops_df = crops
crops_df.head()

### Import Livestock

In [None]:
livestock = pd.read_csv("../project_2_whats_for_breakfast/data/live1.csv")
livestock_df = livestock
livestock_df.head()

### Start Transformation

In [None]:
# Trim crops
trim_crops = crops.loc[(crops['Year']>1979)]
trim_crops.to_csv("../project_2_whats_for_breakfast/data/trimmed_crops.csv")
trimmed_crops_df = trim_crops.copy()
trimmed_crops_df.head()

In [None]:
# Trim Livestock
trim_livestock = livestock.loc[(livestock['Year']>1979)]
trim_livestock.to_csv("../project_2_whats_for_breakfast/data/trimmed_livestock.csv")
trimmed_livestock_df = trim_livestock.copy()
trimmed_crops_df.head()

In [None]:
# Create Crop Items DF
crop_items = trimmed_crops_df[['Item', 'Element']].copy()
crop_items_dim = pd.DataFrame(crop_items, columns=['Item', 'Element'])
crop_items_dim = crop_items_dim.drop_duplicates()
crop_items_dim

In [None]:
# Creat Livestock Items DF
livestock_items = trimmed_livestock_df[['Item', 'Element']].copy()
livestock_items_dim = pd.DataFrame(livestock_items, columns=['Item', 'Element'])
livestock_items_dim = livestock_items_dim.drop_duplicates()
livestock_items_dim

### Append Crop & Livestock items

In [None]:
items_dimension = crop_items_dim.append(livestock_items_dim, ignore_index=True)
items_dimension

### Create year dimension DF

In [None]:
year = trim_crops['Year']
year = year.drop_duplicates()
year = pd.DataFrame(year, columns=['Year'])
year.dtypes

In [None]:
year.dtypes

### Create Database Connection

In [None]:
protocol = 'postgresql'
username = 'postgres'
password = 'bootcamp'
host = 'localhost'
port = 5432
db_name = 'captain_crunch'
rds_connection_string = f'{protocol}://{username}:{password}@{host}:{port}/{db_name}'
engine = create_engine(rds_connection_string)

### Inspect Tables

In [None]:
inspector = inspect(engine)
inspector.get_table_names()

### Push Data to Database

In [None]:
# Push Year to tbl_year
year.rename(columns={'Year':'year'}, inplace=True)

In [None]:
year.to_sql(name='tbl_year', con=engine, if_exists='append', index=False)

In [None]:
# Push Countries to tbl_countries
countries_df.rename(columns={'Country or Area':'country', 'ISO-alpha3 Code':'country_code', 'Region 1':'region', 'Continent':'continent'}, inplace=True)
countries_df= countries_df[['country', 'country_code', 'region', 'continent']].copy()
countries_df = countries_df.dropna()

countries_df.to_sql(name='tbl_countries', con=engine, if_exists='append', index=False)


In [None]:
# Push Item to tbl_item
items_dimension.rename(columns={'Item_name':'item_name', 'Element':'item_type'}, inplace=True)
items_dimension

In [None]:
items_dimension.to_sql(name='tbl_item', con=engine, if_exists='append', index=False)

In [None]:
# Adjust countries_df
countries_df = countries_df.rename(columns={'Country or Area':'country', 'ISO-alpha3 Code':'country_code', 'Region 1':'region', 'Continent':'continent'})
countries_df = countries_df[['country', 'country_code', 'region', 'continent']]
countries_df

In [None]:
# Push Crops to tbl_crops
trimmed_crops_df
trimmed_crops_df.rename(columns={'Area':'country', 'Item':'item_name', \
   'Element':'item_type', 'Year':'year', 'Unit':'item_unit', 'Value':'value'}, inplace=True)
trimmed_crops_df
crops = pd.merge(trimmed_crops_df, countries_df, how='left', on='country')
crops = crops[['country_code','item_name', 'item_type', 'year', 'item_unit', 'value']]
crops = crops.dropna(how='any')
crops['value'] = crops['value'].astype(int)

crops.to_sql(name='tbl_crops', con=engine, if_exists='append', index=False)


In [None]:
# Push Livestock to tbl_livestock
trimmed_livestock_df.copy()
trimmed_livestock_df.rename(columns={'Area':'country', 'Item':'item_name', \
    'Element':'item_type', 'Year':'year', 'Unit':'item_unit', 'Value':'value'}, inplace=True)
trimmed_livestock_df = trimmed_livestock_df.dropna(how='any')
trimmed_livestock_df['value'] = trimmed_livestock_df['value'].astype(int)
livestock = pd.merge(trimmed_livestock_df, countries_df, how='left', on='country')
livestock = livestock[['country_code', 'item_name', 'item_type', 'year', 'item_unit', 'value']]
livestock = livestock.dropna()
livestock.to_sql(name='tbl_livestock', con=engine, if_exists='append', index=False)


In [None]:
#As the columns of the population dataframe are strings, in order to melt the years we want, we create a list from the
## year dataframe year column, convert that to a string, and use that in our .melt method 
year_list = year['year'].values.tolist()
year_str = [str(i) for i in year_list]

In [None]:
# Use melt to transpose certain columns
population_df = pd.read_csv("../project_2_whats_for_breakfast/data/pop1.csv")
population_df.copy()
population_df = population_df.rename(columns={'Country Name':'country', 'Country Code':'country_code'})
population_df
population_df.dropna(how='any')
population = pd.merge(population_df, countries_df, how='inner', on='country')
population

population = population.melt(
    id_vars=['country', 'country_code_x'], value_vars=year_str, \
        var_name='year', value_name='population'
)
population = population.rename(columns={'country_code_x':'country_code'})
population = population.dropna(how='any')

In [None]:
#Convert the years into int in preperation for database loading. 
population['year']=population['year'].astype(int)

In [None]:
population.loc[(population['year']==1984)]

In [None]:
population.to_sql(name='tbl_population', con=engine, if_exists='append', index=False)