## Creating a trade table

Importing the trade data from the FAO, examining and cleaning the table, and exporting it to an SQL database.

In [None]:
# Import packages
import pandas as pd
import numpy as np 
import requests 
import zipfile
import psycopg2
import sqlalchemy

import _functions_sql as fs
import _functions_data_files as fdf

# Location of the raw FAOSTAT download. Both names are handed to fdf.get_path()
# when the CSV is read, so they must be set before that cell runs.
# File name of the normalized detailed trade matrix export
source_file = 'Trade_DetailedTradeMatrix_E_All_Data_(Normalized).csv'
# Directory name that holds the file
source_dir = 'faostat_trade_matrix'

In [None]:
# Load the raw CSV into a pandas DataFrame
raw_csv_path = fdf.get_path(source_file, source_dir)
trade_matrix = pd.read_csv(
    raw_csv_path,
    encoding='latin-1',
    # Read 'Note' as text up front; this handles the DtypeWarning
    # without resorting to 'low_memory=False'
    converters={'Note': str},
)

In [None]:
# Examine the data: preview the first rows of the raw table to check that the
# columns were read as expected
trade_matrix.head()

In [None]:
# Print a concise summary of the raw table (column names and dtypes)
trade_matrix.info()

In [None]:
# Count rows that repeat an earlier row in every column
# (True = duplicate of an earlier row, False = first occurrence)
trade_matrix.duplicated(subset=None, keep='first').value_counts()

In [None]:
# Code/flag columns that are not carried over into the cleaned table
unneeded_columns = [
    'Reporter Country Code',
    'Reporter Country Code (M49)',
    'Partner Country Code',
    'Partner Country Code (M49)',
    'Item Code (CPC)',
    'Year Code',
    'Element Code',
    'Flag',
]
# Remove them from the DataFrame in place
trade_matrix.drop(columns=unneeded_columns, inplace=True)

In [None]:
# Map the remaining source headers to snake_case column names
snake_case_names = {
    'Reporter Countries': 'reporting_country',
    'Partner Countries': 'partner_country',
    'Item Code': 'item_code',
    'Item': 'item',
    'Element': 'element',
    'Year': 'year',
    'Unit': 'unit',
    'Value': 'value',
}
# Apply the new names in place
trade_matrix.rename(columns=snake_case_names, inplace=True)

In [None]:
# List the distinct values of the element column; these become the
# column headers when the table is pivoted
pd.unique(trade_matrix['element'])

In [None]:
# List the distinct values of the unit column
pd.unique(trade_matrix['unit'])

In [None]:
# Reshape from long to wide: every distinct 'element' becomes its own column.
# Rows are identified by the key columns below; values sharing the same key
# and element are summed.
row_keys = ['reporting_country', 'partner_country', 'item_code', 'item', 'year', 'unit']
trade_matrix_new = pd.pivot_table(
    trade_matrix,
    index=row_keys,
    columns='element',
    values='value',
    aggfunc='sum',
).reset_index()  # turn the key columns back into ordinary columns

In [None]:
# Print a concise summary of the pivoted table (column names and dtypes)
trade_matrix_new.info()

In [None]:
# Preview the first rows of the pivoted table
trade_matrix_new.head()

In [None]:
# Look for rows that agree on countries, year, item, unit and both export columns.
# keep=False marks every member of a duplicate group, not only the repeats.
duplicate_key = [
    'reporting_country',
    'partner_country',
    'year',
    'item',
    'unit',
    'Export Quantity',
    'Export Value',
]
duplicates = trade_matrix_new.loc[
    trade_matrix_new.duplicated(subset=duplicate_key, keep=False)
]
duplicates.head()

In [None]:
# Bring the pivoted element columns in line with the snake_case naming
element_column_names = {
    'Export Quantity': 'export_quantity',
    'Export Value': 'export_value',
    'Import Value': 'import_value',
    'Import Quantity': 'import_quantity',
}
# Apply the new names in place
trade_matrix_new.rename(columns=element_column_names, inplace=True)

In [None]:
# Clear the name attached to the column axis so the table has plain,
# unnamed column headers
trade_matrix_new = trade_matrix_new.rename_axis(mapper=None, axis='columns')

In [None]:
# Export the table to the database
engine = fs.get_engine()
schema = 'capstone_envirolytics'
table_name = 'fao_trade_matrix_new'

# NOTE(review): fs.get_engine() presumably returns None when no connection
# could be set up - confirm against _functions_sql
if engine is not None:
    try:
        trade_matrix_new.to_sql(
            name=table_name,      # Name of SQL table
            con=engine,           # Engine or connection
            if_exists='replace',  # Drop the table before inserting new values
            schema=schema,        # Use schema that was defined earlier
            index=False,          # Do NOT write the DataFrame index as a column
            chunksize=5000,       # Number of rows in each batch written at a time
            method='multi',       # Pass multiple values in a single INSERT clause
        )
    # Error handling: psycopg2.DatabaseError is a subclass of Exception, so a
    # single handler covers it; the error is reported instead of raised so the
    # notebook keeps running
    except Exception as error:
        print(error)
        engine = None
    else:
        # Only reached when to_sql finished without raising
        print(f"The {table_name} table was imported successfully.")
else:
    # Make the skipped export visible instead of silently doing nothing
    print(f"No database engine available; the {table_name} table was not exported.")