In [2]:
# Import packages
import pandas as pd
import numpy as np 
import requests 
import zipfile
import psycopg2
import sqlalchemy
import _functions_sql as fs
import _functions_data_files as fdf

In [4]:
# Import CSV file
source_dir = 'faostat_trade_matrix'
file_name = 'Trade_DetailedTradeMatrix_E_All_Data_(Normalized).csv'
# Converters for 'read_csv': parse the 'Note' column as text so the mixed-type
# DtypeWarning is avoided without resorting to 'low_memory=False'. As a side
# effect, missing values in that column arrive as empty strings ('').
conv = {'Note': str}
trade_matrix = pd.read_csv(
    fdf.get_file_path(file_name, source_dir),
    encoding='latin-1',
    converters=conv,
)

In [5]:
# Preview the first rows of the freshly loaded trade matrix
trade_matrix.head()

Unnamed: 0,Reporter Country Code,Reporter Country Code (M49),Reporter Countries,Partner Country Code,Partner Country Code (M49),Partner Countries,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,4,'012,Algeria,230,'21424,"Cashew nuts, shelled",5910,Export Quantity,2016,2016,t,3.0,X
1,2,'004,Afghanistan,4,'012,Algeria,230,'21424,"Cashew nuts, shelled",5922,Export Value,2016,2016,1000 USD,23.0,X
2,3,'008,Albania,3,'008,Albania,828,'25020.01,Cigarettes,5610,Import Quantity,2004,2004,t,12.0,A
3,3,'008,Albania,3,'008,Albania,828,'25020.01,Cigarettes,5622,Import Value,2004,2004,1000 USD,104.0,A
4,4,'012,Algeria,2,'004,Afghanistan,231,'21422,"Almonds, shelled",5610,Import Quantity,2005,2005,t,3.0,A


In [11]:
# Summarise the frame: row count, column dtypes and memory usage
trade_matrix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49159659 entries, 0 to 49159658
Data columns (total 8 columns):
 #   Column             Dtype  
---  ------             -----  
 0   reporting_country  object 
 1   partner_country    object 
 2   item_code          int64  
 3   item               object 
 4   element            object 
 5   year               int64  
 6   unit               object 
 7   value              float64
dtypes: float64(1), int64(2), object(5)
memory usage: 2.9+ GB


In [7]:
# Count rows that are exact copies of an earlier row across all columns
# (False = first occurrence / unique, True = duplicate).
# NOTE: this compares every column of every row, so it is slow on the full
# data set.
trade_matrix.duplicated(subset=None, keep='first').value_counts()

KeyboardInterrupt: 

In [8]:
# Remove the code / flag columns that are not used further on.
# NOTE: the drop happens in place, so re-running this cell without reloading
# the data raises a KeyError because the columns are already gone.
trade_matrix.drop(
    columns=[
        'Reporter Country Code',
        'Reporter Country Code (M49)',
        'Partner Country Code',
        'Partner Country Code (M49)',
        'Item Code (CPC)',
        'Year Code',
        'Element Code',
        'Flag',
    ],
    inplace=True,
)

In [9]:
# Give the remaining columns snake_case names (applied in place)
trade_matrix.rename(
    columns={
        'Reporter Countries': 'reporting_country',
        'Partner Countries': 'partner_country',
        'Item Code': 'item_code',
        'Item': 'item',
        'Element': 'element',
        'Year': 'year',
        'Unit': 'unit',
        'Value': 'value',
    },
    inplace=True,
)

In [10]:
# List the distinct values of the 'element' column, in order of appearance
pd.unique(trade_matrix['element'])

array(['Export Quantity', 'Export Value', 'Import Quantity',
       'Import Value'], dtype=object)

In [26]:
# Reshape from long to wide: one row per reporter / partner / item / year and
# one column per distinct 'element' value. Values that share the same key are
# summed; the grouping keys are turned back into ordinary columns afterwards.
trade_matrix_new = (
    trade_matrix
    .pivot_table(
        values='value',
        index=['reporting_country', 'partner_country', 'item_code', 'item', 'year'],
        columns='element',
        aggfunc='sum',
    )
    .reset_index()
)

In [47]:
# Preview the first rows of the pivoted (wide) table
trade_matrix_new.head()

Unnamed: 0,reporting_country,partner_country,item_code,item,year,export_quantity,export_value,import_quantity,import_value
0,Afghanistan,Algeria,230,"Cashew nuts, shelled",2016,3.0,23000.0,,
1,Afghanistan,Algeria,561,Raisins,2014,12.46,27000.0,,
2,Afghanistan,Algeria,723,"Other stimulant, spice and aromatic crops, n.e.c.",2014,0.16,0.0,,
3,Afghanistan,Algeria,1293,Crude organic material n.e.c.,2015,,1000.0,,
4,Afghanistan,Algeria,1293,Crude organic material n.e.c.,2016,,1000.0,,


In [35]:
# Rename the element columns created by the pivot to snake_case (in place).
# The former "'element ': 'id'" entry was removed: its key (with a trailing
# space) matched no column, and after the pivot 'element' is only the name of
# the columns axis, not a column label, so the entry never had any effect.
trade_matrix_new.rename(
    columns={
        'Export Quantity': 'export_quantity',
        'Export Value': 'export_value',
        'Import Quantity': 'import_quantity',
        'Import Value': 'import_value',
    },
    inplace=True,
)

In [31]:
# Scale export_value by 1000 (the source rows for 'Export Value' carry the
# unit '1000 USD').
# NOTE: not idempotent - every re-run of this cell multiplies the column again.
trade_matrix_new.loc[:, 'export_value'] = trade_matrix_new.loc[:, 'export_value'].mul(1000)

In [33]:
# Scale import_value by 1000 (the source rows for 'Import Value' carry the
# unit '1000 USD').
# NOTE: not idempotent - every re-run of this cell multiplies the column again.
trade_matrix_new.loc[:, 'import_value'] = trade_matrix_new.loc[:, 'import_value'].mul(1000)

In [46]:
# Clear the name of the columns axis (left over from the pivot) so it does not
# show up as an extra header label
trade_matrix_new = trade_matrix_new.rename_axis(None, axis='columns')

In [48]:
# Export the pivoted table to the database.
# The engine comes from the project helper `fs.get_engine()`; the code below
# treats a return value of None as "no engine available".
engine = fs.get_engine()
schema = 'capstone_envirolytics'
table_name = 'fao_trade_matrix'

if engine is not None:
    try:
        trade_matrix_new.to_sql(
            name=table_name,      # Name of the target SQL table
            con=engine,           # Engine or connection
            if_exists='replace',  # Drop the table before inserting new values
            schema=schema,        # Target schema defined above
            index=False,          # Do NOT write the DataFrame index as a column
            chunksize=5000,       # Number of rows written per batch
            method='multi',       # Pass multiple rows in a single INSERT clause
        )
        print(f"The {table_name} table was imported successfully.")
    # Best-effort error handling: report the problem and invalidate the engine.
    # psycopg2.DatabaseError is a subclass of Exception, so catching Exception
    # alone covers the same errors as the former (Exception, DatabaseError) tuple.
    except Exception as error:
        print(error)
        engine = None
else:
    # Make the skipped export visible instead of finishing silently
    print(f"No database engine available; the {table_name} table was not exported.")