# Project 3 - Create and load table for Illegal Ivory Trade


### Running PageRank on the first graph. 
    Who are the most influential importing countries?

In [1]:
import csv
import math
import os

import numpy as np
import pandas as pd
import psycopg2

In [2]:
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return the rows in a pandas dataframe.

    pandas loads nullable numeric columns as float64.  Any float64 column
    whose non-missing values are all whole numbers that fit in a 64-bit
    integer is converted to the nullable ``Int64`` dtype; every other column
    is left untouched.

    Parameters
    ----------
    query : str
        The SQL select statement to run on the module-level ``connection``.
    rollback_before_flag : bool
        When true, ``connection.rollback()`` is called before the query.
    rollback_after_flag : bool
        When true, ``connection.rollback()`` is called after the query.

    Returns
    -------
    pandas.DataFrame
        The query result, with integer-valued float columns cast to ``Int64``.
    """

    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # fix the float columns that really should be integers

    for column in df:

        if df[column].dtype != "float64":
            continue

        values = df[column].to_numpy()
        present = values[~np.isnan(values)]

        # +/-infinity cannot be represented as an integer; keep such columns
        # as float instead of failing while inspecting them.
        if not np.all(np.isfinite(present)):
            continue

        # Any fractional value means the column is genuinely floating point.
        if np.any(present != np.floor(present)):
            continue

        # Whole numbers outside the int64 range cannot be cast safely.
        if np.any(np.abs(present) >= 2.0 ** 63):
            continue

        df[column] = df[column].astype('Int64')

    return df
    

In [3]:
# Open the shared database connection used by every cell below.
# Each setting can be overridden through the standard libpq environment
# variables (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE) so credentials do
# not have to be edited into the notebook; the defaults are the values that
# were originally hard-coded here.
connection = psycopg2.connect(
    user = os.environ.get("PGUSER", "postgres"),
    password = os.environ.get("PGPASSWORD", "ucb"),
    host = os.environ.get("PGHOST", "postgres"),
    port = os.environ.get("PGPORT", "5432"),
    database = os.environ.get("PGDATABASE", "postgres")
)

In [4]:
# Cursor on the shared connection; the cursor.execute(...) calls in later cells use it.
cursor = connection.cursor()

In [5]:
def my_read_csv_file(file_name, limit):
    """Read the csv file and print only the first ``limit`` rows.

    Every row is still read so the total row count can be reported in the
    summary line printed at the end.

    Parameters
    ----------
    file_name : str
        Path of the csv file to read.
    limit : int
        Maximum number of rows to print.
    """

    i = 0

    # newline="" is what the csv module expects for files it parses, and the
    # context manager guarantees the file is closed even if parsing fails.
    with open(file_name, "r", newline="") as csv_file:

        csv_data = csv.reader(csv_file)

        for row in csv_data:
            i += 1
            if i <= limit:
                print(row)

    print("\nPrinted ", min(limit, i), "lines of ", i, "total lines.")

# Consolidate the 51 CSV files (~500,000 entries each) into a single file

In [6]:
# # # DO NOT RUN CELL AGAIN... TAKES A LONG TIME...
# # Combine csv files into one 

# file_list = [f'/user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_{i}.csv' for i in range(5, 52)]
# output_file = '/user/projects/project-3-shengminx/data/ivory_trade.csv'

# # Set to True so the header is only written once
# first = True


# chunk_size = 100000  # adjust if needed

# for file in file_list:
#     print(f"Processing: {file}")
#     try:
#         for chunk in pd.read_csv(file, dtype=str, engine='python', chunksize=chunk_size):
#             chunk.to_csv(output_file, mode='a', index=False, header=first)
#             first = False
#     except Exception as e:
#         print(f"Error reading {file}: {e}")

Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_5.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_6.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_7.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_8.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_9.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_10.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_11.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_12.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_13.csv
Processing: /user/projects/project-3-shengminx/data/Trade_database_download_v2023.1/trade_db_14.csv
Proce

In [6]:
my_read_csv_file('/user/projects/project-3-shengminx/data/ivory_trade.csv', 10)

['Id', 'Year', 'Appendix', 'Taxon', 'Class', 'Order', 'Family', 'Genus', 'Term', 'Quantity', 'Unit', 'Importer', 'Exporter', 'Origin', 'Purpose', 'Source', 'Reporter.type', 'Import.permit.RandomID', 'Export.permit.RandomID', 'Origin.permit.RandomID']
['779124505', '2003', 'II', 'Acanthastrea echinata', 'Anthozoa', 'Scleractinia', 'Mussidae', 'Acanthastrea', 'live', '11', '', 'AT', 'ID', '', 'T', 'W', 'I', '', '695114dbf3', '']
['736178525', '2003', 'II', 'Acanthastrea echinata', 'Anthozoa', 'Scleractinia', 'Mussidae', 'Acanthastrea', 'live', '5', '', 'DE', 'ID', '', 'T', 'W', 'I', '0b96462380', '33c8b7b223', '']
['513454525', '2003', 'II', 'Acanthastrea echinata', 'Anthozoa', 'Scleractinia', 'Mussidae', 'Acanthastrea', 'live', '5', '', 'DE', 'ID', '', 'T', 'W', 'I', '859233900b', '04c82c7a7b', '']
['601075995', '2003', 'II', 'Acanthastrea echinata', 'Anthozoa', 'Scleractinia', 'Mussidae', 'Acanthastrea', 'live', '5', '', 'DE', 'ID', '', 'T', 'W', 'I', '957c7a20e5', '6b3376e2ce', '']
['

# Drop Table If Exists

In [7]:
# Discard any open (possibly failed) transaction so the statement below starts clean.
connection.rollback()

query = """

drop table if exists ivory_trade;

"""

cursor.execute(query)

# Commit so the drop takes effect before the table is re-created.
connection.commit()

# Create Table ivory_trade

In [8]:
# Discard any open (possibly failed) transaction before issuing the DDL.
connection.rollback()

# Columns are declared in the same order as the CSV header, with the '.' in
# header names such as Reporter.type replaced by '_'.
# NOTE: "Order" is double-quoted in the DDL, so it keeps its capital letter
# (the other, unquoted names are folded to lower case by PostgreSQL) and it
# must be written as "Order" in later queries.
query = """

create table ivory_trade (

  Id bigint primary key,
  Year integer,
  Appendix varchar(5),
  Taxon text,
  Class varchar(100),
  "Order" varchar(100),
  Family varchar(100),
  Genus varchar(100),
  Term varchar(50),
  Quantity numeric,
  Unit varchar(20),
  Importer char(2),
  Exporter char(2),
  Origin char(2),
  Purpose varchar(10),
  Source varchar(10),
  Reporter_type varchar(20),
  Import_permit_RandomID varchar(64),
  Export_permit_RandomID varchar(64),
  Origin_permit_RandomID varchar(64)

);

"""

cursor.execute(query)

# Commit so the new table is visible to the load step.
connection.commit()

# Load the consolidated CSV file into the database table

In [9]:
# Discard any open (possibly failed) transaction before the bulk load.
connection.rollback()

# COPY ... FROM '<path>' is executed by the PostgreSQL server, so the path must
# be readable from the server process (presumably a volume shared with the
# notebook container - verify if the load fails with "No such file").
# Options used: comma delimiter, empty fields loaded as NULL, first line
# (the header) skipped. No column list is given, so fields are matched to the
# table columns by position.
query = """

copy ivory_trade
from '/user/projects/project-3-shengminx/data/ivory_trade.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

# Commit the loaded rows.
connection.commit()

# Check Table

In [10]:
# Roll back before and after the query so this read-only check neither inherits
# nor leaves behind an open transaction.
rollback_before_flag = True
rollback_after_flag = True

# Sanity check: fetch a handful of rows to confirm the load worked.
query = """

select *
from ivory_trade
limit 10

"""

# Last expression of the cell, so the returned dataframe is displayed.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)


Unnamed: 0,id,year,appendix,taxon,class,Order,family,genus,term,quantity,unit,importer,exporter,origin,purpose,source,reporter_type,import_permit_randomid,export_permit_randomid,origin_permit_randomid
0,779124505,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,11,,AT,ID,,T,W,I,,695114dbf3,
1,736178525,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,5,,DE,ID,,T,W,I,0b96462380,33c8b7b223,
2,513454525,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,5,,DE,ID,,T,W,I,859233900b,04c82c7a7b,
3,601075995,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,5,,DE,ID,,T,W,I,957c7a20e5,6b3376e2ce,
4,313595725,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,5,,DE,ID,,T,W,I,51c0ea3d6b,5d5ecf15bb,
5,979117825,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,5,,DE,ID,,T,W,I,13a5a013bd,aac2fd94a2,
6,717463085,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,11,,DE,ID,,T,W,I,5682b6d89d,63182b53c1,
7,557344485,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,10,,DE,ID,,T,W,I,561eb5bcdf,71d44aa681,
8,212870465,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,8,,FR,ID,,T,W,I,d8d62c0881,51147f70e0,
9,989226655,2003,II,Acanthastrea echinata,Anthozoa,Scleractinia,Mussidae,Acanthastrea,live,4,,ES,ID,,T,W,I,,bfeabf81bb,
