In [1]:
import pandas as pd             # data package
import matplotlib.pyplot as plt # graphics 
import datetime as dt
import numpy as np
from census import Census # This is new...

import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import os  

#import weightedcalcs as wc
#import numpy as np

import pyarrow as pa
import pyarrow.parquet as pq


In [4]:
# Read only the first column of the header-less tariff list and label it "hs-code".
tariff_list = pd.read_csv(
    "./tariff-lists/canada-steel-list.csv",
    header=None,
    usecols=[0],
).set_axis(["hs-code"], axis=1)


In [5]:
# Preview the first rows of the loaded tariff list.
tariff_list.head()

Unnamed: 0,hs-code
0,3406.00.90
1,3506.10.00
2,6601.10.00
3,6601.91.00
4,6601.99.00


In [6]:
def fix_list(tariff_list):
    """Reduce a tariff list to its distinct six-character HS prefixes.

    Parameters
    ----------
    tariff_list : pandas.DataFrame
        Frame with an ``"hs-code"`` column. Values are converted to text, so
        dotted strings such as ``"3406.00.90"`` and plain numbers both work.
        The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per distinct prefix, sorted ascending. The prefix is both the
        index (named ``"hs6"``) and the single ``"hs6"`` column.

    Notes
    -----
    Prints rows 1-4 of the derived ``hs6`` series as a quick visual check.
    """
    codes = tariff_list["hs-code"].astype(str)

    # Remove the periods literally. regex=False makes this independent of the
    # pandas version's default, under which "." could be read as a wildcard.
    codes = codes.str.replace(".", "", regex=False)

    # Codes shorter than eight characters get a single "0" prepended.
    # Done with a boolean mask instead of iterrows + .loc[index] so that
    # frames with repeated index labels are padded row by row, not label by label.
    too_short = codes.str.len() < 8
    codes = codes.mask(too_short, "0" + codes)

    hs6 = codes.str[0:6].rename("hs6")

    print(hs6.iloc[1:5])

    # Distinct, sorted prefixes; missing values are skipped.
    unique_hs6 = hs6.dropna().drop_duplicates().sort_values().to_numpy()

    out = pd.DataFrame({"hs6": unique_hs6}, index=pd.Index(unique_hs6, name="hs6"))

    return out
            

In [7]:
# Reduce the tariff list to its distinct HS6 codes (fix_list also prints a small sample).
canada_list = fix_list(tariff_list)



1    350610
2    660110
3    660191
4    660199
Name: hs6, dtype: object


  


In [9]:
# Inspect the last 25 rows of the de-duplicated HS6 list.
canada_list.tail(25)

Unnamed: 0_level_0,hs6
hs6,Unnamed: 1_level_1
950823,950823
950824,950824
950825,950825
950826,950826
950829,950829
950830,950830
950840,950840
960310,960310
960329,960329
960330,960330


In [10]:
def get_aggregate_hs6_exports(ecom):
    """Return total 2024 U.S. exports, to all countries, for one HS6 code.

    Queries the Census international-trade exports endpoint at the HS6 level
    for January-December 2024 and sums the monthly ``ALL_VAL_MO`` values of
    the rows whose ``CTY_NAME`` is ``"TOTAL FOR ALL COUNTRIES"``.

    Parameters
    ----------
    ecom : str
        HS6 commodity code, e.g. ``"962000"``. A trailing ``*`` wildcard is
        appended in the query.

    Returns
    -------
    float
        Sum of the monthly export values; ``0.0`` when the API answers with
        HTTP 204 or returns no data rows.

    Raises
    ------
    requests.HTTPError
        For 4xx/5xx responses.
    """
    # NOTE(review): an API key is committed in this notebook. Set the
    # CENSUS_API_KEY environment variable to override it; the literal is kept
    # only as a fallback so existing runs keep working, and should be rotated.
    my_key = "&key=" + os.environ.get(
        "CENSUS_API_KEY", "34e40301bda77077e24c859c6c6c0b721ad73fc7"
    )

    end_use = "hs?get=CTY_NAME,ALL_VAL_MO,CTY_CODE,COMM_LVL,E_COMMODITY_SDESC"

    # The country filter is deliberately left empty; the all-country total is
    # picked out of the response by name below.
    url = (
        "https://api.census.gov/data/timeseries/intltrade/exports/"
        + end_use
        + my_key
        + "&time=" + "from+2024-01+to+2024-12"
        + "&COMM_LVL=HS6"
        + "&E_COMMODITY=" + ecom + "*"
        + "&CTY_CODE="
    )

    # Bound the wait so one stalled request cannot hang the notebook.
    r = requests.get(url, timeout=60)

    # NOTE(review): 204 (no content) is assumed to be how the API signals
    # "nothing matched" - confirm against the Census API documentation.
    if r.status_code == 204:
        return 0.0

    # Surface real failures as HTTP errors rather than JSON decode errors.
    r.raise_for_status()

    payload = r.json()  # first element is the header row, the rest are data rows

    df = pd.DataFrame(payload[1:], columns=payload[0])

    is_world_total = df["CTY_NAME"] == "TOTAL FOR ALL COUNTRIES"

    return df.loc[is_world_total, "ALL_VAL_MO"].astype(float).sum()

In [11]:
# Spot check: total 2024 exports to all countries for HS6 code 962000.
get_aggregate_hs6_exports("962000")

31916102.0

In [12]:
def get_aggregate_exports():
    """Return total 2024 U.S. exports for country code 1220 across all HS2 chapters.

    Queries the Census international-trade exports endpoint at the HS2 level
    for January-December 2024 with ``CTY_CODE=1220`` and sums ``ALL_VAL_MO``
    over every returned row. The HTTP status code is printed as a diagnostic.

    Returns
    -------
    float
        Sum of all monthly export values; ``0.0`` when the API answers with
        HTTP 204 or returns no data rows.

    Raises
    ------
    requests.HTTPError
        For 4xx/5xx responses.
    """
    # NOTE(review): an API key is committed in this notebook. Set the
    # CENSUS_API_KEY environment variable to override it; the literal is kept
    # only as a fallback so existing runs keep working, and should be rotated.
    my_key = "&key=" + os.environ.get(
        "CENSUS_API_KEY", "34e40301bda77077e24c859c6c6c0b721ad73fc7"
    )

    end_use = "hs?get=CTY_NAME,ALL_VAL_MO,E_COMMODITY,E_COMMODITY_SDESC"

    # HS2 is the level requested here; summing every returned row gives the
    # total for the selected country.
    url = (
        "https://api.census.gov/data/timeseries/intltrade/exports/"
        + end_use
        + my_key
        + "&time=" + "from+2024-01+to+2024-12"
        + "&COMM_LVL=HS2"
        + "&CTY_CODE=" + "1220"
    )

    # Bound the wait so one stalled request cannot hang the notebook.
    r = requests.get(url, timeout=60)

    print(r.status_code)

    # NOTE(review): 204 (no content) is assumed to be how the API signals
    # "nothing matched" - confirm against the Census API documentation.
    if r.status_code == 204:
        return 0.0

    # Surface real failures as HTTP errors rather than JSON decode errors.
    r.raise_for_status()

    payload = r.json()  # first element is the header row, the rest are data rows

    df = pd.DataFrame(payload[1:], columns=payload[0])

    return df["ALL_VAL_MO"].astype(float).sum()

In [13]:
def get_exports_hs10(ecom, tariff):
    """Build a one-row summary of 2024 U.S. exports to Canada for one HS6 code.

    Despite the name, the query is made at the HS6 level (``COMM_LVL=HS6``)
    with ``CTY_CODE=1220`` (Canada) for January-December 2024.

    Parameters
    ----------
    ecom : str
        HS6 commodity code, e.g. ``"962000"``. A trailing ``*`` wildcard is
        appended in the query.
    tariff : float
        Tariff value stored unchanged in the ``"tariff"`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``hs6`` with columns ``exports`` (sum of the monthly
        values), ``description``, ``hs-code``, ``tariff``, ``export_share``
        (exports to Canada as a percentage of the all-country total from
        ``get_aggregate_hs6_exports``) and ``color``. When the request does
        not return HTTP 200, a single placeholder row with zero exports is
        returned instead.

    Warns
    -----
    RuntimeWarning
        When the placeholder row is produced for a status other than 204, so
        that API failures are not silently recorded as "no trade".
    """
    import warnings

    # NOTE(review): an API key is committed in this notebook. Set the
    # CENSUS_API_KEY environment variable to override it; the literal is kept
    # only as a fallback so existing runs keep working, and should be rotated.
    my_key = "&key=" + os.environ.get(
        "CENSUS_API_KEY", "34e40301bda77077e24c859c6c6c0b721ad73fc7"
    )

    end_use = "hs?get=CTY_NAME,ALL_VAL_MO,CTY_CODE,COMM_LVL,E_COMMODITY_SDESC"

    # Tariff lists are matched at HS6 because codes are only uniform across
    # countries up to six digits. Canada is country code 1220.
    url = (
        "https://api.census.gov/data/timeseries/intltrade/exports/"
        + end_use
        + my_key
        + "&time=" + "from+2024-01+to+2024-12"
        + "&COMM_LVL=HS6"
        + "&E_COMMODITY=" + ecom + "*"
        + "&CTY_CODE=" + "1220"
    )

    # Bound the wait so one stalled request cannot hang the whole loop.
    r = requests.get(url, timeout=60)

    if r.status_code == 200:

        payload = r.json()  # first element is the header row, the rest are data rows

        raw = pd.DataFrame(payload[1:], columns=payload[0])

        monthly = pd.DataFrame({
            "hs6": ecom,
            "exports": raw["ALL_VAL_MO"].astype(float),
            "description": raw["E_COMMODITY_SDESC"],
        })

        # There should only be one HS6 code per call; the groupby collapses
        # the monthly rows into a single row keyed by that code.
        top_products = monthly.groupby(["hs6"]).agg({"exports": "sum", "description": "first"})

        top_products["hs-code"] = ecom

        top_products["tariff"] = tariff

        top_products["export_share"] = 100.*( top_products["exports"] / get_aggregate_hs6_exports(ecom) )

        top_products["color"] = "#ff0000"

        return top_products

    # Some products have no trade with Canada and the API returns no data.
    # NOTE(review): 204 is assumed to be that "no content" signal; any other
    # status is still mapped to a zero row, but loudly.
    if r.status_code != 204:
        warnings.warn(
            f"Census API returned HTTP {r.status_code} for HS6 {ecom}; recording zero exports.",
            RuntimeWarning,
            stacklevel=2,
        )

    new_row = {'exports': 0.0, 'description': " ", 'hs-code': ecom, 'tariff': tariff, 'export_share': 0.0, 'color': "#EE1C25"}

    # Build the placeholder directly so it keeps numeric dtypes and the same
    # "hs6" index name as the populated result.
    return pd.DataFrame([new_row], index=pd.Index([ecom], name="hs6"))

In [14]:
# Spot check the per-product query for HS6 code 962000 with the tariff value set to 25.0.
foo = get_exports_hs10("962000", 25.0)

In [15]:
# Display the result of the spot check.
foo

Unnamed: 0_level_0,exports,description,hs-code,tariff,export_share,color
hs6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
962000,7350862.0,"MONOPODS, BIPODS, TRIPODS AND SIMILIAR ARTICLES",962000,25.0,23.031829,#ff0000


In [17]:
# Work through the HS6 tariff list and build one export row per product.

tariff_rate = 25.0

# Collect the per-product frames first and concatenate once. Growing the
# frame inside the loop re-copies it on every iteration, and seeding it with
# an empty frame makes the column dtypes depend on the pandas version.
product_frames = [
    get_exports_hs10(hs6_code, tariff_rate) for hs6_code in canada_list["hs6"]
]

if product_frames:
    # Drop the index name so the saved files keep an unnamed index, as before.
    trade_df_canada = pd.concat(product_frames).rename_axis(None)
else:
    # An empty tariff list yields the same empty frame the loop started from.
    trade_df_canada = pd.DataFrame(columns=['exports', "description", 'hs-code'])

In [18]:
# The twenty products with the largest export value.
trade_df_canada.sort_values(by="exports", ascending=False).head(20)

Unnamed: 0,exports,description,hs-code,tariff,export_share,color
711291,3852355000.0,WASTE & SCRAP GOLD EXCL SWPNGS CNTNG OTH PREC ...,711291,25.0,96.988731,#ff0000
851762,2789308000.0,MACH FOR RECP/CONVR/TRANS/REGN OF VOICE/IMAGE/...,851762,25.0,12.377326,#ff0000
847130,1924976000.0,PORT DIGTL AUTOMATIC DATA PROCESS MACH NOT > 1...,847130,25.0,28.947967,#ff0000
851713,1400582000.0,SMARTPHONES,851713,25.0,14.56411,#ff0000
760612,1003747000.0,"ALUMINUM ALLOY RECT PLATES ETC, OVER .2 MM THICK",760612,25.0,35.576448,#ff0000
732690,815696800.0,ARTICLES OF IRON OR STEEL NESOI,732690,25.0,13.625715,#ff0000
950300,764783300.0,"TRICYCLE, SCOOTR, PEDAL CAR & SIM WHEELED TYS;...",950300,25.0,62.103975,#ff0000
940199,679886900.0,"PARTS OF SEATS, NESOI",940199,25.0,31.756389,#ff0000
731815,620433600.0,THREADED SCREWS AND BOLTS NESOI OF IRON OR STEEL,731815,25.0,24.821158,#ff0000
710812,619758700.0,"GOLD, NONMONETARY, UNWROUGHT NESOI",710812,25.0,2.134695,#ff0000


In [19]:
# Total export value across every product on the list.
trade_df_canada["exports"].sum()

34286716216.0

In [20]:
# Store the HS code as text so it is not reinterpreted as a number downstream.
trade_df_canada = trade_df_canada.astype({'hs-code': 'string'})

# Save the result twice: a typed parquet file and a plain CSV without the index.
arrow_table = pa.Table.from_pandas(trade_df_canada)
pq.write_table(arrow_table, "./data/canada-steel-US-exports-031225.parquet")

trade_df_canada.to_csv("./data/canada-steel-US-exports-031225.csv", index=False)

In [21]:
# Combined export value of the twenty largest products.
trade_df_canada["exports"].sort_values(ascending=False).head(20).sum()

18621437772.0