In [35]:
import pandas as pd
import requests
from datetime import datetime
from tqdm import tqdm
import os
import logging

### Function to get Data from Trade API

Update the query parameters as needed, based on the PDF in the API Reference Docs folder.

In [32]:
## Download one month of import data by HS or NAICS code, country and district (the cells below request 2014-01 through 2022-11)


def getCountryDistrictValsHS(code,year,month,dirPath,comm_lvl,summ_lvl2,timeout=300):
    """Download one month of import data from the Census trade API and save it as a gzipped CSV.

    Parameters
    ----------
    code : str
        Endpoint to query, either 'hs' or 'naics'. It selects both the URL
        suffix and the list of columns requested.
    year, month : str
        Sent as the YEAR and MONTH query predicates and used in the output
        file name.
    dirPath : str
        Root output directory. The log file is written directly inside it and
        the data file inside ``<dirPath>/<code>/``. Both directories are
        created if missing.
    comm_lvl, summ_lvl2 : str
        Sent as the COMM_LVL and SUMMARY_LVL2 query predicates and used in the
        log and output file names.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot hang a long
        batch run forever.

    Returns
    -------
    pandas.DataFrame
        The response rows, with the first row of the JSON payload used as the
        column header. An empty DataFrame is returned (and no file is written)
        when the API sends back no rows.

    Raises
    ------
    ValueError
        If ``code`` is neither 'hs' nor 'naics'.
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # Validate first so an unsupported code fails with a clear message and
    # without touching the filesystem or the network.
    if code == 'hs':
        fields = ["I_COMMODITY","I_COMMODITY_SDESC","I_COMMODITY_LDESC",
                  "CTY_CODE","DISTRICT",'DIST_NAME',"RP","DUT_VAL_MO",
                  "CON_VAL_MO","CON_QY1_MO","CAL_DUT_MO",
                  "GEN_CHA_MO","GEN_VAL_MO","GEN_CIF_MO","GEN_QY1_MO",'UNIT_QY1','CC_MO',
                  "CNT_CHA_MO","CNT_VAL_MO","CNT_WGT_MO",
                  "AIR_CHA_MO","AIR_VAL_MO","AIR_WGT_MO",
                  "VES_CHA_MO","VES_VAL_MO","VES_WGT_MO"]
        # The HS query is restricted to these countries; requests sends a list
        # value as one repeated CTY_NAME parameter per entry.
        extra = {"CTY_NAME":['INDIA','CHINA','INDONESIA','VIETNAM','MEXICO','CANADA','BRAZIL','TAIWAN']}
    elif code == 'naics':
        fields = ["NAICS", "NAICS_LDESC", "NAICS_SDESC",
                  "CTY_CODE",'CTY_NAME',"DISTRICT",'DIST_NAME',"DUT_VAL_MO",
                  "CON_VAL_MO","CAL_DUT_MO",
                  "GEN_CHA_MO","GEN_VAL_MO","GEN_CIF_MO",'CC_MO',
                  "CNT_CHA_MO","CNT_VAL_MO","CNT_WGT_MO",
                  "AIR_CHA_MO","AIR_VAL_MO","AIR_WGT_MO",
                  "VES_CHA_MO","VES_VAL_MO","VES_WGT_MO"]
        # No country filter for NAICS: CTY_NAME is requested as a column instead.
        extra = {}
    else:
        raise ValueError(f"Unsupported code {code!r}; expected 'hs' or 'naics'.")

    base = f"https://api.census.gov/data/timeseries/intltrade/imports/{code}"
    params = {"get":','.join(fields),
              **extra,
              "YEAR":year,
              "MONTH":month,
              "COMM_LVL":comm_lvl,
              "SUMMARY_LVL2":summ_lvl2}

    # Creating <dirPath>/<code> also creates dirPath, which the log file needs.
    outDir = os.path.join(dirPath, code)
    os.makedirs(outDir, exist_ok=True)

    # basicConfig only takes effect while the root logger has no handlers, so
    # in practice the first call in a kernel session decides the log file.
    logging.basicConfig(filename=os.path.join(dirPath,f'{comm_lvl}_{summ_lvl2}_{code}_download.log'), encoding='utf-8', level=logging.DEBUG)

    response = requests.get(base, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of tripping over a non-JSON error body.
    response.raise_for_status()

    logTime = datetime.now().strftime(r"%Y_%m_%d-%I_%M_%S_%p")

    # An empty body (e.g. HTTP 204 when nothing matches the query) is not JSON.
    if response.status_code == 204 or not response.content:
        rows = []
    else:
        rows = response.json()
    if not rows:
        logging.warning(f"{logTime}_no rows returned for {code} {comm_lvl} {summ_lvl2} {year}-{month}; nothing saved.")
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    new_header = df.iloc[0] # the first row of the payload holds the column names
    df = df[1:] # keep the data rows; the index stays 1-based, as in earlier downloads
    df.columns = new_header

    fileName = os.path.join(outDir, f"{comm_lvl}_{summ_lvl2}_{year}_{month}.gzip")

    df.to_csv(fileName, compression='gzip')
    logging.info(f"{logTime}_{df.shape} rows,columns saved.! {df.memory_usage().sum()/1e6}")

    return df

In [33]:
# Query periods as strings: years "2014"-"2022" and zero-padded months "01"-"12".
years = list(map(str, range(2014, 2023)))
months = [f"{month_number:02d}" for month_number in range(1, 13)]

# All (year, month) pairs in chronological order, leaving out the last one (2022-12).
yearMonthFilter = [
    (year_str, month_str)
    for year_str in years
    for month_str in months
    if (year_str, month_str) != (years[-1], months[-1])
]

In [34]:
# Download and save one file per (year, month) pair from the 'naics' endpoint;
# tqdm shows a progress bar over the whole run. The returned frames are discarded.
naicsRequest = {
    'code': 'naics',
    'dirPath': '.apiResponses',
    'comm_lvl': 'NA3',
    'summ_lvl2': 'NACYDT',
}
for year, month in tqdm(yearMonthFilter):
    getCountryDistrictValsHS(year=year, month=month, **naicsRequest)

100%|██████████| 107/107 [29:22<00:00, 16.47s/it]
