In [2]:
import requests
import bs4
import pandas as pd
import os
from tqdm.notebook import tqdm
import itertools
from datetime import datetime


In [3]:
if not os.path.exists('INDIA'):
    os.mkdir('INDIA')
dirname = os.getcwd()
filename = os.path.join(dirname, 'INDIA')

In [4]:
def parse_data(data, type):
    soup = bs4.BeautifulSoup(data, 'html.parser')
    rows = soup.find_all('tr')
    headers_list = rows[0].find_all_next('th')
    headers = [header.text.strip().replace('\xa0', ' ') for header in headers_list]
    rows = rows[1:] # удаляем нулевую строку с заголовками
    rows = [row.find_all('td') for row in rows] # достаем столбцы из каждой строки таблицы (кроме заголовков)
    rows_data = []
    for row in rows:
        rows_data.append([col.text.strip().lstrip().replace('\xa0', ' ') for col in row])
    df = pd.DataFrame(columns = headers, data=rows_data)
    if (type=='q'):
        df = df[[headers[0], headers[1], headers[2], headers[5], headers[3]]].rename(columns={headers[5] : 'Quantity'})
    else:
        df = df[[headers[0], headers[1], headers[2], headers[4]]].rename(columns={headers[4] : 'USD'})
    return(df)

In [5]:
def get_table_in_quantity_and_usd(month, year):
    # количество
    data_for_quantity = {
        'radioCY': 1,
        'Mm1': month,
        'yy1': year,
        'cntcode': 344,
        'hslevel': 8,
        'sort': 0,
        'radioDAll': 1,
        'radioqty': 1
    }
    # usd
    data_for_usd = {
        'radioCY': 1,
        'Mm1': month,
        'yy1': year,
        'cntcode': 344,
        'hslevel': 8,
        'sort': 0,
        'radioDAll': 1,
        'radiousd': 1
    }
    # India export data
    url = 'https://tradestat.commerce.gov.in/meidb/cntcom.asp?ie=e'
    response = requests.post(url, data=data_for_quantity, verify=False).text
    df_q = parse_data(response, type='q')
    response = requests.post(url, data=data_for_usd, verify=False).text
    df_usd = parse_data(response, type='usd')
    df_ex = df_usd.merge(df_q, how='inner', on=['HSCode', 'Commodity', 'S.No.'])
    df_ex['Flow'] = 'Ex'
    # India import data
    url = 'https://tradestat.commerce.gov.in/meidb/cntcom.asp?ie=i'
    response = requests.post(url, data=data_for_quantity, verify=False).text
    df_q = parse_data(response, type='q')
    response = requests.post(url, data=data_for_usd, verify=False).text
    df_usd = parse_data(response, type='usd')
    df_im = df_usd.merge(df_q, how='inner', on=['HSCode', 'Commodity', 'S.No.'])
    df_im['Flow'] = 'Im'
    df = pd.concat([df_im, df_ex])
    df['HSCode'] = df['HSCode'].str.zfill(8)
    df.to_csv(filename + '/full' + str(year) + f"{month:02d}" + '.csv')

In [5]:
# check latest month here
# https://tradestat.commerce.gov.in/meidb/cntcomq.asp?ie=e

In [6]:
last = (2025,2)
a = [x for x in range(2010, last[0]+1)]
b = [x for x in range(1,13)]
all_dates = set([x for x in list(itertools.product(a, b)) if (x[0]<=last[0]-1) or (x[0]==last[0] and x[1]<=last[1])])

In [7]:
downloaded = set([ (int(x.split('.')[0][4:8]), int(x.split('.')[0][8:10])) for x in os.listdir('INDIA')])
to_download = all_dates - downloaded
to_download

set()

In [8]:
for i in tqdm(to_download):
    print('Downloading ' + str(i[0]) + ', month ' + str(i[1]))
    get_table_in_quantity_and_usd(i[1], i[0])

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html