In [2]:
# Query-form pages opened by get_import_qty (url_import) and
# get_export_qty (url_export).
url_import, url_export = (
    'https://tradestat.commerce.gov.in/eidb/icomq.asp',
    'https://tradestat.commerce.gov.in/eidb/ecomq.asp',
)

In [3]:
# Import libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
import time
import pandas as pd
from datetime import datetime
import numpy as np
from sqlalchemy import create_engine, Column, Integer, String, Float, Date, MetaData, Table
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import pandas as pd
import os
from dotenv import load_dotenv
import math

In [19]:
# Launch the Chrome session that the scraping functions below drive through
# the module-level `driver`.
chrome_options = Options()
# NOTE(review): `use_chromium` looks like an Edge-options switch; on Chrome's
# Options it is presumably a plain attribute with no effect -- confirm before
# relying on or removing it.
chrome_options.use_chromium = True

# webdriver_manager supplies the chromedriver path handed to the Service.
driver = webdriver.Chrome(
    options=chrome_options,
    service=Service(ChromeDriverManager().install()),
)


In [18]:
def get_import_qty(hsn_code, years):
    """Collect the import quantity for every HS code / year combination.

    Loads ``url_import`` in the module-level ``driver``; then, for each
    combination, fills in the query form, requests the quantity view, reads
    the result page and clicks the back button to return to the form.

    Args:
        hsn_code: Iterable of HS codes given as strings. Codes of four
            characters or fewer are skipped without touching the browser and
            produce no rows, because the quantity view is only requested for
            longer (6- and 8-digit) codes.
        years: Iterable of year values matching the ``value`` attributes of
            the year dropdown (``select2``). Strings or integers are accepted.

    Returns:
        pandas.DataFrame with the columns ``Year``, ``HS_Code`` and
        ``Import_Qty_thousand_units``, one row per queried combination.
        ``Year`` is the text of the fifth ``<th>`` on the result page, or
        ``"<year-1>-<year>"`` when the page has fewer than five headers.
        The quantity is the fifth ``<td>`` parsed as a float (thousands
        separators removed), or ``0.0`` when that cell is missing or blank.

    Raises:
        ValueError: If the quantity cell holds text that is not a number.
        selenium.common.exceptions.NoSuchElementException: If an expected
            form control is not found on the page.
    """
    driver.get(url_import)
    data = []
    for code in hsn_code:
        # Quantity is available for 6 & 8 digit HSN codes only; the check does
        # not depend on the year, so short codes are skipped up front.
        if len(code) <= 4:
            continue

        for year in years:
            # select_by_value needs a string; normalising here lets callers
            # pass integer years as well.
            year_value = str(year)

            # Select the year for which data is fetched
            Select(driver.find_element(By.ID, "select2")).select_by_value(year_value)

            # Enter the HS code (local is not called `input`, which would
            # shadow the builtin)
            hs_input = driver.find_element(By.NAME, 'hscode')
            hs_input.clear()
            hs_input.send_keys(code)

            # Select the digit level that matches the length of the code
            Select(driver.find_element(By.ID, "select1")).select_by_value(str(len(code)))

            # Choose the quantity radio button, then submit the form
            driver.find_element(By.ID, 'radioqty').click()
            driver.find_element(By.ID, 'button1').click()

            # Scrape the result page
            table_data = driver.find_elements(By.TAG_NAME, 'td')
            table_header = driver.find_elements(By.TAG_NAME, 'th')

            # Only the fifth header / cell is used, so read just that
            # element's text instead of the text of every element. The length
            # checks keep a short result page from raising IndexError.
            if len(table_header) > 4:
                year_text = table_header[4].text
            else:
                year_text = str(int(year_value) - 1) + '-' + year_value

            qty_text = ''
            if len(table_data) > 4:
                # Drop thousands separators; strip so that any
                # whitespace-only cell counts as blank.
                qty_text = table_data[4].text.replace(',', '').strip()

            quantity = float(qty_text) if qty_text else 0.0

            # Add the row to the data list
            data.append([year_text, code, quantity])

            # Back button: return to the query form for the next combination
            driver.find_element(By.ID, 'IMG1').click()

    return pd.DataFrame(data, columns=['Year', 'HS_Code', 'Import_Qty_thousand_units'])

In [17]:
def get_export_qty(hsn_code, years):
    """Collect the export quantity for every HS code / year combination.

    Loads ``url_export`` in the module-level ``driver``; then, for each
    combination, fills in the query form, requests the quantity view, reads
    the result page and clicks the back button to return to the form.

    Args:
        hsn_code: Iterable of HS codes given as strings. Codes of four
            characters or fewer are skipped without touching the browser and
            produce no rows, because the quantity view is only requested for
            longer (6- and 8-digit) codes.
        years: Iterable of year values matching the ``value`` attributes of
            the year dropdown (``select2``). Strings or integers are accepted.

    Returns:
        pandas.DataFrame with the columns ``Year``, ``HS_Code`` and
        ``Export_Qty_thousand_units``, one row per queried combination.
        ``Year`` is the text of the fifth ``<th>`` on the result page, or
        ``"<year-1>-<year>"`` when the page has fewer than five headers.
        The quantity is the fifth ``<td>`` parsed as a float (thousands
        separators removed), or ``0.0`` when that cell is missing or blank.

    Raises:
        ValueError: If the quantity cell holds text that is not a number.
        selenium.common.exceptions.NoSuchElementException: If an expected
            form control is not found on the page.
    """
    driver.get(url_export)
    data = []
    for code in hsn_code:
        # Quantity is available for 6 & 8 digit HSN codes only; the check does
        # not depend on the year, so short codes are skipped up front.
        if len(code) <= 4:
            continue

        for year in years:
            # select_by_value needs a string; normalising here lets callers
            # pass integer years as well.
            year_value = str(year)

            # Select the year for which data is fetched
            Select(driver.find_element(By.ID, "select2")).select_by_value(year_value)

            # Enter the HS code (local is not called `input`, which would
            # shadow the builtin)
            hs_input = driver.find_element(By.NAME, 'hscode')
            hs_input.clear()
            hs_input.send_keys(code)

            # Select the digit level that matches the length of the code
            Select(driver.find_element(By.ID, "select1")).select_by_value(str(len(code)))

            # Choose the quantity radio button, then submit the form
            driver.find_element(By.ID, 'radioqty').click()
            driver.find_element(By.ID, 'button1').click()

            # Scrape the result page
            table_data = driver.find_elements(By.TAG_NAME, 'td')
            table_header = driver.find_elements(By.TAG_NAME, 'th')

            # Only the fifth header / cell is used, so read just that
            # element's text instead of the text of every element. The length
            # checks keep a short result page from raising IndexError.
            if len(table_header) > 4:
                year_text = table_header[4].text
            else:
                year_text = str(int(year_value) - 1) + '-' + year_value

            qty_text = ''
            if len(table_data) > 4:
                # Drop thousands separators; strip so that any
                # whitespace-only cell counts as blank.
                qty_text = table_data[4].text.replace(',', '').strip()

            quantity = float(qty_text) if qty_text else 0.0

            # Add the row to the data list
            data.append([year_text, code, quantity])

            # Back button: return to the query form for the next combination
            driver.find_element(By.ID, 'IMG1').click()

    return pd.DataFrame(data, columns=['Year', 'HS_Code', 'Export_Qty_thousand_units'])

In [22]:
# HS codes to query, given as strings so that len(code) yields the digit level.
hsn_code = ['260300']
# Year dropdown values as strings; each is the closing year of the reported
# period (e.g. '2021' yields the '2020-2021' row).
years = [str(closing_year) for closing_year in range(2021, 2025)]

In [23]:
# Preview the import quantities for the configured codes and years.
get_import_qty(hsn_code=hsn_code, years=years)


Unnamed: 0,Year,HS_Code,Import_Qty_thousand_units
0,2020-2021,260300,415136.09
1,2021-2022,260300,1018935.06
2,2022-2023,260300,1178919.88
3,2023-2024,260300,1016300.94


In [24]:
# Preview the export quantities for the configured codes and years.
get_export_qty(hsn_code=hsn_code, years=years)

Unnamed: 0,Year,HS_Code,Export_Qty_thousand_units
0,2020-2021,260300,82462.78
1,2021-2022,260300,34826.63
2,2022-2023,260300,26336.0
3,2023-2024,260300,23187.55


In [26]:
def get_trade_qty(hsn_code, years):
    """Return import and export quantities side by side.

    Runs get_import_qty and then get_export_qty with the same arguments and
    joins the two frames on ``Year`` and ``HS_Code``.

    Args:
        hsn_code: HS codes, passed through unchanged to both scrapers.
        years: Year values, passed through unchanged to both scrapers.

    Returns:
        pandas.DataFrame holding the key columns plus the quantity column of
        each input frame. The join is an inner join, so a (Year, HS_Code)
        pair present in only one of the two frames is dropped.
    """
    imports_df = get_import_qty(hsn_code, years)
    exports_df = get_export_qty(hsn_code, years)
    return imports_df.merge(exports_df, how='inner', on=['Year', 'HS_Code'])



In [32]:
# Build the combined import/export table for the configured codes and years.
df = get_trade_qty(hsn_code=hsn_code, years=years)

Unnamed: 0,Year,HS_Code,Import_Qty_thousand_units,Export_Qty_thousand_units
0,2020-2021,260300,415136.09,82462.78
1,2021-2022,260300,1018935.06,34826.63
2,2022-2023,260300,1178919.88,26336.0
3,2023-2024,260300,1016300.94,23187.55
