In [21]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import os

In [2]:
symbol = 'TCB'
query_string = {
    'symbol': symbol, 
}
query_string["resolution"] = "1D"
query_string["from"] = str(datetime(2000, 1, 1).timestamp())
query_string["to"] = str(datetime.now().timestamp())

In [3]:
query_string

{'symbol': 'TCB',
 'resolution': '1D',
 'from': '946659600.0',
 'to': '1732365808.425751'}

In [4]:
headers = {'accept': 'application/json, text/plain, */*',
           'accept-encoding': 'gzip, deflate, br, zstd',
           'accept-language': 'en-US,en;q=0.9',
           'origin': 'https://iboard.ssi.com.vn',
           'priority': 'u=1, i',
           'referer': 'https://iboard.ssi.com.vn/',
           'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Microsoft Edge";v="126"',
           'sec-ch-ua-mobile': '?0',
           'sec-ch-ua-platform': '"Windows"',
           'sec-fetch-dest': 'empty',
           'sec-fetch-mode': 'cors',
           'sec-fetch-site': 'same-site',
           'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0'}

In [5]:
url_without_params = 'https://iboard-api.ssi.com.vn/statistics/charts/history'

In [6]:
response = requests.get(
    url=url_without_params,
    headers=headers,
    params=query_string
)

In [7]:
response.status_code

200

In [8]:
df = pd.DataFrame(response.json()["data"])

In [9]:
df

Unnamed: 0,t,c,o,h,l,v,s
0,1528070400,16.55,16.55,16.97,16.55,2811840,ok
1,1528156800,15.52,16.02,17.13,15.52,1357500,ok
2,1528243200,14.87,15.35,15.52,14.71,1605480,ok
3,1528329600,15.90,15.27,15.90,15.05,1376540,ok
4,1528416000,17.00,16.32,17.00,16.08,2008500,ok
...,...,...,...,...,...,...,...
1617,1731888000,22.45,22.40,22.70,22.20,12110400,ok
1618,1731974400,22.40,22.55,22.55,22.20,6936100,ok
1619,1732060800,22.85,22.30,22.85,22.25,10247500,ok
1620,1732147200,23.20,22.80,23.20,22.60,11213000,ok


In [10]:
df['t'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%m-%d-%Y 00:00:00+00:00')
df['t'] = pd.to_datetime(df['t'])

In [11]:
df

Unnamed: 0,t,c,o,h,l,v,s
0,2018-06-04 00:00:00+00:00,16.55,16.55,16.97,16.55,2811840,ok
1,2018-06-05 00:00:00+00:00,15.52,16.02,17.13,15.52,1357500,ok
2,2018-06-06 00:00:00+00:00,14.87,15.35,15.52,14.71,1605480,ok
3,2018-06-07 00:00:00+00:00,15.90,15.27,15.90,15.05,1376540,ok
4,2018-06-08 00:00:00+00:00,17.00,16.32,17.00,16.08,2008500,ok
...,...,...,...,...,...,...,...
1617,2024-11-18 00:00:00+00:00,22.45,22.40,22.70,22.20,12110400,ok
1618,2024-11-19 00:00:00+00:00,22.40,22.55,22.55,22.20,6936100,ok
1619,2024-11-20 00:00:00+00:00,22.85,22.30,22.85,22.25,10247500,ok
1620,2024-11-21 00:00:00+00:00,23.20,22.80,23.20,22.60,11213000,ok


In [12]:
df.rename(
    columns={
        "t": "Date",
        "c": "Close",
        "o": "Open",
        "h": "High",
        "l": "Low",
        "v": "Volume"
    },
    inplace=True
)
df.drop(columns=["s"], inplace=True)

In [13]:
df

Unnamed: 0,Date,Close,Open,High,Low,Volume
0,2018-06-04 00:00:00+00:00,16.55,16.55,16.97,16.55,2811840
1,2018-06-05 00:00:00+00:00,15.52,16.02,17.13,15.52,1357500
2,2018-06-06 00:00:00+00:00,14.87,15.35,15.52,14.71,1605480
3,2018-06-07 00:00:00+00:00,15.90,15.27,15.90,15.05,1376540
4,2018-06-08 00:00:00+00:00,17.00,16.32,17.00,16.08,2008500
...,...,...,...,...,...,...
1617,2024-11-18 00:00:00+00:00,22.45,22.40,22.70,22.20,12110400
1618,2024-11-19 00:00:00+00:00,22.40,22.55,22.55,22.20,6936100
1619,2024-11-20 00:00:00+00:00,22.85,22.30,22.85,22.25,10247500
1620,2024-11-21 00:00:00+00:00,23.20,22.80,23.20,22.60,11213000


In [14]:
df["Date"].min()

Timestamp('2018-06-04 00:00:00+0000', tz='UTC')

In [15]:
df["Date"].max()

Timestamp('2024-11-22 00:00:00+0000', tz='UTC')

In [16]:
symbol = symbol.replace(".", "_").lower()

In [22]:
min_date = df["Date"].min().date()
max_date = df["Date"].max().date()
folder_save = f"../../data/{symbol}"
os.makedirs(folder_save, exist_ok=True)
file_name = f"{folder_save}/{symbol}-{str(min_date)}-to-{str(max_date)}.csv"

In [23]:
file_name = file_name.replace("-", "_")

In [24]:
file_name

'../../data/tcb/tcb_2018_06_04_to_2024_11_22.csv'

In [25]:
df.to_csv(file_name, index=False)