In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
symbol = 'VN30'

In [3]:
query_string = {
    'symbol': symbol, 
}

In [4]:
query_string["resolution"] = "1D"
query_string["from"] = str(datetime(2012, 1, 1).timestamp())
query_string["to"] = str(datetime.now().timestamp())

In [5]:
query_string

{'symbol': 'VN30',
 'resolution': '1D',
 'from': '1325350800.0',
 'to': '1731770388.514675'}

In [6]:
headers = {'accept': 'application/json, text/plain, */*',
           'accept-encoding': 'gzip, deflate, br, zstd',
           'accept-language': 'en-US,en;q=0.9',
           'origin': 'https://iboard.ssi.com.vn',
           'priority': 'u=1, i',
           'referer': 'https://iboard.ssi.com.vn/',
           'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Microsoft Edge";v="126"',
           'sec-ch-ua-mobile': '?0',
           'sec-ch-ua-platform': '"Windows"',
           'sec-fetch-dest': 'empty',
           'sec-fetch-mode': 'cors',
           'sec-fetch-site': 'same-site',
           'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0'}

In [7]:
url_without_params = 'https://iboard-api.ssi.com.vn/statistics/charts/history'

In [8]:
response = requests.get(
    url=url_without_params,
    headers=headers,
    params=query_string
)

In [9]:
response.status_code

200

In [10]:
df = pd.DataFrame(response.json()["data"])

In [11]:
df

Unnamed: 0,t,c,o,h,l,v,s
0,1328486400,447.47,449.38,449.38,441.60,13467070,ok
1,1328572800,449.31,448.37,450.13,443.38,17261070,ok
2,1328659200,460.98,452.13,460.98,450.00,20406280,ok
3,1328745600,464.73,467.18,467.18,461.49,20379320,ok
4,1328832000,458.23,464.25,464.25,456.52,20816360,ok
...,...,...,...,...,...,...,...
3185,1731283200,1310.46,1317.34,1318.71,1298.78,302541700,ok
3186,1731369600,1301.95,1312.55,1315.80,1297.99,184858300,ok
3187,1731456000,1304.04,1301.95,1305.42,1290.61,197224300,ok
3188,1731542400,1286.65,1304.04,1304.04,1286.52,186252300,ok


In [12]:
df['t'] = pd.to_datetime(df['t'], unit='s').dt.strftime('%m-%d-%Y 00:00:00+00:00')
df['t'] = pd.to_datetime(df['t'])

In [13]:
df

Unnamed: 0,t,c,o,h,l,v,s
0,2012-02-06 00:00:00+00:00,447.47,449.38,449.38,441.60,13467070,ok
1,2012-02-07 00:00:00+00:00,449.31,448.37,450.13,443.38,17261070,ok
2,2012-02-08 00:00:00+00:00,460.98,452.13,460.98,450.00,20406280,ok
3,2012-02-09 00:00:00+00:00,464.73,467.18,467.18,461.49,20379320,ok
4,2012-02-10 00:00:00+00:00,458.23,464.25,464.25,456.52,20816360,ok
...,...,...,...,...,...,...,...
3185,2024-11-11 00:00:00+00:00,1310.46,1317.34,1318.71,1298.78,302541700,ok
3186,2024-11-12 00:00:00+00:00,1301.95,1312.55,1315.80,1297.99,184858300,ok
3187,2024-11-13 00:00:00+00:00,1304.04,1301.95,1305.42,1290.61,197224300,ok
3188,2024-11-14 00:00:00+00:00,1286.65,1304.04,1304.04,1286.52,186252300,ok


In [14]:
df.rename(
    columns={
        "t": "Date",
        "c": "Close",
        "o": "Open",
        "h": "High",
        "l": "Low",
        "v": "Volume"
    },
    inplace=True
)
df.drop(columns=["s"], inplace=True)

In [15]:
df

Unnamed: 0,Date,Close,Open,High,Low,Volume
0,2012-02-06 00:00:00+00:00,447.47,449.38,449.38,441.60,13467070
1,2012-02-07 00:00:00+00:00,449.31,448.37,450.13,443.38,17261070
2,2012-02-08 00:00:00+00:00,460.98,452.13,460.98,450.00,20406280
3,2012-02-09 00:00:00+00:00,464.73,467.18,467.18,461.49,20379320
4,2012-02-10 00:00:00+00:00,458.23,464.25,464.25,456.52,20816360
...,...,...,...,...,...,...
3185,2024-11-11 00:00:00+00:00,1310.46,1317.34,1318.71,1298.78,302541700
3186,2024-11-12 00:00:00+00:00,1301.95,1312.55,1315.80,1297.99,184858300
3187,2024-11-13 00:00:00+00:00,1304.04,1301.95,1305.42,1290.61,197224300
3188,2024-11-14 00:00:00+00:00,1286.65,1304.04,1304.04,1286.52,186252300


In [16]:
df["Date"].min()

Timestamp('2012-02-06 00:00:00+0000', tz='UTC')

In [17]:
df["Date"].max()

Timestamp('2024-11-15 00:00:00+0000', tz='UTC')

In [18]:
symbol = symbol.replace(".", "_").lower()

In [None]:
min_date = df["Date"].min().date()
max_date = df["Date"].max().date()
file_name = f"../../data/{symbol}/{symbol}-{str(min_date)}-to-{str(max_date)}.csv"

In [20]:
file_name = file_name.replace("-", "_")

In [21]:
file_name

'./data/vn30_2012_02_06_to_2024_11_15.csv'

In [22]:
df.to_csv(file_name, index=False)