In [7]:
import requests
import json

import pandas as pd
from pandas.tseries.frequencies import to_offset

import numpy as np

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping

import time
import datetime

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor

In [8]:
def get_ohlc_data(symbols: list, from_ts: int = 1401865600, to_ts: int = 1669488762,
                  timeout: float = 30):
    """Request daily OHLC series for ``symbols`` from the Vietcap chart API.

    Args:
        symbols: Ticker / index symbols to request.
        from_ts: Start of the requested window, sent as the ``"from"`` field.
            Presumably Unix epoch seconds (the default is 2014-06-04 UTC) —
            TODO confirm against the API.
        to_ts: End of the requested window, sent as the ``"to"`` field
            (the default is 2022-11-26 UTC under the same assumption).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response. Callers in this notebook treat
        it as a list of per-symbol dicts with the keys
        ``symbol``, ``t``, ``o``, ``h``, ``l``, ``c`` and ``v``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://mt.vietcap.com.vn/api/chart/OHLCChart/gap"

    payload = json.dumps({
        "timeFrame": "ONE_DAY",
        "symbols": symbols,
        "from": from_ts,
        "to": to_ts,
    })
    headers = {'Content-Type': 'application/json'}

    # Without a timeout, requests waits indefinitely and can hang the kernel.
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error body to callers
    # that expect a list of series.
    response.raise_for_status()

    return response.json()


def find_in_list(list, value):
    """Return the first entry of ``list`` whose ``"symbol"`` equals ``value``.

    Args:
        list: Sequence of dicts, each carrying a ``"symbol"`` key.
        value: Symbol to look for.

    Returns:
        The first matching entry (the same object, not a copy), or an empty
        dict when nothing matches.
    """
    matches = (entry for entry in list if entry["symbol"] == value)
    return next(matches, {})


def collect_data():
    """Download HPG plus the market indices and combine them into one frame.

    The HPG series supplies the rows (``trunc_time``, the OHLC prices and
    ``volume``). Each index contributes one column holding its close value for
    the same timestamp.

    Index closes are matched to HPG rows by timestamp rather than by list
    position, so an index series with a different number of points no longer
    raises a length-mismatch error or shifts values onto the wrong row.
    HPG timestamps that an index has no value for are left as NaN.

    Returns:
        pandas.DataFrame with the columns ``trunc_time``, ``open_price``,
        ``high_price``, ``low_price``, ``close_price``, ``volume``,
        ``vn_index_price``, ``vn_30_price``, ``hnx_index_price``,
        ``hnx_30_price`` and ``hnx_upcom_index_price``.

    Raises:
        KeyError: If a requested symbol is absent from the API response.
    """
    stock_symbol = "HPG"
    # Output column -> index symbol whose close series fills it.
    index_columns = {
        "vn_index_price": "VNINDEX",
        "vn_30_price": "VN30",
        "hnx_index_price": "HNXIndex",
        "hnx_30_price": "HNX30",
        "hnx_upcom_index_price": "HNXUpcomIndex",
    }
    data = get_ohlc_data([stock_symbol, *index_columns.values()])

    def _series_for(symbol):
        # find_in_list returns {} on a miss; name the missing symbol here
        # instead of failing later with an opaque KeyError: 't'.
        series = find_in_list(data, symbol)
        if not series:
            raise KeyError(f"symbol {symbol!r} not found in OHLC response")
        return series

    symbol_data = _series_for(stock_symbol)
    df = pd.DataFrame({
        "trunc_time": symbol_data["t"],
        "open_price": symbol_data["o"],
        "high_price": symbol_data["h"],
        "low_price": symbol_data["l"],
        "close_price": symbol_data["c"],
        "volume": symbol_data["v"],
    })

    for column, symbol in index_columns.items():
        index_data = _series_for(symbol)
        # Look closes up by timestamp so rows stay aligned with HPG.
        close_by_time = dict(zip(index_data["t"], index_data["c"]))
        df[column] = df["trunc_time"].map(close_by_time)

    return df


def json_to_csv(data):
    """Write one symbol's OHLC series to ``<symbol>.csv`` in the working directory.

    Args:
        data: Dict with a ``"symbol"`` name and the parallel sequences
            ``"t"`` (timestamps), ``"o"``, ``"h"``, ``"l"``, ``"c"`` and ``"v"``.

    The CSV has the columns ``trunc_time``, ``open_price``, ``high_price``,
    ``low_price``, ``close_price`` and ``volume``, with no index column.
    """
    symbol = data["symbol"]

    # NOTE(review): date.fromtimestamp converts using the machine's local
    # timezone, so the calendar date written can depend on where this runs.
    trade_dates = [datetime.date.fromtimestamp(int(stamp)) for stamp in data["t"]]

    frame = pd.DataFrame({
        "trunc_time": trade_dates,
        "open_price": data["o"],
        "high_price": data["h"],
        "low_price": data["l"],
        "close_price": data["c"],
        "volume": data["v"],
    })

    frame.to_csv(f'{symbol}.csv', index=False)


if __name__ == '__main__':
    # Step 1: collect data — download the daily OHLC series for every stock
    # and index below and write one CSV per symbol.
    # The single-frame alternative is kept disabled: df = collect_data()

    stock_symbols = ["HPG", "STB", "VND", "SSI", "DIG", "HAG", "VHM", "VCI", "NVL"]
    index_symbols = ["VNINDEX", "VN30", "HNXIndex", "HNX30", "HNXUpcomIndex"]

    data = get_ohlc_data(stock_symbols + index_symbols)

    for item in data:
        json_to_csv(item)

# Disabled step: left-join each index close onto every stock CSV by date,
# sort newest-first and overwrite the stock CSV.
#     df_vn30 = pd.read_csv("VN30.csv")
#     df_vnindex = pd.read_csv("VNINDEX.csv")
#     df_hnxindex = pd.read_csv("HNXIndex.csv")
#     df_hnx30 = pd.read_csv("HNX30.csv")
#     df_hnxupcomindex = pd.read_csv("HNXUpcomIndex.csv")

#     for item in ["HPG", "STB", "VND", "SSI", "DIG", "HAG", "VHM", "VCI", "NVL"]:
#         df = pd.read_csv(f'{item}.csv')
#         df = df.merge(df_vn30.rename(columns={"close_price": "vn30_price"})[["vn30_price", "trunc_time"]], on="trunc_time", how="left")
#         df = df.merge(df_vnindex.rename(columns={"close_price": "vnindex_price"})[["vnindex_price", "trunc_time"]], on="trunc_time", how="left")
#         df = df.merge(df_hnxindex.rename(columns={"close_price": "hnxindex_price"})[["hnxindex_price", "trunc_time"]], on="trunc_time", how="left")
#         df = df.merge(df_hnx30.rename(columns={"close_price": "hnx30_price"})[["hnx30_price", "trunc_time"]], on="trunc_time", how="left")
#         df = df.merge(df_hnxupcomindex.rename(columns={"close_price": "hnxupcomindex_price"})[["hnxupcomindex_price", "trunc_time"]], on="trunc_time", how="left")
#         df = df.sort_values(by="trunc_time", ascending=False)
#         df.to_csv(f'{item}.csv', index=False)


    print("Done")


Done


In [9]:
def candlestick_visuallize(stock_name):
  """Show a Plotly candlestick chart for one stock's price history.

  Args:
    stock_name: DataFrame with the columns ``Date``, ``Open``, ``High``,
      ``Low`` and ``Close``.
  """
  candles = go.Candlestick(
      x=stock_name['Date'],
      open=stock_name['Open'],
      high=stock_name['High'],
      low=stock_name['Low'],
      close=stock_name['Close'],
  )
  chart = go.Figure(data=[candles])
  chart.show()

In [10]:
def rename_col(stock_name):
  """Rename the raw CSV column names to display names, modifying the frame in place.

  Columns that are not listed in the mapping keep their current name.

  Args:
    stock_name: DataFrame whose columns are renamed. Nothing is returned.
  """
  # NOTE(review): there is no entry for "vn30_price", so a column with that
  # name is left unchanged — confirm whether that is intentional.
  display_names = {
      "trunc_time": "Date",
      "open_price": "Open",
      "high_price": "High",
      "low_price": "Low",
      "close_price": "Close",
      "volume": "Volume",
      "vnindex_price": "VNIndex",
      "hnxindex_price": "HNXIndex",
      "hnx30_price": "HNX30",
      "hnxupcomindex_price": "HNXUpcomIndex",
  }
  stock_name.rename(columns=display_names, inplace=True)

In [11]:
def drop_col(stock_name, col_list):
  """Remove the given columns from ``stock_name``, modifying the frame in place.

  Args:
    stock_name: DataFrame to modify. Nothing is returned.
    col_list: Column label or list of labels to remove.
  """
  stock_name.drop(labels=col_list, axis="columns", inplace=True)

In [12]:
def create_data(filename):
    """Load ``<filename>.csv``, tidy it, and plot a candlestick chart and a correlation heatmap.

    Steps: read the CSV, rename columns via ``rename_col``, reverse the row
    order, drop rows containing any missing value, show the candlestick chart,
    then draw a heatmap of the correlations between the numeric columns.

    Args:
        filename: Path of the CSV file without the ``.csv`` extension.

    Returns:
        The cleaned DataFrame. Its index keeps the gaps left by ``dropna``.
    """
    file = pd.read_csv(filename + ".csv")

    rename_col(file)

    # Reverse the stored row order and renumber the rows from 0.
    # NOTE(review): presumably the CSV is stored newest-first, making this
    # oldest-first — confirm against how the file was written.
    file = file[::-1].reset_index(drop=True)

    file = file.dropna()

    candlestick_visuallize(file)

    # Disabled feature-importance experiment:
#     X = file.iloc[:,1:]
#     y = file.iloc[:,4]

#     model = ExtraTreesClassifier()
#     model.fit(X,y)
#     print(model.feature_importances_)

    # Correlate numeric columns only: the frame still holds the string Date
    # column, and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
    corrmat = file.select_dtypes(include="number").corr()
    plt.figure(figsize=(10,10))
    # Plot the heatmap from the matrix computed above rather than recomputing it.
    sns.heatmap(corrmat, annot=True, cmap="RdYlGn")

    return file
