In [None]:
import pandas as pd
import numpy as np
import plotly as pio

from random_forest.utils import calculate_delta
from utils import split_dataset

tickernames = [col[:4] for col in pd.read_csv("./data/bigdata.csv", skiprows=3).columns if not col.startswith("Unnamed")]

labels = pd.read_csv("./data/jkse.csv", skiprows=5, index_col="Dates")
labels.index = pd.to_datetime(labels.index)

labels = labels.rename(columns={"PX_LAST": "JKSE_PRICE"})
labels['PCT_CHANGE_20_JKSE'] = labels['JKSE_PRICE'].pct_change(periods=20) * 100

covs = pd.read_csv("./data/bigdata.csv", index_col="Dates", skiprows=5)
covlist = split_dataset(covs)

for i in range(len(covlist)):
    covlist[i] = covlist[i].loc[:, ['PX_LAST']]

for cov in covlist:
    cov['PCT_CHANGE_20'] = cov.pct_change(periods=20) * 100

def calculate_delta_fix(labels: pd.DataFrame, covariates_list: list):
    """
    Calculates the delta from label, function is adapted to handle missing labels
    :param labels: label dataframe
    :param covariates_list: list of dataframes containing covariates
    :return: covariates list
    """

    # Remove duplicate indices if any
    aligned = []
    covars = []
    
    for cov in covariates_list:
        cov = cov[~cov.index.duplicated(keep='first')]
        cov.index = pd.to_datetime(cov.index, format="%m/%d/%Y")
        labels.index = pd.to_datetime(labels.index, format="%m/%d/%Y")

        # Copy the cov dataframe to avoid SettingWithCopyWarning
        cov_copy = cov.copy()

        # Ensure that the indices (dates) are aligned
        aligned_df = labels.join(cov_copy[['PCT_CHANGE_20']], how='inner')

        # Debugging step: check if aligned_df is empty
        cov_copy.loc[aligned_df.index, 'DELTA_20_CHANGE'] = aligned_df['PCT_CHANGE_20_JKSE'] - aligned_df['PCT_CHANGE_20']

        covars.append(cov)
        aligned.append(aligned_df)

    return covars, aligned 
    
covars, aligned = calculate_delta_fix(labels, covlist) 

In [1]:
import pandas as pd
from utils import split_dataset

labels = pd.read_csv("./data/jkse.csv", skiprows=5, index_col="Dates")
labels.index = pd.to_datetime(labels.index)

labels = labels.rename(columns={"PX_LAST": "JKSE_PRICE"})
labels['PCT_CHANGE_20_JKSE'] = labels['JKSE_PRICE'].pct_change(periods=20) * 100

covs = pd.read_csv("./data/bigdata.csv", index_col="Dates", skiprows=5)
covlist = split_dataset(covs)

for i in range(len(covlist)):
    covlist[i] = covlist[i].loc[:, ['PX_LAST']]

for cov in covlist:
    cov['PCT_CHANGE_20'] = cov.pct_change(periods=20) * 100

def calculate_delta_fix(labels: pd.DataFrame, covariates_list: list):
    """
    Calculates the delta from label, function is adapted to handle missing labels
    :param labels: label dataframe
    :param covariates_list: list of dataframes containing covariates
    :return: covariates list
    """

    # Remove duplicate indices if any
    aligned = []
    covars = []

    for cov in covariates_list:
        cov = cov[~cov.index.duplicated(keep='first')]
        cov.index = pd.to_datetime(cov.index, format="%m/%d/%Y")
        labels.index = pd.to_datetime(labels.index, format="%m/%d/%Y")

        # Copy the cov dataframe to avoid SettingWithCopyWarning
        cov_copy = cov.copy()

        # Ensure that the indices (dates) are aligned
        aligned_df = labels.join(cov_copy[['PCT_CHANGE_20']], how='inner')

        # Debugging step: check if aligned_df is empty
        if aligned_df.empty:
            print(f"aligned_df is empty. Inspecting indices:")
            print("Labels index range:", labels.index.min(), labels.index.max())
            print("Covariates index range:", cov_copy.index.min(), cov_copy.index.max())

        else:
            cov_copy.loc[aligned_df.index, 'DELTA_20_CHANGE'] = aligned_df['PCT_CHANGE_20_JKSE'] - aligned_df['PCT_CHANGE_20']

        covars.append(cov_copy)  # Append the modified cov_copy to covars
        aligned.append(aligned_df)

    return covars, aligned

covars, aligned = calculate_delta_fix(labels, covlist)

In [2]:
for cov in covlist:
    cov.drop(['PX_LAST', 'PCT_CHANGE_20'], axis=1, inplace=True)

covariates = pd.concat(covlist, axis=1)


In [None]:
covariates.dropna(axis=0, inplace=True)

In [3]:
covariates

8/14/2009
8/17/2009
8/18/2009
8/19/2009
8/20/2009
...
7/25/2024
7/26/2024
7/29/2024
7/30/2024
7/31/2024


In [None]:
import plotly.graph_objs as go
import plotly.io as pio

# Create traces for each ticker
traces = []
for ticker in ['ACES', 'ASII', 'ICBP', 'MDKA', 'SMGR', 'ADRO', 'BBCA', 'INCO', 'MEDC',
               'TLKM', 'AKRA', 'BBNI', 'INDF', 'PGAS', 'UNTR', 'AMRT', 'BBRI', 'INKP',
               'PGEO', 'UNVR', 'ANTM', 'BMRI', 'ITMG', 'ARTO', 'BRPT', 'KLBF', 'PTBA']:
    traces.append(go.Scatter(x=covariates.index, y=covariates[ticker], mode='lines', name=ticker))

# Define the layout
layout = go.Layout(
    title='Time Series Data for Selected Tickers',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Value'),
    showlegend=True
)

# Create the figure
fig = go.Figure(data=traces, layout=layout)

# Show the plot
pio.show(fig)

In [8]:
import pandas as pd

covariates = []
for i in range(11):
    cov = pd.read_csv(f"./covset0_hmm/train_{i}.csv")
    covariates.append(cov[['DELTA_20_CHANGE']])
    

In [9]:
covariates = pd.concat(covariates, axis=1)

In [11]:
covariates.columns = ['ACES', 'ASII', 'ICBP', 'MDKA', 'SMGR', 'ADRO', 'BBCA', 'INCO', 'MEDC',
                      'TLKM', 'AKRA', 'BBNI', 'INDF', 'PGAS', 'UNTR', 'AMRT', 'BBRI', 'INKP',
                      'PGEO', 'UNVR', 'ANTM', 'BMRI', 'ITMG', 'ARTO', 'BRPT', 'KLBF', 'PTBA']

ValueError: Length mismatch: Expected axis has 11 elements, new values have 27 elements

In [13]:
covariates

Unnamed: 0,DELTA_20_CHANGE,DELTA_20_CHANGE.1,DELTA_20_CHANGE.2,DELTA_20_CHANGE.3,DELTA_20_CHANGE.4,DELTA_20_CHANGE.5,DELTA_20_CHANGE.6,DELTA_20_CHANGE.7,DELTA_20_CHANGE.8,DELTA_20_CHANGE.9,DELTA_20_CHANGE.10
0,-3.306006,-4.723780,-5.129216,-9.964232,-7.387484,10.592973,-14.919527,0.099679,-23.093422,10.734400,-8.410515
1,-6.651001,-4.068039,-4.693503,-8.858349,-10.267766,15.608053,-15.322276,-0.219377,-12.935838,7.178586,-8.232309
2,-6.342782,-2.366836,-3.019762,-6.755963,-7.393897,15.049278,-11.872461,-0.392914,-3.233927,8.197825,-8.228518
3,-7.190950,-4.554604,-0.991545,-2.613665,-7.285428,16.828396,-11.555827,-0.243759,-20.954899,4.882219,-3.583814
4,-7.915734,-5.576967,-1.693815,1.561726,-9.099560,21.986689,-11.918246,-2.602311,-13.308471,-4.027786,-1.347573
...,...,...,...,...,...,...,...,...,...,...,...
247,-10.028288,-11.526863,3.555663,4.231391,39.886806,9.030581,15.248005,-1.134811,-8.081292,-3.386421,-12.963009
248,-8.110779,-13.662227,-0.005921,0.358815,42.225324,9.433569,17.808396,-4.717208,-8.121371,-1.799342,-11.230081
249,-4.539016,-12.605479,-5.446045,0.794662,39.716310,7.690441,14.642565,-8.152606,-8.171100,-0.540891,-9.911435
250,-6.684655,-9.612342,-1.845335,-0.884291,37.155396,9.618737,6.484205,-9.585989,-7.898574,-2.224045,-5.890939
