In [2]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import Model
from catboost import Pool, cv, CatBoostClassifier, CatBoostRegressor
import os
from datetime import timedelta

In [3]:
def classifier_pipeline(train, test, y, types, name):
    train = train.drop(['Unnamed: 0', 'ticker', 'date', 'time'], axis=1)
    test = test.drop(['Unnamed: 0', 'ticker', 'date', 'time'], axis=1)

    train = train.dropna()
    test = test.dropna()

    X = train.drop(types, axis=1)
    Y = train[y]
    Xt = test.drop(types, axis=1)
    Yt = test[y]

    mapping = {-1: 0, 0: 1, 1: 2}
    Y = np.array([mapping[label] for label in Y])
    Yt = np.array([mapping[label] for label in Yt])

    model = XGBClassifier(n_jobs=-1, tree_method='hist',
                          objective='reg:squarederror')
    model.fit(X, Y)

    Yt_pred = model.predict(Xt)
    Yt_pred = np.array([0 if label < 1 else 1 if label <
                       2 else 2 for label in Yt_pred])
    
    report = classification_report(Yt, Yt_pred, target_names=['-1', '0', '1'])
    path = f'./final_new_data/results/XGB Classifier/report/classification_report_{name}.txt'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'a') as f:
        f.write(f'Parameter {y} Report:\n')
        f.write(str(report))
        f.write('\n\n')
    pd.DataFrame(Yt_pred).to_csv("./prediction.csv")
    # Yt_pred.to_csv("./prediction.csv")
    return classification_report(Yt, Yt_pred, target_names=['-1', '0', '1'], output_dict=True)['accuracy'] #type: ignore


In [4]:

paths = ["./output.csv"]

types = ['y5(3)', 'y5(4)', 'y10(3)', 'y10(4)', 'y15(3)', 'y15(4)', 'y20(3)', 'y20(4)', 'y25(3)', 'y25(4)', 'y30(3)', 'y30(4)']

matrix = []

s = ['BANKNIFTY23FEBFUT', 'NIFTY23FEBFUT'] 

for path in paths:
    for j in types:
        print(f'Interval type: {j}')
        df = pd.read_csv(path)
        df['date'] = pd.to_datetime(df['date'])
        test_date = df['date'].max() - timedelta(days=3) if path.split("/")[-1].split(".")[0] in s else df['date'].max() - timedelta(days=2)
        train = df[df['date'] < test_date]
        test = df[df['date'] >= test_date]
        matrix.append(classifier_pipeline(train, test, j, types, path.split("/")[-1].split(".")[0]))
    x = pd.DataFrame(matrix)
    path = f'./final_new_data/results/XGB Classifier/accuracy martices/{path.split("/")[-1].split(".")[0]}_xgb.csv'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        x.to_csv(f, index=False)
    # path = f'./final_new_data/results/XGB Classifier/report/classification_report_{path.split("/")[-1].split(".")[0]}_xgb.csv'
    # os.makedirs(os.path.dirname(path), exist_ok=True)
    # with open(path, 'a') as f:
    #     f.write(f'Parameter {y} Report:\n')
    #     f.write(str(report))


Interval type: y5(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y5(4)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y10(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y10(4)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y15(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y15(4)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y20(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y20(4)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y25(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y25(4)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y30(3)


  df['date'] = pd.to_datetime(df['date'])


Interval type: y30(4)


  df['date'] = pd.to_datetime(df['date'])


In [5]:
def catboost_pipeline(train, test, y, types, name):
    train = train.drop(['Unnamed: 0', 'ticker', 'date', 'time'], axis=1)
    test = test.drop(['Unnamed: 0', 'ticker', 'date', 'time'], axis=1)

    train = train.dropna()
    test = test.dropna()
    
    X = train.drop(types, axis=1)
    Y = train[y]
    Xt = test.drop(types, axis=1)
    Yt = test[y]
    
    mapping = {-1: 0, 0: 1, 1: 2}
    Y = np.array([mapping[label] for label in Y])
    Yt = np.array([mapping[label] for label in Yt])
    
    train_pool = Pool(data=X,label =Y)

    model = CatBoostClassifier()
    model.fit(train_pool, plot=True, silent=True)
    
    Yt_pred = model.predict(Xt)
    Yt_pred = np.array([0 if label < 1 else 1 if label <
                       2 else 2 for label in Yt_pred])
    report = classification_report(Yt, Yt_pred, target_names=['-1', '0', '1'])
    path = f'./final_new_data/results/Catboost Classifier/report/classification_report_{name}.txt'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'a') as f:
        f.write(f'Parameter {y} Report:\n')
        f.write(str(report))
        f.write('\n\n')
    pd.DataFrame(Yt_pred).to_csv("./prediction.csv")
    # Yt_pred.to_csv("./prediction1.csv")
    return classification_report(Yt, Yt_pred, target_names=['-1', '0', '1'], output_dict=True)['accuracy'] #type: ignore


In [6]:

matrix = []



for path in paths:
    for j in types:
        print(f'Interval type: {j}')
        df = pd.read_csv(path)
        df['date'] = pd.to_datetime(df['date'])
        test_date = df['date'].max() - timedelta(days=3) if path.split(
            "/")[-1].split(".")[0] in s else df['date'].max() - timedelta(days=2)
        train = df[df['date'] < test_date]
        test = df[df['date'] >= test_date]
        matrix.append(catboost_pipeline(train, test, j, types, path.split("/")[-1].split(".")[0]))
    x = pd.DataFrame(matrix)
    path = f'./final_new_data/results/Catboost Classifier/accuracy martices/{path.split("/")[-1].split(".")[0]}_catb.csv'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        x.to_csv(f, index=False)


Interval type: y5(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y5(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y10(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y10(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y15(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y15(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y20(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y20(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y25(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y25(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y30(3)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Interval type: y30(4)


  df['date'] = pd.to_datetime(df['date'])


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))