In [2]:
import pandas as pd
import numpy as np
import math
import statistics as st
from datetime import timedelta
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

In [3]:
df = pd.read_excel("../datasets/indian_stock_indices/Stock_Indices.xlsx")

In [4]:
df.head()

Unnamed: 0,Date,SENSEX,NIFTY50,NIFTY_CONSUMPTION
0,2014-01-01,21140.48,6323.8,2590.0
1,2014-01-02,20888.33,6301.25,2544.649902
2,2014-01-03,20851.33,6194.55,2546.949951
3,2014-01-06,20787.3,6220.85,2544.649902
4,2014-01-07,20693.24,6203.9,2543.399902


In [5]:
total_size = 2469

train_size = 1728 # Training data of 7 years (01-01-2014 to 31-12-2020)
test_size = 741 # Testing data of 3 years (01-01-2021 to 31-12-2023)

In [41]:
window_size = 30

sensex_graph_lists = []
nifty_graph_lists = []
niftyc_graph_lists = []

sensex_closing_prices = []
nifty_closing_prices = []
niftyc_closing_prices = []

i = 0
j = window_size

while j < total_size:
    k = i
    
    sensex_graph_list = []
    nifty_graph_list = []
    niftyc_graph_list = []

    while k <= j:
        sensex_graph_list.append(df['SENSEX'][k])
        nifty_graph_list.append(df['NIFTY50'][k])
        niftyc_graph_list.append(df['NIFTY_CONSUMPTION'][k])
        
        k += 1
    sensex_graph_lists.append(sensex_graph_list)
    nifty_graph_lists.append(nifty_graph_list)
    niftyc_graph_lists.append(niftyc_graph_list)
    
    sensex_closing_prices.append(df['SENSEX'][j])
    nifty_closing_prices.append(df['NIFTY50'][j])
    niftyc_closing_prices.append(df['NIFTY_CONSUMPTION'][j])
    
    i += 1
    j += 1

In [55]:
print(len(sensex_graph_lists[0]))

31


In [42]:
print(len(sensex_closing_prices))

2409


In [57]:
N = 5 # N-day returns

sensex_N_day_returns = []
nifty_N_day_returns = []
niftyc_N_day_returns = []

sensex_volatis = []
nifty_volatis = []
niftyc_volatis = []

for (sensex_graph_list, nifty_graph_list, niftyc_graph_list) in (zip(sensex_graph_lists, nifty_graph_lists, niftyc_graph_lists)):
    
    sensex_returns = []
    nifty_returns = []
    niftyc_returns = []
    
    sensex_vol = []
    nifty_vol = []
    niftyc_vol = []
    
    i = 0
    j = i + 1
    k = i + N    
    
    while k < window_size:
        sensex_ret = math.log(sensex_graph_list[k] / sensex_graph_list[i])
        nifty_ret = math.log(nifty_graph_list[k] / nifty_graph_list[i])
        niftyc_ret = math.log(niftyc_graph_list[k] / niftyc_graph_list[i])
        
        sensex_returns.append(sensex_ret)
        nifty_returns.append(nifty_ret)
        niftyc_returns.append(niftyc_ret)
        
        i += 1
        k += 1
    sensex_N_day_returns.append(sensex_returns)
    nifty_N_day_returns.append(nifty_returns)
    niftyc_N_day_returns.append(niftyc_returns)
    
    i = j - 1
    
    while j < window_size:
        sensex_returns = []
        nifty_returns = []
        niftyc_returns = []
        
        a = i
        b = j
        
        itr = 0
        while b < window_size and itr < N:
            r1 = math.log(sensex_graph_list[b] / sensex_graph_list[a])
            r2 = math.log(nifty_graph_list[b] / nifty_graph_list[a])
            r3 = math.log(niftyc_graph_list[b] / niftyc_graph_list[a])
            
            sensex_returns.append(r1)
            nifty_returns.append(r2)
            niftyc_returns.append(r3)
            
            itr += 1
            
            a += 1
            b += 1
        
        if itr == N:
            vol1 = st.stdev(sensex_returns) * math.sqrt(N)
            vol2 = st.stdev(nifty_returns) * math.sqrt(N)
            vol3 = st.stdev(niftyc_returns) * math.sqrt(N)
    
            sensex_vol.append(vol1)
            nifty_vol.append(vol2)
            niftyc_vol.append(vol3)
        
        j += 1
        i += 1
    sensex_volatis.append(sensex_vol)
    nifty_volatis.append(nifty_vol)
    niftyc_volatis.append(niftyc_vol) 

In [58]:
print(len(sensex_volatis[0]))

25


In [59]:
# print(len(sensex_volatis))
# print(len(sensex_N_day_returns))
# print(len(sensex_closing_prices))

In [60]:
total_size = len(sensex_volatis)

In [61]:
train_size = 0.75
test_size = 1 - train_size

In [62]:
train_sensex_volatis = []
train_nifty_volatis = []
train_niftyc_volatis = []

train_sensex_N_day_returns = []
train_nifty_N_day_returns = []
train_niftyc_N_day_returns = []

train_sensex_closing_prices = []
train_nifty_closing_prices = []
train_niftyc_closing_prices = []

In [63]:
test_sensex_volatis = []
test_nifty_volatis = []
test_niftyc_volatis = []

test_sensex_N_day_returns = []
test_nifty_N_day_returns = []
test_niftyc_N_day_returns = []

test_sensex_closing_prices = []
test_nifty_closing_prices = []
test_niftyc_closing_prices = []

In [64]:
for i in range(int(0.75 * total_size)):
    train_sensex_volatis.append(sensex_volatis[i])
    train_nifty_volatis.append(nifty_volatis[i])
    train_niftyc_volatis.append(niftyc_volatis[i])

    train_sensex_N_day_returns.append(sensex_N_day_returns[i])
    train_nifty_N_day_returns.append(nifty_N_day_returns[i])
    train_niftyc_N_day_returns.append(niftyc_N_day_returns[i])

    train_sensex_closing_prices.append(sensex_closing_prices[i])
    train_nifty_closing_prices.append(nifty_closing_prices[i])
    train_niftyc_closing_prices.append(niftyc_closing_prices[i])

In [65]:
for i in range(int(0.75 * total_size), total_size):
    test_sensex_volatis.append(sensex_volatis[i])
    test_nifty_volatis.append(nifty_volatis[i])
    test_niftyc_volatis.append(niftyc_volatis[i])

    test_sensex_N_day_returns.append(sensex_N_day_returns[i])
    test_nifty_N_day_returns.append(nifty_N_day_returns[i])
    test_niftyc_N_day_returns.append(niftyc_N_day_returns[i])

    test_sensex_closing_prices.append(sensex_closing_prices[i])
    test_nifty_closing_prices.append(nifty_closing_prices[i])
    test_niftyc_closing_prices.append(niftyc_closing_prices[i])

In [66]:
# print(len(train_sensex_volatis))
# print(len(test_sensex_volatis))
print(len(train_sensex_volatis[1]))
print(len(train_sensex_N_day_returns[1]))

25
25


In [59]:
train_pattern_graphs = []

for i in range(int(train_size * total_size)):
    train_sensex_volatis_avg = np.average(train_sensex_volatis[i])
    train_nifty_volatis_avg = np.average(train_nifty_volatis[i])
    train_niftyc_volatis_avg = np.average(train_niftyc_volatis[i])
    
    print("Done")
    shape = (64, 64)
    graph = np.zeros(shape)
    
    list_size = len(train_sensex_volatis[i])
    
    for j in range(list_size):
        if train_sensex_N_day_returns[i][j] >= 0 and train_sensex_volatis[i][j] >= train_sensex_volatis_avg:
            prev_sensex_node = 0
        if train_sensex_N_day_returns[i][j] >= 0 and train_sensex_volatis[i][j] < train_sensex_volatis_avg:
            prev_sensex_node = 1
        if train_sensex_N_day_returns[i][j] < 0 and train_sensex_volatis[i][j] >= train_sensex_volatis_avg:
            prev_sensex_node = 2
        if train_sensex_N_day_returns[i][j] < 0 and train_sensex_volatis[i][j] < train_sensex_volatis_avg:
            prev_sensex_node = 3
            
        if train_nifty_N_day_returns[i][j] >= 0 and train_nifty_volatis[i][j] >= train_nifty_volatis_avg:
            prev_nifty_node = 0
        if train_nifty_N_day_returns[i][j] >= 0 and train_nifty_volatis[i][j] < train_nifty_volatis_avg:
            prev_nifty_node = 1
        if train_nifty_N_day_returns[i][j] < 0 and train_nifty_volatis[i][j] >= train_nifty_volatis_avg:
            prev_nifty_node = 2
        if train_nifty_N_day_returns[i][j] < 0 and train_nifty_volatis[i][j] < train_nifty_volatis_avg:
            prev_nifty_node = 3
        
        if train_niftyc_N_day_returns[i][j] >= 0 and train_niftyc_volatis[i][j] >= train_niftyc_volatis_avg:
            prev_niftyc_node = 0
        if train_niftyc_N_day_returns[i][j] >= 0 and train_niftyc_volatis[i][j] < train_niftyc_volatis_avg:
            prev_niftyc_node = 1
        if train_niftyc_N_day_returns[i][j] < 0 and train_niftyc_volatis[i][j] >= train_niftyc_volatis_avg:
            prev_niftyc_node = 2
        if train_niftyc_N_day_returns[i][j] < 0 and train_niftyc_volatis[i][j] < train_niftyc_volatis_avg:
            prev_niftyc_node = 3
            
        if j + 1 < list_size:
            if train_sensex_N_day_returns[i][j + 1] >= 0 and train_sensex_volatis[i][j + 1] >= train_sensex_volatis_avg:
                next_sensex_node = 0
            if train_sensex_N_day_returns[i][j + 1] >= 0 and train_sensex_volatis[i][j + 1] < train_sensex_volatis_avg:
                next_sensex_node = 1
            if train_sensex_N_day_returns[i][j + 1] < 0 and train_sensex_volatis[i][j + 1] >= train_sensex_volatis_avg:
                next_sensex_node = 2
            if train_sensex_N_day_returns[i][j + 1] < 0 and train_sensex_volatis[i][j + 1] < train_sensex_volatis_avg:
                next_sensex_node = 3

            if train_nifty_N_day_returns[i][j + 1] >= 0 and train_nifty_volatis[i][j + 1] >= train_nifty_volatis_avg:
                next_nifty_node = 0
            if train_nifty_N_day_returns[i][j + 1] >= 0 and train_nifty_volatis[i][j + 1] < train_nifty_volatis_avg:
                next_nifty_node = 1
            if train_nifty_N_day_returns[i][j + 1] < 0 and train_nifty_volatis[i][j + 1] >= train_nifty_volatis_avg:
                next_nifty_node = 2
            if train_nifty_N_day_returns[i][j + 1] < 0 and train_nifty_volatis[i][j + 1] < train_nifty_volatis_avg:
                next_nifty_node = 3

            if train_niftyc_N_day_returns[i][j + 1] >= 0 and train_niftyc_volatis[i][j + 1] >= train_niftyc_volatis_avg:
                next_niftyc_node = 0
            if train_niftyc_N_day_returns[i][j + 1] >= 0 and train_niftyc_volatis[i][j + 1] < train_niftyc_volatis_avg:
                next_niftyc_node = 1
            if train_niftyc_N_day_returns[i][j + 1] < 0 and train_niftyc_volatis[i][j + 1] >= train_niftyc_volatis_avg:
                next_niftyc_node = 2
            if train_niftyc_N_day_returns[i][j + 1] < 0 and train_niftyc_volatis[i][j + 1] < train_niftyc_volatis_avg:
                next_niftyc_node = 3

        prev_node = prev_sensex_node * (4 ** 0) + prev_nifty_node * (4 ** 1) + prev_niftyc_node * (4 ** 2)
        next_node = next_sensex_node * (4 ** 0) + next_nifty_node * (4 ** 1) + next_niftyc_node * (4 ** 2)
        
        graph[prev_node][next_node] += 1
    train_pattern_graphs.append(graph)

Done


IndexError: list index out of range

In [44]:
print(len(train_pattern_graphs))

0
