In [39]:
import pandas as pd
import numpy as np
import ta

# Import KMeans from sklearn
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression

import yfinance as yf
from pandas_datareader import data as pdr
yf.pdr_override()

In [2]:
# Get data
data = yf.download('^NSEBANK', start="2021-05-01", end="2024-05-01")

[*********************100%%**********************]  1 of 1 completed


In [3]:
print(data.head())

                    Open          High           Low         Close  \
Date                                                                 
2021-05-03  32368.550781  32566.599609  31906.550781  32465.750000   
2021-05-04  32610.400391  33011.550781  32191.400391  32270.349609   
2021-05-05  32577.099609  32862.148438  32068.550781  32783.699219   
2021-05-06  32900.750000  32956.351562  32564.000000  32827.800781   
2021-05-07  33041.648438  33259.148438  32771.351562  32904.500000   

               Adj Close  Volume  
Date                              
2021-05-03  32465.373047  351000  
2021-05-04  32269.974609  786800  
2021-05-05  32783.316406       0  
2021-05-06  32827.417969       0  
2021-05-07  32904.117188       0  


In [4]:
def calculate_psy(closing_prices, period=13):
    """
    Calculate the 13-day rolling PSY (Psychological Line) for a list of closing prices.
    
    Parameters:
    closing_prices (list or pd.Series): List or Series of closing prices.
    period (int): Number of periods to calculate the PSY, default is 13.
    
    Returns:
    pd.Series: Series of PSY values.
    """
    # Convert closing_prices to a pandas Series if it's not already
    if not isinstance(closing_prices, pd.Series):
        closing_prices = pd.Series(closing_prices)
    
    # Calculate the number of rising periods in each rolling window
    rising_periods = (closing_prices.diff() > 0).astype(int).rolling(window=period).sum()
    
    # Calculate the PSY
    psy = (rising_periods / period) * 100
    
    return psy

In [5]:
data['MA']= ta.trend.sma_indicator(data['Close'], window=6, fillna=True)
data['BIAS'] = (data['Close'] - data['MA']) 
data['RSI'] = ta.momentum.rsi(data['Close'], window=6, fillna=True)
data['stochrsi_k'] = ta.momentum.stochrsi_k(data['Close'], window=9, fillna=True)
data['stochrsi_d'] = ta.momentum.stochrsi_d(data['Close'], window=9, fillna=True)
data['MACD'] = ta.trend.macd(data['Close'], window_slow=20, window_fast=9, fillna=True)
data['PSY'] = calculate_psy(data['Close'])
data.drop(columns=['Open', 'High', 'Low', 'Adj Close'], inplace=True)

In [58]:
print(data)

                   Close  Volume            MA         BIAS         RSI  \
Date                                                                      
2021-05-03  32465.750000  351000  32465.750000     0.000000  100.000000   
2021-05-04  32270.349609  786800  32368.049805   -97.700195    0.000000   
2021-05-05  32783.699219       0  32506.599609   277.099609   75.918709   
2021-05-06  32827.800781       0  32586.899902   240.900879   77.666642   
2021-05-07  32904.500000       0  32650.419922   254.080078   80.604697   
...                  ...     ...           ...          ...         ...   
2024-04-24  48189.000000  112200  47702.124349   486.875651   59.841015   
2024-04-25  48494.949219  337400  47870.482422   624.466797   66.589393   
2024-04-26  48201.050781  208100  48059.082682   141.968099   55.783688   
2024-04-29  49424.050781  327100  48367.399740  1056.651042   75.575465   
2024-04-30  49396.750000  402900  48612.708333   784.041667   74.680025   

            stochrsi_k  

In [6]:
print(data.columns)

Index(['Close', 'Volume', 'MA', 'BIAS', 'RSI', 'stochrsi_k', 'stochrsi_d',
       'MACD', 'PSY'],
      dtype='object')


In [7]:
print(data.index)

DatetimeIndex(['2021-05-03', '2021-05-04', '2021-05-05', '2021-05-06',
               '2021-05-07', '2021-05-10', '2021-05-11', '2021-05-12',
               '2021-05-14', '2021-05-17',
               ...
               '2024-04-16', '2024-04-18', '2024-04-19', '2024-04-22',
               '2024-04-23', '2024-04-24', '2024-04-25', '2024-04-26',
               '2024-04-29', '2024-04-30'],
              dtype='datetime64[ns]', name='Date', length=738, freq=None)


In [15]:
# Fill missing values (zeros and nas and missing dates)
data.replace([0, 0.0], np.nan, inplace=True)
data.interpolate(method='time', inplace=True)
data.dropna(inplace=True)

In [16]:
print(data.shape, data)

(726, 9)                    Close         Volume            MA         BIAS        RSI  \
Date                                                                            
2021-05-20  33334.550781  614993.103448  33170.541667   164.009115  56.176140   
2021-05-21  34606.898438  604255.172414  33529.641276  1077.257161  74.063461   
2021-05-24  34943.601562  572041.379310  33991.983073   951.618490  77.039500   
2021-05-25  34662.000000  561303.448276  34192.441406   469.558594  69.083905   
2021-05-26  34684.199219  550565.517241  34319.408203   364.791016  69.382998   
...                  ...            ...           ...          ...        ...   
2024-04-24  48189.000000  112200.000000  47702.124349   486.875651  59.841015   
2024-04-25  48494.949219  337400.000000  47870.482422   624.466797  66.589393   
2024-04-26  48201.050781  208100.000000  48059.082682   141.968099  55.783688   
2024-04-29  49424.050781  327100.000000  48367.399740  1056.651042  75.575465   
2024-04-30  49396.7

In [17]:
print(data.shape, data)

(726, 9)                    Close         Volume            MA         BIAS        RSI  \
Date                                                                            
2021-05-20  33334.550781  614993.103448  33170.541667   164.009115  56.176140   
2021-05-21  34606.898438  604255.172414  33529.641276  1077.257161  74.063461   
2021-05-24  34943.601562  572041.379310  33991.983073   951.618490  77.039500   
2021-05-25  34662.000000  561303.448276  34192.441406   469.558594  69.083905   
2021-05-26  34684.199219  550565.517241  34319.408203   364.791016  69.382998   
...                  ...            ...           ...          ...        ...   
2024-04-24  48189.000000  112200.000000  47702.124349   486.875651  59.841015   
2024-04-25  48494.949219  337400.000000  47870.482422   624.466797  66.589393   
2024-04-26  48201.050781  208100.000000  48059.082682   141.968099  55.783688   
2024-04-29  49424.050781  327100.000000  48367.399740  1056.651042  75.575465   
2024-04-30  49396.7

In [18]:
print(data.head(15))

                   Close         Volume            MA         BIAS        RSI  \
Date                                                                            
2021-05-20  33334.550781  614993.103448  33170.541667   164.009115  56.176140   
2021-05-21  34606.898438  604255.172414  33529.641276  1077.257161  74.063461   
2021-05-24  34943.601562  572041.379310  33991.983073   951.618490  77.039500   
2021-05-25  34662.000000  561303.448276  34192.441406   469.558594  69.083905   
2021-05-26  34684.199219  550565.517241  34319.408203   364.791016  69.382998   
2021-05-27  35095.050781  539827.586207  34554.383464   540.667318  74.797856   
2021-05-28  35141.449219  529089.655172  34855.533203   285.916016  75.387752   
2021-05-31  35526.648438  496875.862069  35008.824870   517.823568  80.041727   
2021-06-01  35337.199219  486137.931034  35074.424479   262.774740  72.005917   
2021-06-02  35373.750000  475400.000000  35193.049479   180.700521  72.641813   
2021-06-03  35649.000000  46

In [19]:
data.to_csv('data.csv')

In [90]:
data = pd.read_csv('data.csv', index_col='Date', parse_dates=True)
data['Next Close'] = data['Close'].shift(-1)
data.dropna(inplace=True)

In [91]:
# SPlit the data into training, testing and validation sets
train = data.loc[:'2023-12-30']
test = data.loc['2024-01-01':'2024-03-01']
validation = data.loc['2024-03-01':]

In [92]:
print(train.shape, test.shape, validation.shape)

(648, 10) (41, 10) (37, 10)


In [93]:
# Write a function for performing K Means clustering on a given data
def k_means_clustering(data, n_clusters=3):
    """
    Perform K Means clustering on a given dataset.
    
    Parameters:
    data (pd.DataFrame): Dataset to be clustered.
    n_clusters (int): Number of clusters to form.
    
    Returns:
    pd.DataFrame: Dataset with an additional column for the cluster labels.
    """
    data_copy = data.copy()
    
    # Initialize the KMeans model
    kmeans = KMeans(n_clusters=n_clusters)
    
    # Fit the model to the data
    data_copy['Cluster'] = kmeans.fit_predict(data)
    
    return data_copy, kmeans

In [94]:
print(data.columns)

Index(['Close', 'Volume', 'MA', 'BIAS', 'RSI', 'stochrsi_k', 'stochrsi_d',
       'MACD', 'PSY', 'Next Close'],
      dtype='object')


In [95]:
# Write a function for the fuzzy-TSK model
def fuzzy_tsk_train(data):

    # Perform
    n_clusters = 9
    clustered_data, kmeans = k_means_clustering(data, n_clusters=n_clusters)

    models = [None]*n_clusters
    # For each cluster, perform linear regression
    for cluster in range(9):
        # Get the data for the current cluster
        cluster_data = clustered_data[clustered_data['Cluster'] == cluster]

        # Perform linear regression
        Y = cluster_data['Next Close']
        X = cluster_data.drop(columns=['Next Close','Close', 'Cluster'])
        model = LinearRegression().fit(X, Y)
        # model = np.polyfit(X, Y, 1)

        # Add the model to the dictionary
        models[cluster] = model
    return models, kmeans

In [96]:
# Write a function to test the fuzzy-TSK model
def fuzzy_tsk_test(data, models, kmeans):
    # Perform clustering on the test data
    test_data = data.copy()
    test_data['Cluster'] = kmeans.predict(data)

    # Initialize an empty list to store the predictions
    predictions = []

    # For each row in the test data, make a prediction based on the cluster
    for index, row in test_data.iterrows():
        # Get the model for the current cluster
        model = models[int(row['Cluster'])].predict(row.drop(['Next Close','Close', 'Cluster']).values.reshape(1, -1))

        # Make a prediction based on the model
        prediction = model[0] 

        # Add the prediction to the list
        predictions.append(prediction)

    # Add the predictions to the test data
    test_data['Prediction'] = predictions

    return test_data

In [97]:
# Write a function to evaluate the fuzzy-TSK model
def evaluate_fuzzy_tsk(test_data):
    # Calculate the mean squared error
    mse = ((test_data['Next Close'] - test_data['Prediction']) ** 2).mean()
    return mse

In [98]:
print(data.columns)

Index(['Close', 'Volume', 'MA', 'BIAS', 'RSI', 'stochrsi_k', 'stochrsi_d',
       'MACD', 'PSY', 'Next Close'],
      dtype='object')


In [99]:
# Train the fuzzy-TSK model
models, kmeans = fuzzy_tsk_train(train)

# Test the fuzzy-TSK model
test_data = fuzzy_tsk_test(test, models, kmeans)

# Evaluate the fuzzy-TSK model
mse = evaluate_fuzzy_tsk(test_data)

print(f'Mean Squared Error: {mse}')

Mean Squared Error: 337125.37406247365




In [101]:
test_data.to_csv('test_data.csv')