## Setup

In [1]:
import os
pwd = os.path.abspath('.')
os.chdir(os.path.join(pwd, '../src/'))

In [2]:
from collections import Counter
import pandas as pd
import seaborn as sns
from core.environment import EnvironmentSettings
from string import Template
from core.column_definition import BaseColumns, CalculatedColumns

import datetime

In [3]:
%load_ext autoreload
%autoreload 2

## Setup

#### Historical Data Processor 

In [4]:
from datareader.data_reader import BhavCopyReader
reader = BhavCopyReader()

from datareader.data_reader import MultiDatesDataReader, DateRangeDataReader, NseDerivatiesOldReader

multidates_reader = MultiDatesDataReader(reader)
daterange_reader = DateRangeDataReader(reader)

from core.core import MarketDaysHelper, TypeHelper

In [5]:
yest = datetime.date.today() + datetime.timedelta(days=-1)

In [6]:
from dataprocess.data_processor import HistoricalDataProcessor, MultiDataCalculationPipelines, CalculationPipelineBuilder, HistoricalDataProcessOptions
options = HistoricalDataProcessOptions()
options.include_monthly_data = False
options.include_annual_data = False
histDataProcessor = HistoricalDataProcessor(options)

In [7]:
year_start = datetime.date(2016, 1, 1)
to_date = yest
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

Started to read data
Reading data from 2023-08-10 to 2023-08-11
Downloading data for 2023-08-11 00:00:00
https://archives.nseindia.com/content/historical/EQUITIES/2023/AUG/cm11AUG2023bhav.csv.zip
Saving data to file: ../../_data/processed/historical/Equities.csv
get_data took 45 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 3 seconds
process took 49 seconds


In [None]:
pipelines = MultiDataCalculationPipelines()
#pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline(crossing_above_flag_value = 75, crossing_below_flag_value = 30))
pipelines.set_item('foward_looking_fall', CalculationPipelineBuilder.create_forward_looking_price_fall_pipeline(range(1, 11)))
pipelines.set_item('foward_looking_rise', CalculationPipelineBuilder.create_forward_looking_price_rise_pipeline(range(1, 11)))
pipelines.set_item('sma', CalculationPipelineBuilder.create_sma_calculation_pipeline())

histDataProcessor.set_calculation_pipelines(pipelines)

In [8]:
histDataProcessor.run_calculation_pipelines()

LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
LowestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
HighestPriceInNextNDaysCalculationWorker took 2 seconds
Hi

## Analysis

#### Expiry Analysis

In [9]:
from datareader.data_reader import NseDerivatiesReader, NseDerivatiesOldReader

In [10]:
data = multidates_reader.read(MarketDaysHelper.get_monthly_expiry_dates(6))
data['Date'].unique()

array(['2023-07-27T00:00:00.000000000', '2023-06-28T00:00:00.000000000',
       '2023-05-25T00:00:00.000000000', '2023-04-27T00:00:00.000000000',
       '2023-03-29T00:00:00.000000000'], dtype='datetime64[ns]')

In [11]:
data['CloseToPrevCloseChangePerc'] = (data['Close'] / data['PreviousClose'] - 1) * 100
data['OpenToPrevCloseChangePerc'] = (data['Open'] / data['PreviousClose'] - 1) * 100

display_columns = ['Identifier', 'PreviousClose', 'Open', 'High', 'Low', 'Close', 'Date', 
                    'CloseToPrevCloseChangePerc', 'OpenToPrevCloseChangePerc'
                    ]

In [None]:
data.reindex(data['CloseToPrevCloseChangePerc'].abs().sort_values(ascending=False).index)[data['Identifier'].isin(derivative_instruments)][display_columns].head(20)
#data['Date'].dt.strftime('%d-%b-%Y').str.upper()

In [None]:
quantile = data.sort_values(by='OpenToPrevCloseChangePerc', ascending=True)['OpenToPrevCloseChangePerc'].quantile(q=0.99)
data[data['OpenToPrevCloseChangePerc'] >= quantile][display_columns]

#### RSI Crossing Above/Below

##### Calculate RSI

In [12]:
def get_symbols_with_rsi_signal(from_date: datetime.datetime, to_date: datetime.datetime, cross_above_level: float, 
    cross_below_level: float, output_sub_folder: str, derivative_instruments):
  result = histDataProcessor.process(reader, {'from_date': from_date, 'to_date': to_date})
  histDataProcessor.run_calculation_pipelines()
  daily_data = result.get_daily_data()
  identifier_grouped_data = result.get_identifier_grouped()
  data = daily_data

  data[CalculatedColumns.RsiCrossedAbove] = identifier_grouped_data[CalculatedColumns.RelativeStrengthIndex].transform(lambda x: 
    (x.shift(-1) < cross_above_level) & (x >= cross_above_level)
  )

  data[CalculatedColumns.RsiCrossedBelow] = identifier_grouped_data[CalculatedColumns.RelativeStrengthIndex].transform(lambda x: 
    (x.shift(-1) > cross_below_level) & (x <= cross_below_level)
  )

  data[(data[CalculatedColumns.RsiCrossedAbove] == True) & (data[BaseColumns.Identifier].isin(derivative_instruments))].to_csv(f"{output_sub_folder}/sell_signal_symbols.csv")
  data[(data[CalculatedColumns.RsiCrossedBelow] == True) & (data[BaseColumns.Identifier].isin(derivative_instruments))].to_csv(f"{output_sub_folder}/buy_signal_symbols.csv")


In [None]:
from core.core import MarketDaysHelper
cross_above_level = 75
cross_below_level = 30
output_base_path = '../output/trades/rsi'
for year in range(2016, 2023):
  print(f'Executing for {year}')
  year_start = datetime.date(year, 1, 1)
  to_date = datetime.date(year, 12, 31)

  for_date = MarketDaysHelper.get_this_or_next_market_day(year_start)
  derivative_instruments = NseDerivatiesOldReader().read(for_date)['Identifier'].unique()

  get_symbols_with_rsi_signal(year_start, to_date, cross_above_level, cross_below_level, output_base_path + '/' + year_start.strftime('%Y'), derivative_instruments)

##### Check best holding timeframe based on RSI

In [10]:
  derivative_instruments = NseDerivatiesOldReader().read(datetime.datetime(2023, 8, 9))['Identifier'].unique()

In [11]:
from calculations.base import CalculationWorker
from core.core import Instrumentation

In [12]:
daily_data = result.get_daily_data()
derivative_instruments_daily_data = daily_data[daily_data[BaseColumns.Identifier].isin(derivative_instruments)]

In [13]:
fwd_looking_price_fall_cols = [x for x in daily_data.columns if 'HighestPercFallInNext' in x]
derivative_instruments_daily_data[derivative_instruments_daily_data[CalculatedColumns.RsiCrossedAbove] == True][fwd_looking_price_fall_cols].median()

HighestPercFallInNext1Days     2.183020
HighestPercFallInNext2Days     2.968995
HighestPercFallInNext3Days     3.401319
HighestPercFallInNext4Days     3.770183
HighestPercFallInNext5Days     4.055996
HighestPercFallInNext6Days     4.316389
HighestPercFallInNext7Days     4.565894
HighestPercFallInNext8Days     4.840434
HighestPercFallInNext9Days     5.051904
HighestPercFallInNext10Days    5.262850
dtype: float64

In [14]:
fwd_looking_price_rise_cols = [x for x in daily_data.columns if 'HighestPercRiseInNext' in x]
derivative_instruments_daily_data[derivative_instruments_daily_data[CalculatedColumns.RsiCrossedBelow] == True][fwd_looking_price_rise_cols].median()
#fwd_looking_price_rise_cols

HighestPercRiseInNext1Days     2.550922
HighestPercRiseInNext2Days     3.464203
HighestPercRiseInNext3Days     4.001278
HighestPercRiseInNext4Days     4.511912
HighestPercRiseInNext5Days     4.857013
HighestPercRiseInNext6Days     5.197879
HighestPercRiseInNext7Days     5.529805
HighestPercRiseInNext8Days     5.814108
HighestPercRiseInNext9Days     6.094214
HighestPercRiseInNext10Days    6.328030
dtype: float64

### Debug

##### RSI

In [53]:
result = histDataProcessor.process(reader, {'from_date': datetime.date(2023, 7, 1), 'to_date': datetime.date.today()})
#identifier_grouped_data = result.get_identifier_grouped()
data = result.get_daily_data()

Started to read data
Reading data from 2023-08-09 to 2023-08-10
Downloading data for 2023-08-10 00:00:00
https://archives.nseindia.com/content/historical/EQUITIES/2023/AUG/cm10AUG2023bhav.csv.zip
The read operation timed out date(2023, 08, 10),
Saving data to file: ../../_data/processed/historical/Equities.csv
get_data took 46 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 1 seconds
Started periodic calculation for Month
add_monthly_growth_calc took 3 seconds
Started periodic calculation for Year
add_yearly_growth_calc took 1 seconds
process took 51 seconds


In [58]:
_time_window = 14

def calculate_wsm_average(raw_data, data, avg_col_name, abs_col_name):
  step = 1
  for i, row in enumerate(data[avg_col_name].iloc[_time_window + step:]):
    raw_data.at[data.index[i + _time_window + step], avg_col_name] =\
      (data[avg_col_name].iloc[i + _time_window] *
      (_time_window - step) +
      data[abs_col_name].iloc[i + _time_window + step])\
      / _time_window

In [62]:
identifier_grouped_data = data.groupby(BaseColumns.Identifier)
data[CalculatedColumns.ClosePriceDiff] = identifier_grouped_data[BaseColumns.Close].transform(lambda x: x.diff(1))
data[CalculatedColumns.Gain] = identifier_grouped_data[CalculatedColumns.ClosePriceDiff].transform(lambda x: x.clip(lower=0).round(2))
data[CalculatedColumns.Loss] = identifier_grouped_data[CalculatedColumns.ClosePriceDiff].transform(lambda x: x.clip(upper=0).abs().round(2))

data[CalculatedColumns.ClosePriceDiff].fillna(0)
data[CalculatedColumns.Gain].fillna(0)
data[CalculatedColumns.Loss].fillna(0)

# Get initial Averages
data[CalculatedColumns.AvgGain] = identifier_grouped_data[CalculatedColumns.Gain].transform(lambda x: 
                                    x.rolling(window=_time_window, min_periods=_time_window).mean())
data[CalculatedColumns.AvgLoss] = identifier_grouped_data[CalculatedColumns.Loss].transform(lambda x: 
                                    x.rolling(window=_time_window, min_periods=_time_window).mean())

for identifier in data['Identifier'].unique():
  calculate_wsm_average(data, identifier_grouped_data.get_group(identifier), CalculatedColumns.AvgGain, CalculatedColumns.Gain) 
  calculate_wsm_average(data, identifier_grouped_data.get_group(identifier), CalculatedColumns.AvgLoss, CalculatedColumns.Loss)

data[CalculatedColumns.RelativeStrength] = data[CalculatedColumns.AvgGain] / data[CalculatedColumns.AvgLoss]
data[CalculatedColumns.RelativeStrengthIndex] = 100 - (100 / (1.0 + data[CalculatedColumns.RelativeStrength]))

In [64]:
data[data[BaseColumns.Identifier] == 'TVSMOTOR'].tail(5)[[BaseColumns.Date, CalculatedColumns.RelativeStrengthIndex]]

Unnamed: 0,Date,Rsi
42317,2023-08-03,56.345408
44033,2023-08-04,51.629586
45742,2023-08-07,49.179718
50319,2023-08-08,46.982477
51286,2023-08-09,51.312888
