In [7]:
import datetime

In [8]:
import os
pwd = os.path.abspath('.')
os.chdir(os.path.join(pwd, '../src/'))

In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  ### Get Index data for date range

In [3]:
from markets_insights.datareader.data_reader import NseIndicesReader
reader = NseIndicesReader()

from markets_insights.datareader.data_reader import DateRangeDataReader
daterange_reader = DateRangeDataReader(reader)

from_date = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = daterange_reader.read(from_date = from_date, to_date = to_date)

Downloading data for 2024-01-15 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_15012024.csv
Downloading data for 2024-01-16 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_16012024.csv


### Calculation pipeline for RSI
Below example demonstrates calculating RSI using the calculation pipeline. The datepart calculation is pre-requisite for RSI calculation.

In [4]:
# import classes & setup options
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, MultiDataCalculationPipelines, CalculationPipelineBuilder, HistoricalDataProcessOptions
from markets_insights.calculations.base import DatePartsCalculationWorker
options = HistoricalDataProcessOptions()
options.include_monthly_data = False
options.include_annual_data = False
histDataProcessor = HistoricalDataProcessor(options)

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date.today() + datetime.timedelta(days=-1)
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

# Prepare calculation pipeline
pipelines = MultiDataCalculationPipelines()
pipelines.set_item('date_parts', CalculationPipelineBuilder.create_pipeline_for_worker(DatePartsCalculationWorker()))
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline())
histDataProcessor.set_calculation_pipelines(pipelines)

# Run the pipeline and get data
histDataProcessor.run_calculation_pipelines()
result.get_daily_data()

Started to read data
Reading data from 2024-01-05 to 2024-01-16
Saving data to file: ../../_data/processed/historical/Indices.csv
get_data took 3 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 0 seconds
process took 3 seconds
DatePartsCalculationWorker took 0 seconds
RsiCalculationWorker took 1 seconds
ValueCrossedAboveFlagWorker took 0 seconds
ValueCrossedBelowFlagWorker took 0 seconds


Unnamed: 0,Identifier,Index Date,Open,High,Low,Close,Points Change,Change(%),Volume,Turnover (Rs. Cr.),...,Div Yield,Date,Growth,Year,MonthNo,Month,Day,Rsi,RsiCrossedAbove,RsiCrossedBelow
0,INDIA VIX,02-01-2023,14.8675,15.4525,14.5675,14.69,-0.18,-1.23,-,-,...,-,2023-01-02,0.000000,2023,1,Jan,Monday,,False,False
1,INDIA VIX,03-01-2023,14.685,14.99,14.305,14.39,-0.30,-2.04,-,-,...,-,2023-01-03,-2.042206,2023,1,Jan,Tuesday,,False,False
2,INDIA VIX,04-01-2023,14.385,15.6825,14.33,15.20,0.81,5.65,-,-,...,-,2023-01-04,3.471749,2023,1,Jan,Wednesday,,False,False
3,INDIA VIX,05-01-2023,15.1975,15.6,14.8125,14.98,-0.22,-1.41,-,-,...,-,2023-01-05,1.974132,2023,1,Jan,Thursday,,False,False
4,INDIA VIX,06-01-2023,14.9825,15.4375,14.6225,15.03,0.04,.28,-,-,...,-,2023-01-06,2.314500,2023,1,Jan,Friday,,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28152,NIFTY500 VALUE 50,10-01-2024,-,-,-,11378.93,-45.15,-.4,769456939,13097.75,...,4.1,2024-01-10,58.083509,2024,1,Jan,Wednesday,65.576014,False,False
28153,NIFTY500 VALUE 50,11-01-2024,-,-,-,11470.74,91.81,.81,728611792,13564.43,...,4.07,2024-01-11,59.358993,2024,1,Jan,Thursday,68.309648,False,False
28154,NIFTY500 VALUE 50,12-01-2024,-,-,-,11569.03,98.30,.86,807980592,14114.3,...,4.04,2024-01-12,60.724502,2024,1,Jan,Friday,70.967707,False,False
28155,NIFTY500 VALUE 50,15-01-2024,-,-,-,11682.07,113.04,.98,1124432233,18617.48,...,4,2024-01-15,62.294927,2024,1,Jan,Monday,73.699849,False,False


### A real use case: Understand the affect of RSI and Stochastic RSI on price
In this use case, understand the affect of RSI and Stochastic RSI on price

##Preparing the data
Calculate RSI and Stochastic RSI for each day.
Add a flag for whenever the RSI crosses the control limits (eg: above 75 and below 30)
Calculate the highest and lowest price change in the next 10 trading sessions.

##Analyse
We will find the average for highest price change and lowest price change whenever the RSI crosses the threshold.

In [6]:
# import classes
from markets_insights.datareader.data_reader import BhavCopyReader
reader = BhavCopyReader()

from markets_insights.datareader.data_reader import MultiDatesDataReader, DateRangeDataReader
daterange_reader = DateRangeDataReader(reader)

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

Started to read data
get_data took 9 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 1 seconds
process took 10 seconds


In [7]:
# prepare calculation pipeline
periods = [1, 7, 15, 30, 45]

pipelines = MultiDataCalculationPipelines()
pipelines.set_item('date_parts', CalculationPipelineBuilder.create_pipeline_for_worker(DatePartsCalculationWorker()))
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline(crossing_above_flag_value = 75, crossing_below_flag_value = 30, window = 14))
pipelines.set_item('stoch_rsi', CalculationPipelineBuilder.create_stoch_rsi_calculation_pipeline(crossing_above_flag_value = 80, crossing_below_flag_value = 20, window = 14))
pipelines.set_item('foward_looking_fall', CalculationPipelineBuilder.create_forward_looking_price_fall_pipeline(periods))
pipelines.set_item('foward_looking_rise', CalculationPipelineBuilder.create_forward_looking_price_rise_pipeline(periods))
histDataProcessor.set_calculation_pipelines(pipelines=pipelines)

# run the pipeline and show results
histDataProcessor.run_calculation_pipelines()

daily_data = result.get_daily_data()

# Import constants so its easier to refer to column names
from markets_insights.core.column_definition import BaseColumns, CalculatedColumns

# get names of fwd looking price column names. Since, these column names are auto-generated there no constants for them
fwd_looking_price_fall_cols, fwd_looking_price_rise_cols = [x for x in daily_data.columns if 'HighestPercFallInNext' in x], \
    [x for x in daily_data.columns if 'HighestPercRiseInNext' in x]

# analyse the median price change % for highest price fall whenever the RSI crosses above
daily_data[
  (daily_data[CalculatedColumns.RsiCrossedAbove])
][fwd_looking_price_fall_cols].median()

# analyse the median price change % for highest price rise whenever the RSI crosses below
daily_data[
  (daily_data[CalculatedColumns.RsiCrossedAbove])
][fwd_looking_price_rise_cols].median()

DatePartsCalculationWorker took 5 seconds
RsiCalculationWorker took 10 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
StochRsiCalculationWorker took 14 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds


HighestPercRiseInNext1Days      0.985232
HighestPercRiseInNext7Days      2.640740
HighestPercRiseInNext15Days     4.465773
HighestPercRiseInNext30Days     7.719928
HighestPercRiseInNext45Days    10.682510
dtype: float64

### Extended Data Reader for Nasdaq

In [5]:
import yfinance as yf
import pandas
from markets_insights.datareader.data_reader import DateRangeDataReader
from markets_insights.core.core import Instrumentation
from markets_insights.core.column_definition import BaseColumns

In [19]:
class NasdaqDataReader (DateRangeDataReader):
  def __init__(self, tickers: list = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'TSLA', 'NVDA']):
    super().__init__(reader=None)
    self.tickers = tickers
    self.name = "NasdaqDataReader"

  @Instrumentation.trace(name="NasdaqDataReader.read")
  def read(self, from_date, to_date):
    df_list = list()
    for ticker in self.tickers:
        data = yf.download(ticker, group_by="Ticker", start=from_date, end=to_date)
        data['ticker'] = ticker
        df_list.append(data)

    # combine all dataframes into a single dataframe
    df = pandas.concat(df_list)

    final_data = df.reset_index().rename(columns = self.get_column_name_mappings())
    final_data[BaseColumns.Date] = pandas.to_datetime(final_data[BaseColumns.Date])
    return final_data
  
  def get_column_name_mappings(self):
    return {
      'ticker': BaseColumns.Identifier,
      'OPEN': BaseColumns.Open,
      'HIGH': BaseColumns.High,
      'LOW': BaseColumns.Low,
      'CLOSE': BaseColumns.Close
    }

In [25]:
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
NasdaqDataReader().read(from_date=year_start, to_date=to_date).tail(5)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
NasdaqDataReader.read took 2 seconds


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Identifier
1745,2023-12-22,491.950012,493.829987,484.670013,488.299988,488.299988,25213900,NVDA
1746,2023-12-26,489.679993,496.0,489.600006,492.790009,492.790009,24420000,NVDA
1747,2023-12-27,495.109985,496.799988,490.850006,494.170013,494.170013,23364800,NVDA
1748,2023-12-28,496.429993,498.839996,494.119995,495.220001,495.220001,24658700,NVDA
1749,2023-12-29,498.130005,499.970001,487.51001,495.220001,495.220001,38869000,NVDA


In [28]:
# import classes & setup options
import datetime
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, MultiDataCalculationPipelines, CalculationPipelineBuilder, HistoricalDataProcessOptions
from markets_insights.calculations.base import DatePartsCalculationWorker

reader = NasdaqDataReader()
options = HistoricalDataProcessOptions()
options.include_monthly_data = False
options.include_annual_data = False
histDataProcessor = HistoricalDataProcessor(options)

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

# Prepare calculation pipeline
pipelines = MultiDataCalculationPipelines()
pipelines.set_item('date_parts', CalculationPipelineBuilder.create_pipeline_for_worker(DatePartsCalculationWorker()))
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline())
histDataProcessor.set_calculation_pipelines(pipelines)

# Run the pipeline
histDataProcessor.run_calculation_pipelines()

Started to read data
Reading data from 2023-01-02 to 2023-12-29
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
NasdaqDataReader.read took 3 seconds
Saving data to file: ../../_data/processed/historical/NasdaqDataReader.csv
get_data took 3 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 0 seconds
process took 3 seconds
DatePartsCalculationWorker took 0 seconds
RsiCalculationWorker took 0 seconds
ValueCrossedAboveFlagWorker took 0 seconds
ValueCrossedBelowFlagWorker took 0 seconds


In [9]:
from markets_insights.core.core import TypeHelper
result.get_daily_data()[TypeHelper.get_class_static_values(BaseColumns)].tail(5)

KeyError: "['PreviousClose', 'Turnover (Rs. Cr.)', 'TOTTRDQTY'] not in index"

In [34]:
from markets_insights.core.column_definition import CalculatedColumns

result.get_daily_data() \
  .sort_values(
    [BaseColumns.Date, BaseColumns.Identifier]
  )[
    [BaseColumns.Identifier, BaseColumns.Date, BaseColumns.Close, 
     CalculatedColumns.RelativeStrengthIndex]
  ] \
  .tail(7).to_markdown()

'|      | Identifier   | Date                |   Close |     Rsi |\n|-----:|:-------------|:--------------------|--------:|--------:|\n|  248 | AAPL         | 2023-12-28 00:00:00 |  193.58 | 54.4815 |\n|  497 | AMZN         | 2023-12-28 00:00:00 |  153.38 | 63.9387 |\n|  746 | GOOGL        | 2023-12-28 00:00:00 |  140.23 | 61.585  |\n|  995 | META         | 2023-12-28 00:00:00 |  358.32 | 70.2377 |\n| 1244 | MSFT         | 2023-12-28 00:00:00 |  375.28 | 56.909  |\n| 1493 | NVDA         | 2023-12-28 00:00:00 |  495.22 | 58.305  |\n| 1742 | TSLA         | 2023-12-28 00:00:00 |  253.18 | 55.9788 |'

### Create a Calculation Pipeline

In [10]:
import markets_insights

In [24]:
## import modules
from markets_insights.calculations.base import CalculationWorker
from markets_insights.core.core import Instrumentation
from markets_insights.calculations.base import BaseColumns
import pandas

# Implement the worker class. The important aspect here is to override the add_calculated_columns() method
class FibnocciRetracementCalculationWorker (CalculationWorker):
  def __init__(self, time_window: int, level_perct: float):
    self._time_window = time_window
    self._level = level_perct / 100
    self._column_name = 'Fbr' + str(level_perct)

  @Instrumentation.trace(name="FibnocciRetracementCalculationWorker")
  def add_calculated_columns(self, data: pandas.DataFrame):
    identifier_grouped_data: pandas.DataFrame = data.groupby(BaseColumns.Identifier)
    #Since, our dataframe may contain data for multiple symbols, we need to first group them by Identifier
    data[self._column_name] = identifier_grouped_data[BaseColumns.Close].transform(
        lambda x: 
          x.rolling(self._time_window).max() - 
          (
            (x.rolling(self._time_window).max() - x.rolling(self._time_window).min())  * self._level
          )
      )

In [28]:
# Create pipline with the FibnocciRetracementCalculationWorker and run 
from markets_insights.datareader.data_reader import NseIndicesReader
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, HistoricalDataProcessOptions, \
  MultiDataCalculationPipelines, CalculationPipeline
histDataProcessor = HistoricalDataProcessor(HistoricalDataProcessOptions(include_monthly_data=False, include_annual_data=False))

# Fetch the data
result = histDataProcessor.process(NseIndicesReader(), {'from_date': datetime.date(2023, 12, 1), 'to_date': datetime.date(2023, 12, 31)})

# Prepare calculation pipeline
fbr50_worker = FibnocciRetracementCalculationWorker(time_window=7, level_perct=50)
pipelines = MultiDataCalculationPipelines()
histDataProcessor.set_calculation_pipelines(
  CalculationPipeline(
    workers = [fbr50_worker]
  )
)

# Run the pipeline and get data
histDataProcessor.run_calculation_pipelines()

Started to read data
get_data took 1 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 0 seconds
process took 1 seconds
FibnocciRetracementCalculationWorker took 0 seconds


In [30]:
## Display the results
result.get_daily_data()[[
  BaseColumns.Identifier, BaseColumns.Date, BaseColumns.Close, fbr50_worker._column_name
]].tail(5)

Unnamed: 0,Identifier,Index Date,Open,High,Low,Close,Points Change,Change(%),Volume,Turnover (Rs. Cr.),P/E,P/B,Div Yield,Date,Growth,Fbr50
2141,NIFTY ALPHA 50,29-12-2023,42233.6,42381.85,41950.45,42306.3,246.9,0.59,511585500,10232.22,26.68,3.81,.93,2023-12-29,6.700311,41655.7
2142,NIFTY GROWTH SECTORS 15,29-12-2023,10773.75,10825.75,10753.55,10787.7,-3.05,-0.03,79665003,6151.36,33.04,8.26,1.97,2023-12-29,5.425849,10646.675
2143,NIFTY MIDSMALLCAP 400,29-12-2023,15937.85,16036.4,15871.65,16015.55,121.1,0.76,3599665191,43126.33,26.8,3.89,.87,2023-12-29,5.7977,15663.025
2144,NIFTY 50,29-12-2023,21737.65,21770.3,21676.9,21731.4,-47.3,-0.22,270922276,23697.88,23.17,3.81,1.28,2023-12-29,7.220778,21464.425
2145,NIFTY 15 YR AND ABOVE G-SEC INDEX,29-12-2023,2997.87,3006.17,2997.87,3004.56,7.3,0.24,-,-,-,-,-,2023-12-29,2.091396,3004.09
