### Setup

In [1]:
import datetime

In [2]:
import os
pwd = os.path.abspath('.')
os.chdir(os.path.join(pwd, '../src/'))

In [3]:
%load_ext autoreload
%autoreload 2

In [9]:
test = {'a': 1, 'b': 2}
class TestEx:
  def __init__(self, a: int = None, c: float = None):
    self.a = a
    self.c = c

TestEx(**(test))

TypeError: __init__() got an unexpected keyword argument 'b'

In [13]:
for key in test:
  print(key)

a
b


In [14]:
test.items()

dict_items([('a', 1), ('b', 2)])

  ### Get Index data for date range

In [14]:
from markets_insights.datareader import data_reader
import datetime

reader = data_reader.NseIndicesReader()

daterange_reader = data_reader.DateRangeDataReader(reader)

from_date = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = daterange_reader.read(from_date = from_date, to_date = to_date)
print(result.head(5).to_markdown())

### Get daily, monthly and annually aggregrated data
In this example we will use HistoricalDataProcessor class to get data between a date range. HistoricalDataProcessor will also do monthly and annual aggregation of data.

In [13]:
# import classes & setup
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor
from markets_insights.datareader.data_reader import NseIndicesReader
histDataProcessor = HistoricalDataProcessor()

# Fetch and process the data
year_start = datetime.date(2023, 12, 1)
year_end = datetime.date(2023, 12, 31)
result = histDataProcessor.process(NseIndicesReader(), {'from_date': year_start, 'to_date': year_end})

Started to read data
Reading data from 2023-12-01 to 2023-12-29
Downloading data for 2023-12-01 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_01122023.csv
Downloading data for 2023-12-04 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_04122023.csv
Downloading data for 2023-12-05 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_05122023.csv
Downloading data for 2023-12-06 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_06122023.csv
Downloading data for 2023-12-07 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_07122023.csv
Downloading data for 2023-12-08 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_08122023.csv
Downloading data for 2023-12-11 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_11122023.csv
Downloading data for 2023-12-12 00:00:00
https://archives.nseindia.com/content/indices/ind_close_all_12122023.csv
Downloading data for 202

In [21]:
from markets_insights.core.column_definition import BaseColumns

print(result.get_monthly_data().sort_values(BaseColumns.Date).head(3).to_markdown())

|      | Identifier                                  | Date                | Month   |      Volume |    Turnover |    Close |     High |        Low |       Open |
|-----:|:--------------------------------------------|:--------------------|:--------|------------:|------------:|---------:|---------:|-----------:|-----------:|
|    0 | INDIA VIX                                   | 2023-01-31 00:00:00 | 2023-01 | 0           | 0           |    16.88 |    19.39 |    11.6425 |    14.8675 |
| 1012 | NIFTY100 ENHANCED ESG                       | 2023-01-31 00:00:00 | 2023-01 | 8.12952e+09 | 4.99243e+12 |  3352.6  |  3490.75 |  3352.6    |  3472.29   |
|  228 | NIFTY ALPHA QUALITY VALUE LOW-VOLATILITY 30 | 2023-01-31 00:00:00 | 2023-01 | 2.32482e+09 | 1.3303e+12  | 11286.1  | 11599.4  | 11214.5    | 11456.8    |


### Calculation pipeline for RSI

In [30]:
# import classes & setup options
import datetime
from markets_insights.datareader.data_reader import BhavCopyReader
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, MultiDataCalculationPipelines, CalculationPipelineBuilder, HistoricalDataProcessOptions
from markets_insights.calculations.base import DatePartsCalculationWorker

reader = BhavCopyReader()
options = HistoricalDataProcessOptions()
options.include_monthly_data = False
options.include_annual_data = False
histDataProcessor = HistoricalDataProcessor(options)

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

# Prepare calculation pipeline
pipelines = MultiDataCalculationPipelines()
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline())
histDataProcessor.set_calculation_pipelines(pipelines)

# Run the pipeline
histDataProcessor.run_calculation_pipelines()

Started to read data
HistoricalDataProcessor.get_data took 1 seconds
HistoricalDataProcessor.get_manual_data took 0 seconds
HistoricalDataProcessor.run_base_calculations took 18 seconds
HistoricalDataProcessor.process took 19 seconds
RsiCalculationWorker took 9 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds


In [32]:
from markets_insights.core.column_definition import BaseColumns, CalculatedColumns

print(result.get_daily_data().sort_values(BaseColumns.Date).tail(3)[[BaseColumns.Identifier, BaseColumns.Date, BaseColumns.Close, CalculatedColumns.RelativeStrengthIndex]].to_markdown())

|        | Identifier   | Date                |   Close |     Rsi |
|-------:|:-------------|:--------------------|--------:|--------:|
| 336063 | RKFORGE      | 2023-12-29 00:00:00 |  725.7  | 46.9257 |
| 329710 | RBL          | 2023-12-29 00:00:00 |  852.95 | 54.8479 |
| 446931 | ZYDUSWELL    | 2023-12-29 00:00:00 | 1681.1  | 72.0492 |


### Test Calculation

In [10]:
from markets_insights.calculations.base import VwapCalculationWorker
worker = VwapCalculationWorker(time_window=14)
print(worker._params)
for window in [window for window in ['time_window', 'N'] if window in worker._params]:
    window_val = int(worker._params[window])
    print(max(0, window_val))
#worker.get_calculation_window().trailing

{'time_window': 14}
14


In [15]:
from markets_insights.core.column_definition import BaseColumns
from markets_insights.calculations.base import SmaCalculationWorker, \
  ColumnValueAboveAnotherColumnValueFlagWorker
from markets_insights.core.core import IdentifierFilter, FlagFilter
from markets_insights.datareader.data_reader import BhavCopyReader
from markets_insights.dataprocess.data_processor import HistoricalDataset, HistoricalDataProcessor, HistoricalDataProcessOptions, \
  CalculationPipelineBuilder, MultiDataCalculationPipelines

from datetime import date

processor = HistoricalDataProcessor(HistoricalDataProcessOptions(include_annual_data=False, include_monthly_data=False))
result: HistoricalDataset = processor.process(
    BhavCopyReader(),#.set_filter(IdentifierFilter("INFY")),
    { "from_date": date(2023, 10, 1), "to_date": date(2023, 12, 31) },
)
result._daily.drop_duplicates(inplace=True)

workers = [
  SmaCalculationWorker(time_window=50),
  ColumnValueAboveAnotherColumnValueFlagWorker(value_column_a=BaseColumns.Close, value_column_b='Sma50')
]

# Prepare calculation pipeline
pipelines = MultiDataCalculationPipelines()
pipelines.set_item(
    "user", CalculationPipelineBuilder.create_pipeline_for_workers(workers)
)
processor.set_calculation_pipelines(pipelines)

# Run the pipeline and get data
processor.run_calculation_pipelines()
daily_data = result.get_daily_data()
data = daily_data.query(str(FlagFilter(workers[1].get_columns()[0])))
data[['Identifier', 'Date', 'Sma50', workers[1].get_columns()[0]]]


  from pandas.core import (


Started to read data
HistoricalDataProcessor.get_data took 2 seconds
HistoricalDataProcessor.get_manual_data took 0 seconds
HistoricalDataProcessor.run_base_calculations took 4 seconds
HistoricalDataProcessor.process took 6 seconds
SmaCalculationWorker took 0 seconds
ColumnValueAboveAnotherColumnValueFlagWorker took 0 seconds


Unnamed: 0,Identifier,Date,Sma50,CloseAboveSma50
89147,AAVAS,2023-12-14,1545.4910,True
89149,KOTAKMID50,2023-12-14,119.0346,True
89150,AIAENG,2023-12-14,3552.2560,True
89151,360ONE,2023-12-14,555.2230,True
89153,21STCENMGM,2023-12-14,20.9530,True
...,...,...,...,...
109036,VLSFINANCE,2023-12-29,198.0630,True
109037,SIS,2023-12-29,450.1000,True
109039,TAINWALCHM,2023-12-29,136.0300,True
109040,KOTAKLIQ,2023-12-29,999.9970,True


In [8]:
daily_data[daily_data[workers[1].get_columns()[0]] == False][['Identifier', 'Date', 'Sma50', workers[1].get_columns()[0]]]

Unnamed: 0,Identifier,Date,Sma50,CloseAboveSma50
0,OSWALAGRO,2023-10-03,,False
1,ORIENTPPR,2023-10-03,,False
2,PAKKA,2023-10-03,,False
3,PAISALO,2023-10-03,,False
4,ORIENTHOT,2023-10-03,,False
...,...,...,...,...
109019,YATHARTH,2023-12-29,382.476,False
109026,ZEEMEDIA,2023-12-29,15.184,False
109029,ZIMLAB,2023-12-29,116.195,False
109030,ZODIACLOTH,2023-12-29,132.779,False


In [15]:
data[['Identifier', 'Date', workers[1].get_columns()[0]]].tail(10)

Unnamed: 0,Identifier,Date,CloseCrossedBelowSma50
446701,STERTOOLS,2023-12-28,True
446743,SHAKTIPUMP,2023-12-28,True
446790,AAVAS,2023-12-28,True
446810,ABFRL,2023-12-28,True
446814,AGARIND,2023-12-28,True
446835,AMBER,2023-12-28,True
446866,BALKRISIND,2023-12-28,True
446915,CMSINFO,2023-12-28,True
446956,ASTRAL,2023-12-28,True
446980,ABSLLIQUID,2023-12-28,True


In [63]:
from markets_insights.core.core import IdentifierFilter, InstrumentTypeFilter
from markets_insights.datareader.data_reader import BhavCopyReader, NseDerivatiesReader


from datetime import date

cash_reader = BhavCopyReader().set_filter(IdentifierFilter("INDIACEM"))
filters = IdentifierFilter("INDIACEM") & InstrumentTypeFilter('FUTSTK')
fut_reader = NseDerivatiesReader().set_filter(filters)

premium_reader = cash_reader - fut_reader

#filters.get_query()
fut_reader.read(date(2024, 2, 21))

Unnamed: 0,InstrumentType,Identifier,ExpiryDate,StrkPric,OptionType,PreviousClose,Open,High,Low,Close,...,LftmLw,UnitOfMeasr,TtlNbOfTxsExctd,Ccy,Rsvd01,Rsvd02,Rsvd03,Rsvd04,Rsvd05,Date
0,FUTSTK,INDIACEM,28-Mar-2024,0.0,XX,243.2,246.85,246.85,243.55,243.55,...,,,,,,,,,,2024-02-21
1,FUTSTK,INDIACEM,29-Feb-2024,0.0,XX,242.45,245.0,247.6,240.1,241.95,...,,,,,,,,,,2024-02-21
2,FUTSTK,INDIACEM,25-Apr-2024,0.0,XX,251.5,0.0,0.0,0.0,251.5,...,,,,,,,,,,2024-02-21


In [36]:
from markets_insights.core.column_definition import BaseColumns
import pandas as pd
import pandas_ta as pd_ta

data = pd.DataFrame(daily_data.tail(100))
data.index = data[BaseColumns.Date]
pd_ta.vwap(
  close=data[BaseColumns.Close],
  high=data[BaseColumns.High],
  low=data[BaseColumns.Low],
  volume=data[BaseColumns.Volume],
)

Date
2024-01-01     611.533333
2024-01-02     612.100000
2024-01-03     607.966667
2024-01-04     609.716667
2024-01-05     608.516667
2024-01-08     603.566667
2024-01-09     620.850000
2024-01-10     607.750000
2024-01-11     606.133333
2024-01-12     603.416667
2024-01-15     598.300000
2024-01-16     589.366667
2024-01-17     581.783333
2024-01-18     583.216667
2024-01-19     582.133333
2024-01-23     575.683333
2024-01-24     580.250000
2024-01-25     578.316667
2024-01-29     575.516667
2024-01-30     579.866667
2024-01-31     597.750000
2024-02-01     610.466667
2024-02-02     615.850000
2024-02-05     615.866667
2024-02-06     617.033333
2024-02-07     615.900000
2024-02-08     613.950000
2024-02-09     604.583333
2024-02-12     678.100000
2024-02-13     732.600000
2024-02-14     819.433333
2024-02-15     919.433333
2024-02-16     930.283333
2024-02-19     981.083333
2024-02-20    1007.583333
Name: VWAP_D, dtype: float64

In [16]:
from markets_insights.core.column_definition import BaseColumns
daily_data = result.get_daily_data()

if len(daily_data[BaseColumns.Identifier].unique()) > 1:
    data = daily_data.groupby(BaseColumns.Identifier)
    data.apply(
        lambda x: x[BaseColumns.Turnover].rolling(50).sum()
        / x.rolling(50)[BaseColumns.Volume].sum()
    ) \
    .reset_index(level=0, drop=True)
else:
    daily_data[BaseColumns.Turnover].rolling(50).sum() / daily_data[BaseColumns.Volume].rolling(50).sum()

In [17]:
daily_data[BaseColumns.Turnover].rolling(50).sum() / daily_data[BaseColumns.Volume].rolling(50).sum()

0              NaN
1              NaN
2              NaN
3              NaN
4              NaN
          ...     
241    2390.315815
242    2393.762796
243    2397.837564
244    2403.204247
245    2407.309569
Length: 246, dtype: float64

### A real use case: Understand the affect of RSI and Stochastic RSI on price
In this use case, understand the affect of RSI and Stochastic RSI on price

#### Preparing the data
Calculate RSI and Stochastic RSI for each day.
Add a flag for whenever the RSI crosses the control limits (eg: above 75 and below 30)
Calculate the highest and lowest price change in the next 10 trading sessions.

#### Analyse
We will find the average for highest price change and lowest price change whenever the RSI crosses the threshold.

In [33]:
# import classes
from markets_insights.datareader.data_reader import BhavCopyReader

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = histDataProcessor.process(BhavCopyReader(), {'from_date': year_start, 'to_date': to_date})

Started to read data
HistoricalDataProcessor.get_data took 1 seconds
HistoricalDataProcessor.get_manual_data took 0 seconds
HistoricalDataProcessor.run_base_calculations took 17 seconds
HistoricalDataProcessor.process took 18 seconds


In [34]:
# prepare calculation pipeline
periods = [1, 7, 15, 30, 45]

pipelines = MultiDataCalculationPipelines()
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline(crossing_above_flag_value = 75, crossing_below_flag_value = 30, window = 14))
pipelines.set_item('stoch_rsi', CalculationPipelineBuilder.create_stoch_rsi_calculation_pipeline(crossing_above_flag_value = 80, crossing_below_flag_value = 20, window = 14))
pipelines.set_item('foward_looking_fall', CalculationPipelineBuilder.create_forward_looking_price_fall_pipeline(periods))
pipelines.set_item('foward_looking_rise', CalculationPipelineBuilder.create_forward_looking_price_rise_pipeline(periods))
histDataProcessor.set_calculation_pipelines(pipelines=pipelines)

# run the pipeline and show results
histDataProcessor.run_calculation_pipelines()

daily_data = result.get_daily_data()

# Import constants so its easier to refer to column names
from markets_insights.core.column_definition import BaseColumns, CalculatedColumns

# get names of fwd looking price column names. Since, these column names are auto-generated there no constants for them
fwd_looking_price_fall_cols, fwd_looking_price_rise_cols = [x for x in daily_data.columns if 'HighestPercFallInNext' in x], \
    [x for x in daily_data.columns if 'HighestPercRiseInNext' in x]

RsiCalculationWorker took 9 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
StochRsiCalculationWorker took 13 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedAboveFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
ValueCrossedBelowFlagWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
LowestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds
HighestPriceInNextNDaysCalculationWorker took 1 seconds


In [35]:
# analyse the median price change % for highest price fall whenever the RSI crosses above
daily_data[
  (daily_data[CalculatedColumns.RsiCrossedAbove])
][fwd_looking_price_fall_cols].median()

HighestPercFallInNext1Days     1.418923
HighestPercFallInNext7Days     3.771446
HighestPercFallInNext15Days    4.777241
HighestPercFallInNext30Days    6.055861
HighestPercFallInNext45Days    6.785467
dtype: float64

In [38]:
# analyse the median price change % for highest price rise whenever the RSI crosses below
daily_data[
  (daily_data[CalculatedColumns.RsiCrossedBelow] == True)
][fwd_looking_price_rise_cols].median()

HighestPercRiseInNext1Days      3.875000
HighestPercRiseInNext7Days      7.589087
HighestPercRiseInNext15Days     9.756772
HighestPercRiseInNext30Days    13.255317
HighestPercRiseInNext45Days    16.292135
dtype: float64

### Performing Arithmetic on Readers

In [5]:
from markets_insights.core.core import InstrumentTypeFilter
from markets_insights.datareader.data_reader import BhavCopyReader, NseDerivatiesReader, NseIndicesReader
from markets_insights.core.core import IdentifierFilter

for_date = datetime.date(2023, 12, 5)

In [11]:
indices_reader = NseIndicesReader()
vix_reader = NseIndicesReader().set_filter(IdentifierFilter("India VIX"))
op_reader = indices_reader / vix_reader
data = op_reader.read(for_date = for_date).query(str(IdentifierFilter("Nifty 50 / India VIX")))

Index-Close / India VIX-Close
Index-High / India VIX-High
Index-Low / India VIX-Low
Index-Open / India VIX-Open


In [24]:
from markets_insights.datareader.data_reader import DateRangeDataReader
nifty50 = NseIndicesReader().set_filter(IdentifierFilter("Nifty 50"))
bond_10y = NseIndicesReader().set_filter(IdentifierFilter("Nifty 10 yr Benchmark G-Sec"))
op_reader = nifty50 / bond_10y
data = DateRangeDataReader(op_reader).read(from_date = datetime.date(2024, 2, 19), to_date = datetime.date(2024, 2, 20))
op_reader.l_prefix, op_reader.r_prefix

Nifty 50-Close / Index-Close
Nifty 50-High / Index-High
Nifty 50-Low / Index-Low
Nifty 50-Open / Index-Open


('Nifty 50-', 'Index-')

In [15]:
eq_reader = BhavCopyReader()
fut_reader = NseDerivatiesReader().set_filter(InstrumentTypeFilter("FUTSTK"))
op_reader = (fut_reader - eq_reader)
premium_data = op_reader.read(for_date = for_date)

FO-Close - Cash-Close
FO-High - Cash-High
FO-Low - Cash-Low
FO-Open - Cash-Open


In [8]:
premium_data.query(
    str(IdentifierFilter("RELIANCE"))
)

Unnamed: 0,InstrumentType,Identifier,ExpiryDate,StrkPric,OptionType,FO-PreviousClose,FO-Open,FO-High,FO-Low,FO-Close,...,Cash-Volume,Cash-Turnover,TIMESTAMP,TOTALTRADES,ISIN,Unnamed: 13,Close,High,Low,Open
333,FUTSTK,RELIANCE,28-Dec-2023,0.0,XX,2433.65,2440.6,2456.95,2431.5,2446.65,...,6346812.0,15456490000.0,05-DEC-2023,270813,INE002A01018,,8.9,16.1,9.85,1.6
334,FUTSTK,RELIANCE,29-Feb-2024,0.0,XX,2468.2,2473.6,2485.5,2469.0,2481.75,...,6346812.0,15456490000.0,05-DEC-2023,270813,INE002A01018,,44.0,44.65,47.35,34.6
335,FUTSTK,RELIANCE,25-Jan-2024,0.0,XX,2450.7,2462.0,2470.3,2449.3,2462.4,...,6346812.0,15456490000.0,05-DEC-2023,270813,INE002A01018,,24.65,29.45,27.65,23.0


In [16]:
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, HistoricalDataProcessOptions
processor = HistoricalDataProcessor(HistoricalDataProcessOptions(include_annual_data=False, include_monthly_data=False))

In [17]:
result = processor.process(op_reader, {'from_date': for_date, 'to_date': for_date})

Started to read data
HistoricalDataProcessor.get_data took 0 seconds
HistoricalDataProcessor.get_manual_data took 0 seconds
HistoricalDataProcessor.run_base_calculations took 1 seconds
HistoricalDataProcessor.process took 1 seconds


In [21]:
result.get_daily_data()

Unnamed: 0,InstrumentType,Identifier,ExpiryDate,StrkPric,OptionType,FO-PreviousClose,FO-Open,FO-High,FO-Low,FO-Close,...,Cash-Volume,Cash-Turnover,TIMESTAMP,TOTALTRADES,ISIN,Close,High,Low,Open,PreviousClose
0,OPTSTK,LUPIN,28-Dec-2023,1180.0,CE,70.35,0.00,0.00,0.00,70.35,...,1104617.0,1.394572e+09,05-DEC-2023,57961,INE326A01037,-1187.85,-1187.85,-1187.85,-1187.85,-1183.85
1,OPTSTK,RELIANCE,25-Jan-2024,2300.0,CE,165.00,162.85,180.00,158.75,180.00,...,6346812.0,1.545649e+10,05-DEC-2023,270813,INE002A01018,-2257.75,-2257.75,-2257.75,-2257.75,-2076.75
2,OPTSTK,RELIANCE,28-Dec-2023,2800.0,PE,433.20,352.00,361.00,352.00,361.00,...,6346812.0,1.545649e+10,05-DEC-2023,270813,INE002A01018,-2076.75,-2076.75,-2076.75,-2076.75,-2361.45
3,OPTSTK,RELIANCE,25-Jan-2024,2440.0,CE,69.45,71.00,79.00,71.00,76.30,...,6346812.0,1.545649e+10,05-DEC-2023,270813,INE002A01018,-2361.45,-2361.45,-2361.45,-2361.45,-2230.75
4,OPTSTK,RELIANCE,28-Dec-2023,2640.0,PE,207.00,0.00,0.00,0.00,207.00,...,6346812.0,1.545649e+10,05-DEC-2023,270813,INE002A01018,-2230.75,-2230.75,-2230.75,-2230.75,-2336.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12111,OPTSTK,CONCOR,28-Dec-2023,830.0,PE,38.45,0.00,0.00,0.00,38.45,...,1782971.0,1.422668e+09,05-DEC-2023,53529,INE111A01025,-759.65,-759.65,-759.65,-759.65,-764.00
12112,OPTSTK,LICHSGFIN,28-Dec-2023,505.0,PE,13.05,13.65,18.95,13.30,17.05,...,1911892.0,9.578723e+08,05-DEC-2023,66364,INE115A01026,-481.45,-481.45,-481.45,-481.45,-477.35
12113,OPTSTK,LICHSGFIN,28-Dec-2023,490.0,CE,27.15,24.05,25.70,18.85,21.15,...,1911892.0,9.578723e+08,05-DEC-2023,66364,INE115A01026,-477.35,-477.35,-477.35,-477.35,
12114,OPTSTK,CONCOR,28-Dec-2023,820.0,PE,30.00,33.95,41.15,29.60,34.10,...,1782971.0,1.422668e+09,05-DEC-2023,53529,INE111A01025,-764.00,-764.00,-764.00,-764.00,


### Extended Data Reader for Nasdaq

In [5]:
import yfinance as yf
import pandas
from markets_insights.datareader.data_reader import DateRangeDataReader
from markets_insights.core.core import Instrumentation
from markets_insights.core.column_definition import BaseColumns

In [19]:
class NasdaqDataReader (DateRangeDataReader):
  def __init__(self, tickers: list = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'TSLA', 'NVDA']):
    super().__init__(reader=None)
    self.tickers = tickers
    self.name = "NasdaqDataReader"

  @Instrumentation.trace(name="NasdaqDataReader.read")
  def read(self, from_date, to_date):
    df_list = list()
    for ticker in self.tickers:
        data = yf.download(ticker, group_by="Ticker", start=from_date, end=to_date)
        data['ticker'] = ticker
        df_list.append(data)

    # combine all dataframes into a single dataframe
    df = pandas.concat(df_list)

    final_data = df.reset_index().rename(columns = self.get_column_name_mappings())
    final_data[BaseColumns.Date] = pandas.to_datetime(final_data[BaseColumns.Date])
    return final_data
  
  def get_column_name_mappings(self):
    return {
      'ticker': BaseColumns.Identifier,
      'OPEN': BaseColumns.Open,
      'HIGH': BaseColumns.High,
      'LOW': BaseColumns.Low,
      'CLOSE': BaseColumns.Close
    }

In [25]:
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
NasdaqDataReader().read(from_date=year_start, to_date=to_date).tail(5)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
NasdaqDataReader.read took 2 seconds


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Identifier
1745,2023-12-22,491.950012,493.829987,484.670013,488.299988,488.299988,25213900,NVDA
1746,2023-12-26,489.679993,496.0,489.600006,492.790009,492.790009,24420000,NVDA
1747,2023-12-27,495.109985,496.799988,490.850006,494.170013,494.170013,23364800,NVDA
1748,2023-12-28,496.429993,498.839996,494.119995,495.220001,495.220001,24658700,NVDA
1749,2023-12-29,498.130005,499.970001,487.51001,495.220001,495.220001,38869000,NVDA


In [28]:
# import classes & setup options
import datetime
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, MultiDataCalculationPipelines, CalculationPipelineBuilder, HistoricalDataProcessOptions
from markets_insights.calculations.base import DatePartsCalculationWorker

reader = NasdaqDataReader()
options = HistoricalDataProcessOptions()
options.include_monthly_data = False
options.include_annual_data = False
histDataProcessor = HistoricalDataProcessor(options)

# Fetch the data
year_start = datetime.date(2023, 1, 1)
to_date = datetime.date(2023, 12, 31)
result = histDataProcessor.process(reader, {'from_date': year_start, 'to_date': to_date})

# Prepare calculation pipeline
pipelines = MultiDataCalculationPipelines()
pipelines.set_item('date_parts', CalculationPipelineBuilder.create_pipeline_for_worker(DatePartsCalculationWorker()))
pipelines.set_item('rsi', CalculationPipelineBuilder.create_rsi_calculation_pipeline())
histDataProcessor.set_calculation_pipelines(pipelines)

# Run the pipeline
histDataProcessor.run_calculation_pipelines()

Started to read data
Reading data from 2023-01-02 to 2023-12-29
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
NasdaqDataReader.read took 3 seconds
Saving data to file: ../../_data/processed/historical/NasdaqDataReader.csv
get_data took 3 seconds
get_manual_data took 0 seconds
Started basic calculation
add_basic_calc took 0 seconds
process took 3 seconds
DatePartsCalculationWorker took 0 seconds
RsiCalculationWorker took 0 seconds
ValueCrossedAboveFlagWorker took 0 seconds
ValueCrossedBelowFlagWorker took 0 seconds


In [9]:
from markets_insights.core.core import TypeHelper
result.get_daily_data()[TypeHelper.get_class_static_values(BaseColumns)].tail(5)

KeyError: "['PreviousClose', 'Turnover (Rs. Cr.)', 'TOTTRDQTY'] not in index"

In [34]:
from markets_insights.core.column_definition import CalculatedColumns

result.get_daily_data() \
  .sort_values(
    [BaseColumns.Date, BaseColumns.Identifier]
  )[
    [BaseColumns.Identifier, BaseColumns.Date, BaseColumns.Close, 
     CalculatedColumns.RelativeStrengthIndex]
  ] \
  .tail(7).to_markdown()

'|      | Identifier   | Date                |   Close |     Rsi |\n|-----:|:-------------|:--------------------|--------:|--------:|\n|  248 | AAPL         | 2023-12-28 00:00:00 |  193.58 | 54.4815 |\n|  497 | AMZN         | 2023-12-28 00:00:00 |  153.38 | 63.9387 |\n|  746 | GOOGL        | 2023-12-28 00:00:00 |  140.23 | 61.585  |\n|  995 | META         | 2023-12-28 00:00:00 |  358.32 | 70.2377 |\n| 1244 | MSFT         | 2023-12-28 00:00:00 |  375.28 | 56.909  |\n| 1493 | NVDA         | 2023-12-28 00:00:00 |  495.22 | 58.305  |\n| 1742 | TSLA         | 2023-12-28 00:00:00 |  253.18 | 55.9788 |'

### Create a Calculation Pipeline

In [10]:
import markets_insights

In [39]:
## import modules
from markets_insights.calculations.base import CalculationWorker
from markets_insights.core.core import Instrumentation
from markets_insights.calculations.base import BaseColumns
import pandas

# Implement the worker class. The important aspect here is to override the add_calculated_columns() method
class FibonacciRetracementCalculationWorker (CalculationWorker):
  def __init__(self, time_window: int, level_perct: float):
    self._time_window = time_window
    self._level = level_perct / 100
    self._column_name = 'Fbr' + str(level_perct)

  @Instrumentation.trace(name="FibnocciRetracementCalculationWorker")
  def add_calculated_columns(self, data: pandas.DataFrame):
    identifier_grouped_data: pandas.DataFrame = data.groupby(BaseColumns.Identifier)
    #Since, our dataframe may contain data for multiple symbols, we need to first group them by Identifier
    data[self._column_name] = identifier_grouped_data[BaseColumns.Close].transform(
        lambda x: 
          x.rolling(self._time_window).max() - 
          (
            (x.rolling(self._time_window).max() - x.rolling(self._time_window).min())  * self._level
          )
      )

In [40]:
# Create pipline with the FibnocciRetracementCalculationWorker and run 
from markets_insights.datareader.data_reader import NseIndicesReader
from markets_insights.dataprocess.data_processor import HistoricalDataProcessor, HistoricalDataProcessOptions, \
  MultiDataCalculationPipelines, CalculationPipeline
histDataProcessor = HistoricalDataProcessor(HistoricalDataProcessOptions(include_monthly_data=False, include_annual_data=False))

# Fetch the data
result = histDataProcessor.process(NseIndicesReader(), {'from_date': datetime.date(2023, 12, 1), 'to_date': datetime.date(2023, 12, 31)})

# Prepare calculation pipeline
fbr50_worker = FibonacciRetracementCalculationWorker(time_window=7, level_perct=50)
pipelines = MultiDataCalculationPipelines()
histDataProcessor.set_calculation_pipelines(
  CalculationPipeline(
    workers = [fbr50_worker]
  )
)

# Run the pipeline and get data
histDataProcessor.run_calculation_pipelines()

Started to read data
HistoricalDataProcessor.get_data took 0 seconds
HistoricalDataProcessor.get_manual_data took 0 seconds
HistoricalDataProcessor.run_base_calculations took 0 seconds
HistoricalDataProcessor.process took 0 seconds
FibnocciRetracementCalculationWorker took 0 seconds


In [43]:
## Display the results
print(result.get_daily_data()[[
  BaseColumns.Identifier, BaseColumns.Date, BaseColumns.Close, fbr50_worker._column_name
]].tail(5).to_markdown())

|      | Identifier                    | Date                |    Close |    Fbr50 |
|-----:|:------------------------------|:--------------------|---------:|---------:|
| 2141 | NIFTY COMPOSITE G-SEC INDEX   | 2023-12-29 00:00:00 |  2602.3  |  2599.48 |
| 2142 | NIFTY 10 YR BENCHMARK G-SEC   | 2023-12-29 00:00:00 |  2232.79 |  2230.24 |
| 2143 | NIFTY MIDCAP SELECT           | 2023-12-29 00:00:00 | 10397.5  | 10209.1  |
| 2144 | NIFTY ALPHA LOW-VOLATILITY 30 | 2023-12-29 00:00:00 | 23373.2  | 22886    |
| 2145 | NIFTY50 USD                   | 2023-12-29 00:00:00 |  9048.9  |  8941.77 |
