# 02 - Data Processing

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import numpy as np
import pandas as pd

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['HDFCBANK', 'ITBEES']

In [4]:
STOCK_SYMBOL = stock_symbols[1]
STOCK_SYMBOL

'ITBEES'

## Data loading

In [5]:
stock_df = pd.read_parquet(cnst.NSE_DATA_DIR.joinpath(STOCK_SYMBOL, "consolidated.parquet"))
stock_df = stock_df.drop(columns = ['series', 'PREV. CLOSE'])
stock_df.columns = [
    'Date',
    'Open', 'High', 'Low', 'LTP', 'Close',
    'VWAP', '52W H', '52W L',
    'Volume', 'Value', '#Trades'
]
stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades
0,2020-07-01,17.71,17.71,14.65,14.65,14.65,14.97,17.71,14.65,26187,3.919319e+05,55
1,2020-07-02,14.65,15.74,14.65,15.21,15.26,15.07,17.71,14.65,5602,8.443024e+04,31
2,2020-07-03,15.41,15.41,15.28,15.39,15.38,15.31,17.71,14.65,13559,2.076389e+05,18
3,2020-07-06,15.50,18.16,15.41,15.54,15.55,16.05,18.16,14.65,33643,5.400182e+05,168
4,2020-07-07,15.55,16.29,15.55,15.75,15.74,15.73,18.16,14.65,15727,2.473728e+05,53
...,...,...,...,...,...,...,...,...,...,...,...,...
882,2024-01-16,39.52,39.55,38.65,38.82,38.82,38.85,40.00,27.56,5450878,2.117865e+08,22817
883,2024-01-17,38.80,39.16,38.07,38.82,38.98,38.94,40.00,27.56,7129906,2.776618e+08,18290
884,2024-01-18,38.95,39.16,38.12,38.83,38.76,38.52,40.00,27.56,5298842,2.041244e+08,20714
885,2024-01-19,38.74,39.43,38.74,39.21,39.21,39.21,40.00,27.56,3740394,1.466584e+08,12440


In [6]:
stock_df.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 887 entries, 0 to 886
Data columns (total 12 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     887 non-null    datetime64[ns]
 1   Open     887 non-null    float64       
 2   High     887 non-null    float64       
 3   Low      887 non-null    float64       
 4   LTP      887 non-null    float64       
 5   Close    887 non-null    float64       
 6   VWAP     887 non-null    float64       
 7   52W H    887 non-null    float64       
 8   52W L    887 non-null    float64       
 9   Volume   887 non-null    int64         
 10  Value    887 non-null    float64       
 11  #Trades  887 non-null    int64         
dtypes: datetime64[ns](1), float64(9), int64(2)
memory usage: 83.3 KB


## Feature engineering

### Daily candle based columns

In [7]:
stock_df['IsGreen'] = (stock_df['Close'] >= stock_df['Open']).astype(int)
stock_df['Is52WLow'] = np.isclose(stock_df['Low'], stock_df['52W L'], atol = 0, rtol = 1e-4).astype(int)
stock_df['Is52WHigh'] = np.isclose(stock_df['High'], stock_df['52W H'], atol = 0, rtol = 1e-4).astype(int)
stock_df.sample(10)

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh
559,2022-09-27,28.0,28.14,27.7,28.07,27.99,27.93,41.08,27.1,4191072,117065000.0,10643,0,0,0
299,2021-09-09,35.77,35.77,35.1,35.4,35.41,35.28,36.44,18.0,521874,18411070.0,3439,0,0,0
823,2023-10-20,33.24,33.39,32.97,33.14,33.12,33.05,35.34,27.56,6325332,209032100.0,16029,0,0,0
36,2020-08-20,18.3,18.5,18.15,18.27,18.26,18.29,21.4,14.0,29633,541874.3,121,0,0,0
96,2020-11-13,21.79,21.79,21.41,21.53,21.54,21.52,26.2,14.0,232053,4992793.0,684,0,0,0
524,2022-08-04,30.6,31.05,30.27,30.83,30.81,30.73,41.08,27.1,4038878,124122700.0,6386,1,0,0
522,2022-08-02,30.26,30.26,29.91,30.01,30.06,30.04,41.08,27.1,1471059,44196990.0,6300,0,0,0
163,2021-02-19,26.5,26.5,25.52,25.82,25.78,25.72,27.75,14.0,735281,18910510.0,2114,0,0,0
206,2021-04-28,26.03,26.18,25.99,26.17,26.13,26.09,27.8,14.0,495892,12936260.0,1376,1,0,0
276,2021-08-06,31.82,32.2,31.61,31.98,31.91,31.89,32.5,17.03,510583,16284240.0,1982,1,0,0


In [8]:
((stock_df[['IsGreen', 'Is52WLow', 'Is52WHigh']].sum() * 100) / len(stock_df)).round(2)

IsGreen      32.24
Is52WLow      0.68
Is52WHigh     7.33
dtype: float64

In [9]:
stock_df.shape

(887, 15)

### Date based columns

In [10]:
stock_df['Day'] = stock_df['Date'].dt.day
stock_df['Month'] = stock_df['Date'].dt.month
stock_df['Year'] = stock_df['Date'].dt.year
stock_df['Weekday'] = stock_df['Date'].dt.weekday
stock_df['DayOfYear'] = stock_df['Date'].dt.day_of_year
stock_df['Quarter'] = stock_df['Date'].dt.quarter
stock_df['DaysSinceLastTradingSession'] = (stock_df['Date'] - stock_df['Date'].shift(1)).dt.days.fillna(1).astype(int)

stock_df.sample(10)

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession
176,2021-03-10,26.31,26.5,25.92,26.19,26.26,26.22,27.75,14.0,1521944,39900950.0,3384,0,0,0,10,3,2021,2,69,1,1
146,2021-01-27,26.89,26.89,25.77,26.15,26.11,26.13,27.75,14.0,796153,20806230.0,2912,0,0,0,27,1,2021,2,27,1,2
880,2024-01-12,37.09,38.6,37.09,38.52,38.54,38.27,38.99,27.56,23489935,898915000.0,32831,1,0,0,12,1,2024,4,12,1,1
788,2023-08-30,32.96,32.96,32.51,32.7,32.73,32.69,33.72,27.26,5078770,166045800.0,8312,0,0,0,30,8,2023,2,242,3,1
569,2022-10-12,28.59,28.73,28.36,28.63,28.62,28.56,41.08,27.1,2842813,81194350.0,7122,1,0,0,12,10,2022,2,285,4,1
185,2021-03-24,26.79,26.79,24.0,26.11,25.75,25.94,27.75,14.0,707084,18339540.0,3527,0,0,0,24,3,2021,2,83,1,1
118,2020-12-16,23.07,23.09,22.86,23.06,23.04,22.97,26.2,14.0,191519,4399409.0,997,0,0,0,16,12,2020,2,351,4,1
469,2022-05-19,31.0,31.0,29.0,29.23,29.11,29.42,41.08,25.75,5069988,149175900.0,26996,0,0,0,19,5,2022,3,139,2,1
253,2021-07-05,29.46,29.73,29.36,29.5,29.41,29.51,32.5,14.0,471882,13925930.0,3223,0,0,0,5,7,2021,0,186,3,3
632,2023-01-11,29.62,30.08,29.62,29.82,29.82,29.9,41.08,27.1,2318386,69310070.0,6352,1,0,0,11,1,2023,2,11,1,1


In [11]:
stock_df.shape

(887, 22)

### Moving averages

#### `Close`

In [12]:
for window in cnst.ROLLING_WINDOWS:
    stock_df[f'Close {window}MA'] = stock_df['Close'].rolling(window = window, min_periods = 1).mean().round(2)

stock_df.filter(regex = "Close.*")

Unnamed: 0,Close,Close 3MA,Close 7MA,Close 15MA,Close 30MA
0,14.65,14.65,14.65,14.65,14.65
1,15.26,14.96,14.96,14.96,14.96
2,15.38,15.10,15.10,15.10,15.10
3,15.55,15.40,15.21,15.21,15.21
4,15.74,15.56,15.32,15.32,15.32
...,...,...,...,...,...
882,38.82,38.86,37.60,37.38,36.80
883,38.98,39.01,37.96,37.47,36.95
884,38.76,38.85,38.26,37.53,37.08
885,39.21,38.98,38.61,37.66,37.22


#### `VWAP`

In [13]:
for window in cnst.ROLLING_WINDOWS:
    stock_df[f'VWAP {window}MA'] = stock_df['VWAP'].rolling(window = window, min_periods = 1).mean().round(2)

stock_df.filter(regex = "VWAP.*")

Unnamed: 0,VWAP,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA
0,14.97,14.97,14.97,14.97,14.97
1,15.07,15.02,15.02,15.02,15.02
2,15.31,15.12,15.12,15.12,15.12
3,16.05,15.48,15.35,15.35,15.35
4,15.73,15.70,15.43,15.43,15.43
...,...,...,...,...,...
882,38.85,38.88,37.65,37.41,36.79
883,38.94,39.10,37.99,37.50,36.94
884,38.52,38.77,38.22,37.55,37.07
885,39.21,38.89,38.58,37.66,37.21


#### `Volume`

In [14]:
for window in cnst.ROLLING_WINDOWS:
    stock_df[f'Volume {window}MA'] = stock_df['Volume'].rolling(window = window, min_periods = 1).mean().astype(int)

stock_df.filter(regex = "Volume.*")

Unnamed: 0,Volume,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA
0,26187,26187,26187,26187,26187
1,5602,15894,15894,15894,15894
2,13559,15116,15116,15116,15116
3,33643,17601,19747,19747,19747
4,15727,20976,18943,18943,18943
...,...,...,...,...,...
882,5450878,15391384,8630370,7433435,8159666
883,7129906,9938041,9156060,7581290,8203049
884,5298842,5959875,9242842,7696713,8070116
885,3740394,5389714,9367806,7437123,8046963


#### `Value`

In [15]:
for window in cnst.ROLLING_WINDOWS:
    stock_df[f'Value {window}MA'] = stock_df['Value'].rolling(window = window, min_periods = 1).mean().astype(int)

stock_df.filter(regex = "Value.*")

Unnamed: 0,Value,Value 3MA,Value 7MA,Value 15MA,Value 30MA
0,3.919319e+05,391931,391931,391931,391931
1,8.443024e+04,238181,238181,238181,238181
2,2.076389e+05,228000,228000,228000,228000
3,5.400182e+05,277362,306004,306004,306004
4,2.473728e+05,331676,294278,294278,294278
...,...,...,...,...,...
882,2.117865e+08,597276951,330737649,280538679,302315985
883,2.776618e+08,390192564,352367793,286710859,304917101
884,2.041244e+08,231190909,356796592,291344742,300943699
885,1.466584e+08,209481538,362734009,282027474,300672473


#### `#Trades`

In [16]:
for window in cnst.ROLLING_WINDOWS:
    stock_df[f'#Trades {window}MA'] = stock_df['#Trades'].rolling(window = window, min_periods = 1).mean().astype(int)

stock_df.filter(regex = "#Trades.*")

Unnamed: 0,#Trades,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,55,55,55,55,55
1,31,43,43,43,43
2,18,34,34,34,34
3,168,72,68,68,68
4,53,79,65,65,65
...,...,...,...,...,...
882,22817,29262,20814,20739,19773
883,18290,24415,20489,21146,19874
884,20714,20607,21654,21675,20022
885,12440,17148,21631,21299,20010


### Target columns

In [17]:
for window in cnst.TARGET_WINDOWS:
    stock_df[f'Target {window}D'] = stock_df['Close'].shift(-window)

stock_df.filter(regex = "Target.*")

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
0,15.55,15.59,17.16,18.07
1,15.74,15.91,17.17,18.15
2,15.50,15.73,17.25,18.14
3,15.55,16.55,17.62,18.25
4,15.59,17.01,18.05,18.37
...,...,...,...,...
882,39.21,,,
883,38.89,,,
884,,,,
885,,,,


### Saving processed data

In [18]:
stock_df.to_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet'), 
    index = False
)
stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-07-01,17.71,17.71,14.65,14.65,14.65,14.97,17.71,14.65,26187,3.919319e+05,55,0,1,1,1,7,2020,2,183,3,1,14.65,14.65,14.65,14.65,14.97,14.97,14.97,14.97,26187,26187,26187,26187,391931,391931,391931,391931,55,55,55,55,15.55,15.59,17.16,18.07
1,2020-07-02,14.65,15.74,14.65,15.21,15.26,15.07,17.71,14.65,5602,8.443024e+04,31,1,1,0,2,7,2020,3,184,3,1,14.96,14.96,14.96,14.96,15.02,15.02,15.02,15.02,15894,15894,15894,15894,238181,238181,238181,238181,43,43,43,43,15.74,15.91,17.17,18.15
2,2020-07-03,15.41,15.41,15.28,15.39,15.38,15.31,17.71,14.65,13559,2.076389e+05,18,0,0,0,3,7,2020,4,185,3,1,15.10,15.10,15.10,15.10,15.12,15.12,15.12,15.12,15116,15116,15116,15116,228000,228000,228000,228000,34,34,34,34,15.50,15.73,17.25,18.14
3,2020-07-06,15.50,18.16,15.41,15.54,15.55,16.05,18.16,14.65,33643,5.400182e+05,168,1,0,1,6,7,2020,0,188,3,3,15.40,15.21,15.21,15.21,15.48,15.35,15.35,15.35,17601,19747,19747,19747,277362,306004,306004,306004,72,68,68,68,15.55,16.55,17.62,18.25
4,2020-07-07,15.55,16.29,15.55,15.75,15.74,15.73,18.16,14.65,15727,2.473728e+05,53,1,0,0,7,7,2020,1,189,3,1,15.56,15.32,15.32,15.32,15.70,15.43,15.43,15.43,20976,18943,18943,18943,331676,294278,294278,294278,79,65,65,65,15.59,17.01,18.05,18.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,2024-01-16,39.52,39.55,38.65,38.82,38.82,38.85,40.00,27.56,5450878,2.117865e+08,22817,0,0,0,16,1,2024,1,16,1,1,38.86,37.60,37.38,36.80,38.88,37.65,37.41,36.79,15391384,8630370,7433435,8159666,597276951,330737649,280538679,302315985,29262,20814,20739,19773,39.21,,,
883,2024-01-17,38.80,39.16,38.07,38.82,38.98,38.94,40.00,27.56,7129906,2.776618e+08,18290,1,0,0,17,1,2024,2,17,1,1,39.01,37.96,37.47,36.95,39.10,37.99,37.50,36.94,9938041,9156060,7581290,8203049,390192564,352367793,286710859,304917101,24415,20489,21146,19874,38.89,,,
884,2024-01-18,38.95,39.16,38.12,38.83,38.76,38.52,40.00,27.56,5298842,2.041244e+08,20714,0,0,0,18,1,2024,3,18,1,1,38.85,38.26,37.53,37.08,38.77,38.22,37.55,37.07,5959875,9242842,7696713,8070116,231190909,356796592,291344742,300943699,20607,21654,21675,20022,,,,
885,2024-01-19,38.74,39.43,38.74,39.21,39.21,39.21,40.00,27.56,3740394,1.466584e+08,12440,1,0,0,19,1,2024,4,19,1,1,38.98,38.61,37.66,37.22,38.89,38.58,37.66,37.21,5389714,9367806,7437123,8046963,209481538,362734009,282027474,300672473,17148,21631,21299,20010,,,,


## Data standardization

### Stock price based columns

In [19]:
stock_price_cols = [
    'Open', 'High', 'Low', 'LTP', '52W H', '52W L'
] + (
    stock_df.filter(regex = "Close.*").columns.to_list() +
    stock_df.filter(regex = "VWAP.*").columns.to_list() +
    stock_df.filter(regex = "Target.*").columns.to_list()
)

stock_price_cols

['Open',
 'High',
 'Low',
 'LTP',
 '52W H',
 '52W L',
 'Close',
 'Close 3MA',
 'Close 7MA',
 'Close 15MA',
 'Close 30MA',
 'VWAP',
 'VWAP 3MA',
 'VWAP 7MA',
 'VWAP 15MA',
 'VWAP 30MA',
 'Target 3D',
 'Target 7D',
 'Target 15D',
 'Target 30D']

In [20]:
stock_df[stock_price_cols] = stock_df[stock_price_cols].div(stock_df['Close'], axis = 0).round(3)
stock_df[stock_price_cols]

Unnamed: 0,Open,High,Low,LTP,52W H,52W L,Close,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.209,1.209,1.000,1.000,1.209,1.000,1.0,1.000,1.000,1.000,1.000,1.022,1.022,1.022,1.022,1.022,1.061,1.064,1.171,1.233
1,0.960,1.031,0.960,0.997,1.161,0.960,1.0,0.980,0.980,0.980,0.980,0.988,0.984,0.984,0.984,0.984,1.031,1.043,1.125,1.189
2,1.002,1.002,0.993,1.001,1.151,0.953,1.0,0.982,0.982,0.982,0.982,0.995,0.983,0.983,0.983,0.983,1.008,1.023,1.122,1.179
3,0.997,1.168,0.991,0.999,1.168,0.942,1.0,0.990,0.978,0.978,0.978,1.032,0.995,0.987,0.987,0.987,1.000,1.064,1.133,1.174
4,0.988,1.035,0.988,1.001,1.154,0.931,1.0,0.989,0.973,0.973,0.973,0.999,0.997,0.980,0.980,0.980,0.990,1.081,1.147,1.167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,1.018,1.019,0.996,1.000,1.030,0.710,1.0,1.001,0.969,0.963,0.948,1.001,1.002,0.970,0.964,0.948,1.010,,,
883,0.995,1.005,0.977,0.996,1.026,0.707,1.0,1.001,0.974,0.961,0.948,0.999,1.003,0.975,0.962,0.948,0.998,,,
884,1.005,1.010,0.983,1.002,1.032,0.711,1.0,1.002,0.987,0.968,0.957,0.994,1.000,0.986,0.969,0.956,,,,
885,0.988,1.006,0.988,1.000,1.020,0.703,1.0,0.994,0.985,0.960,0.949,1.000,0.992,0.984,0.960,0.949,,,,


### Volume based columns

In [21]:
volume_cols = stock_df.filter(regex = "Volume.*").columns.to_list()

volume_cols

['Volume', 'Volume 3MA', 'Volume 7MA', 'Volume 15MA', 'Volume 30MA']

In [22]:
stock_df[volume_cols] = stock_df[volume_cols].div(stock_df['Volume'], axis = 0).round(3)
stock_df[volume_cols]

Unnamed: 0,Volume,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,2.837,2.837,2.837,2.837
2,1.0,1.115,1.115,1.115,1.115
3,1.0,0.523,0.587,0.587,0.587
4,1.0,1.334,1.204,1.204,1.204
...,...,...,...,...,...
882,1.0,2.824,1.583,1.364,1.497
883,1.0,1.394,1.284,1.063,1.151
884,1.0,1.125,1.744,1.453,1.523
885,1.0,1.441,2.504,1.988,2.151


### Value based columns

In [23]:
value_cols = stock_df.filter(regex = "Value.*").columns.to_list()

value_cols

['Value', 'Value 3MA', 'Value 7MA', 'Value 15MA', 'Value 30MA']

In [24]:
stock_df[value_cols] = stock_df[value_cols].div(stock_df['Value'], axis = 0).round(3)
stock_df[value_cols]

Unnamed: 0,Value,Value 3MA,Value 7MA,Value 15MA,Value 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,2.821,2.821,2.821,2.821
2,1.0,1.098,1.098,1.098,1.098
3,1.0,0.514,0.567,0.567,0.567
4,1.0,1.341,1.190,1.190,1.190
...,...,...,...,...,...
882,1.0,2.820,1.562,1.325,1.427
883,1.0,1.405,1.269,1.033,1.098
884,1.0,1.133,1.748,1.427,1.474
885,1.0,1.428,2.473,1.923,2.050


### Trade count based columns

In [25]:
trade_count_cols = stock_df.filter(regex = "#Trades.*").columns.to_list()

trade_count_cols

['#Trades', '#Trades 3MA', '#Trades 7MA', '#Trades 15MA', '#Trades 30MA']

In [26]:
stock_df[trade_count_cols] = stock_df[trade_count_cols].div(stock_df['#Trades'], axis = 0).round(3)
stock_df[trade_count_cols]

Unnamed: 0,#Trades,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,1.387,1.387,1.387,1.387
2,1.0,1.889,1.889,1.889,1.889
3,1.0,0.429,0.405,0.405,0.405
4,1.0,1.491,1.226,1.226,1.226
...,...,...,...,...,...
882,1.0,1.282,0.912,0.909,0.867
883,1.0,1.335,1.120,1.156,1.087
884,1.0,0.995,1.045,1.046,0.967
885,1.0,1.378,1.739,1.712,1.609


### Dropping unnecessary columns

In [27]:
stock_df = stock_df.drop(columns = ['Date', 'Close', 'Volume', 'Value', '#Trades'])
stock_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.209,1.209,1.000,1.000,1.022,1.209,1.000,0,1,1,1,7,2020,2,183,3,1,1.000,1.000,1.000,1.000,1.022,1.022,1.022,1.022,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.061,1.064,1.171,1.233
1,0.960,1.031,0.960,0.997,0.988,1.161,0.960,1,1,0,2,7,2020,3,184,3,1,0.980,0.980,0.980,0.980,0.984,0.984,0.984,0.984,2.837,2.837,2.837,2.837,2.821,2.821,2.821,2.821,1.387,1.387,1.387,1.387,1.031,1.043,1.125,1.189
2,1.002,1.002,0.993,1.001,0.995,1.151,0.953,0,0,0,3,7,2020,4,185,3,1,0.982,0.982,0.982,0.982,0.983,0.983,0.983,0.983,1.115,1.115,1.115,1.115,1.098,1.098,1.098,1.098,1.889,1.889,1.889,1.889,1.008,1.023,1.122,1.179
3,0.997,1.168,0.991,0.999,1.032,1.168,0.942,1,0,1,6,7,2020,0,188,3,3,0.990,0.978,0.978,0.978,0.995,0.987,0.987,0.987,0.523,0.587,0.587,0.587,0.514,0.567,0.567,0.567,0.429,0.405,0.405,0.405,1.000,1.064,1.133,1.174
4,0.988,1.035,0.988,1.001,0.999,1.154,0.931,1,0,0,7,7,2020,1,189,3,1,0.989,0.973,0.973,0.973,0.997,0.980,0.980,0.980,1.334,1.204,1.204,1.204,1.341,1.190,1.190,1.190,1.491,1.226,1.226,1.226,0.990,1.081,1.147,1.167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,1.018,1.019,0.996,1.000,1.001,1.030,0.710,0,0,0,16,1,2024,1,16,1,1,1.001,0.969,0.963,0.948,1.002,0.970,0.964,0.948,2.824,1.583,1.364,1.497,2.820,1.562,1.325,1.427,1.282,0.912,0.909,0.867,1.010,,,
883,0.995,1.005,0.977,0.996,0.999,1.026,0.707,1,0,0,17,1,2024,2,17,1,1,1.001,0.974,0.961,0.948,1.003,0.975,0.962,0.948,1.394,1.284,1.063,1.151,1.405,1.269,1.033,1.098,1.335,1.120,1.156,1.087,0.998,,,
884,1.005,1.010,0.983,1.002,0.994,1.032,0.711,0,0,0,18,1,2024,3,18,1,1,1.002,0.987,0.968,0.957,1.000,0.986,0.969,0.956,1.125,1.744,1.453,1.523,1.133,1.748,1.427,1.474,0.995,1.045,1.046,0.967,,,,
885,0.988,1.006,0.988,1.000,1.000,1.020,0.703,1,0,0,19,1,2024,4,19,1,1,0.994,0.985,0.960,0.949,0.992,0.984,0.960,0.949,1.441,2.504,1.988,2.151,1.428,2.473,1.923,2.050,1.378,1.739,1.712,1.609,,,,


### Saving standardized data

In [28]:
stock_df.to_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet'), 
    index = False
)
stock_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.209,1.209,1.000,1.000,1.022,1.209,1.000,0,1,1,1,7,2020,2,183,3,1,1.000,1.000,1.000,1.000,1.022,1.022,1.022,1.022,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.061,1.064,1.171,1.233
1,0.960,1.031,0.960,0.997,0.988,1.161,0.960,1,1,0,2,7,2020,3,184,3,1,0.980,0.980,0.980,0.980,0.984,0.984,0.984,0.984,2.837,2.837,2.837,2.837,2.821,2.821,2.821,2.821,1.387,1.387,1.387,1.387,1.031,1.043,1.125,1.189
2,1.002,1.002,0.993,1.001,0.995,1.151,0.953,0,0,0,3,7,2020,4,185,3,1,0.982,0.982,0.982,0.982,0.983,0.983,0.983,0.983,1.115,1.115,1.115,1.115,1.098,1.098,1.098,1.098,1.889,1.889,1.889,1.889,1.008,1.023,1.122,1.179
3,0.997,1.168,0.991,0.999,1.032,1.168,0.942,1,0,1,6,7,2020,0,188,3,3,0.990,0.978,0.978,0.978,0.995,0.987,0.987,0.987,0.523,0.587,0.587,0.587,0.514,0.567,0.567,0.567,0.429,0.405,0.405,0.405,1.000,1.064,1.133,1.174
4,0.988,1.035,0.988,1.001,0.999,1.154,0.931,1,0,0,7,7,2020,1,189,3,1,0.989,0.973,0.973,0.973,0.997,0.980,0.980,0.980,1.334,1.204,1.204,1.204,1.341,1.190,1.190,1.190,1.491,1.226,1.226,1.226,0.990,1.081,1.147,1.167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,1.018,1.019,0.996,1.000,1.001,1.030,0.710,0,0,0,16,1,2024,1,16,1,1,1.001,0.969,0.963,0.948,1.002,0.970,0.964,0.948,2.824,1.583,1.364,1.497,2.820,1.562,1.325,1.427,1.282,0.912,0.909,0.867,1.010,,,
883,0.995,1.005,0.977,0.996,0.999,1.026,0.707,1,0,0,17,1,2024,2,17,1,1,1.001,0.974,0.961,0.948,1.003,0.975,0.962,0.948,1.394,1.284,1.063,1.151,1.405,1.269,1.033,1.098,1.335,1.120,1.156,1.087,0.998,,,
884,1.005,1.010,0.983,1.002,0.994,1.032,0.711,0,0,0,18,1,2024,3,18,1,1,1.002,0.987,0.968,0.957,1.000,0.986,0.969,0.956,1.125,1.744,1.453,1.523,1.133,1.748,1.427,1.474,0.995,1.045,1.046,0.967,,,,
885,0.988,1.006,0.988,1.000,1.000,1.020,0.703,1,0,0,19,1,2024,4,19,1,1,0.994,0.985,0.960,0.949,0.992,0.984,0.960,0.949,1.441,2.504,1.988,2.151,1.428,2.473,1.923,2.050,1.378,1.739,1.712,1.609,,,,
