# 02 - Data Processing

In [1]:
import sys
# Display the path of the Python interpreter that backs this kernel.
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import numpy as np
import pandas as pd

import constants as cnst
import stock_utils as su

# Lift the column display limit so wide DataFrames are rendered in full.
pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
# Window lengths (in rows of the daily frame) shared by every moving-average cell below.
ROLLING_WINDOWS = [3, 7, 15, 30]

# Symbols reported by the project helper for the raw NSE data directory.
stock_symbols = su.get_all_stock_symbols(cnst.NSE_DATA_DIR)

stock_symbols

['HDFCBANK', 'ITBEES']

In [4]:
# Process the first listed symbol; this constant is used below to locate the
# input parquet file and to name both output files.
STOCK_SYMBOL = stock_symbols[0]
STOCK_SYMBOL

'HDFCBANK'

## Data loading

In [5]:
# Location of the consolidated price history for the selected symbol.
consolidated_path = cnst.NSE_DATA_DIR.joinpath(STOCK_SYMBOL, "consolidated.parquet")

# Short display names, applied positionally to the columns that remain after the drop.
renamed_columns = [
    'Date',
    'Open', 'High', 'Low', 'LTP', 'Close',
    'VWAP', '52W H', '52W L',
    'Volume', 'Value', '#Trades',
]

# Read the file and discard the two columns that are not used further on.
stock_df = pd.read_parquet(consolidated_path).drop(columns = ['series', 'PREV. CLOSE'])
stock_df.columns = renamed_columns
stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades
0,2020-01-01,1276.10,1280.00,1270.60,1279.00,1278.60,1276.64,2503.3,1084.00,1836849,2.345001e+09,46625
1,2020-01-02,1279.00,1288.00,1279.00,1286.00,1286.75,1284.56,2503.3,1084.00,3068583,3.941792e+09,104570
2,2020-01-03,1282.20,1285.00,1263.60,1268.50,1268.40,1270.48,2503.3,1084.00,5427775,6.895886e+09,157066
3,2020-01-06,1260.00,1261.80,1236.00,1240.25,1240.95,1247.24,2503.3,1084.00,5445093,6.791348e+09,155007
4,2020-01-07,1258.90,1271.45,1252.25,1261.00,1260.60,1261.48,2503.3,1084.00,7362247,9.287302e+09,189026
...,...,...,...,...,...,...,...,...,...,...,...,...
1004,2023-12-22,1683.60,1685.90,1667.10,1668.70,1670.85,1675.74,1757.5,1460.25,24289425,4.070271e+10,450284
1005,2023-12-26,1673.25,1685.95,1668.55,1684.10,1682.45,1678.46,1757.5,1460.25,9022928,1.514458e+10,243596
1006,2023-12-27,1681.50,1706.50,1678.60,1702.10,1703.30,1695.87,1757.5,1460.25,13504539,2.290188e+10,321373
1007,2023-12-28,1709.30,1721.40,1702.00,1708.85,1705.25,1711.26,1757.5,1460.25,22038235,3.771305e+10,423062


In [6]:
# Summarise dtypes and non-null counts; "deep" asks pandas to introspect object
# data so the reported memory usage is accurate rather than an estimate.
stock_df.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Data columns (total 12 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     1009 non-null   datetime64[ns]
 1   Open     1009 non-null   float64       
 2   High     1009 non-null   float64       
 3   Low      1009 non-null   float64       
 4   LTP      1009 non-null   float64       
 5   Close    1009 non-null   float64       
 6   VWAP     1009 non-null   float64       
 7   52W H    1009 non-null   float64       
 8   52W L    1009 non-null   float64       
 9   Volume   1009 non-null   int64         
 10  Value    1009 non-null   float64       
 11  #Trades  1009 non-null   int64         
dtypes: datetime64[ns](1), float64(9), int64(2)
memory usage: 94.7 KB


## Feature engineering

### Daily candle based columns

In [7]:
# Candle-based flags, stored as 0/1 integers:
#   IsGreen   - the session closed at or above its open (a flat session counts as green)
#   Is52WLow  - the session low is within 0.01% (relative) of the 52-week low
#   Is52WHigh - the session high is within 0.01% (relative) of the 52-week high
# atol = 0 keeps the comparison purely relative, so it scales with the price level.
stock_df['IsGreen'] = (stock_df['Close'] >= stock_df['Open']).astype(int)
stock_df['Is52WLow'] = np.isclose(stock_df['Low'], stock_df['52W L'], atol = 0, rtol = 1e-4).astype(int)
stock_df['Is52WHigh'] = np.isclose(stock_df['High'], stock_df['52W H'], atol = 0, rtol = 1e-4).astype(int)

# Fixed seed so this spot-check shows the same rows on every Restart & Run All.
stock_df.sample(10, random_state = 42)

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh
232,2020-11-11,1399.9,1414.8,1370.5,1390.15,1389.95,1393.64,1414.8,738.75,18058097,25166450000.0,319672,0,0,1
822,2023-03-28,1565.7,1582.9,1564.1,1581.6,1580.2,1574.78,1722.1,1271.6,19638151,30925810000.0,195952,1,0,0
611,2022-05-25,1330.0,1335.95,1323.2,1327.5,1328.8,1329.41,1725.0,1278.3,6443556,8566145000.0,212264,0,0,0
652,2022-07-21,1360.55,1369.5,1358.1,1363.0,1360.75,1363.06,1725.0,1271.6,6320139,8614710000.0,158369,1,0,0
870,2023-06-09,1615.9,1618.5,1607.0,1609.5,1610.6,1613.79,1734.45,1271.6,9106289,14695640000.0,185235,0,0,0
144,2020-07-09,1118.0,1129.7,1107.25,1127.0,1124.95,1121.08,2474.6,738.75,10057717,11275520000.0,143088,1,0,0
68,2020-03-17,1008.0,1010.0,954.7,976.2,975.1,982.62,2503.3,919.2,21338641,20967840000.0,447922,0,0,0
833,2023-04-18,1670.0,1677.25,1656.4,1660.35,1659.6,1666.9,1720.0,1271.6,14818613,24701210000.0,198013,0,0,0
939,2023-09-18,1653.75,1655.0,1626.1,1629.65,1629.05,1636.5,1757.5,1365.0,18528144,30321220000.0,292658,0,0,0
699,2022-09-29,1396.3,1402.2,1378.8,1381.5,1382.35,1387.85,1725.0,1271.6,7205194,9999718000.0,258296,0,0,0


In [8]:
# Share of sessions (in percent) for which each 0/1 flag is set.
flag_cols = ['IsGreen', 'Is52WLow', 'Is52WHigh']
flag_totals = stock_df[flag_cols].sum()
(flag_totals * 100 / len(stock_df)).round(2)

IsGreen      49.36
Is52WLow      1.49
Is52WHigh     2.58
dtype: float64

In [9]:
# Dimensions of the frame now that the three candle flag columns have been added.
stock_df.shape

(1009, 15)

### Date based columns

In [10]:
# Calendar features derived from the trading date through the `.dt` accessor.
stock_df['Day'] = stock_df['Date'].dt.day
stock_df['Month'] = stock_df['Date'].dt.month
stock_df['Year'] = stock_df['Date'].dt.year
stock_df['Weekday'] = stock_df['Date'].dt.weekday  # pandas convention: Monday = 0
stock_df['DayOfYear'] = stock_df['Date'].dt.day_of_year
stock_df['Quarter'] = stock_df['Date'].dt.quarter

# Fixed seed so this spot-check shows the same rows on every Restart & Run All.
stock_df.sample(10, random_state = 42)

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter
700,2022-09-30,1378.8,1431.45,1365.0,1422.05,1421.35,1407.93,1725.0,1271.6,7890878,11109800000.0,181624,1,0,0,30,9,2022,4,273,3
71,2020-03-20,875.0,914.6,824.55,881.25,882.85,868.05,2503.3,795.0,44319015,38471080000.0,790631,1,0,0,20,3,2020,4,80,1
408,2021-07-29,1428.25,1429.95,1413.3,1421.75,1418.25,1422.4,1641.0,993.0,6851115,9745030000.0,174810,0,0,0,29,7,2021,3,210,3
97,2020-05-04,957.5,960.0,917.5,924.9,923.0,932.42,2503.3,738.75,13361177,12458170000.0,383166,0,0,0,4,5,2020,0,125,2
849,2023-05-11,1665.15,1688.0,1649.25,1655.0,1653.2,1656.04,1734.45,1271.6,17867309,29588950000.0,219658,0,0,0,11,5,2023,3,131,2
689,2022-09-15,1530.0,1540.95,1512.0,1520.5,1520.7,1525.41,1725.0,1271.6,6769686,10326530000.0,209664,0,0,0,15,9,2022,3,258,3
1,2020-01-02,1279.0,1288.0,1279.0,1286.0,1286.75,1284.56,2503.3,1084.0,3068583,3941792000.0,104570,1,0,0,2,1,2020,3,2,1
396,2021-07-12,1502.0,1502.0,1484.0,1487.3,1487.0,1492.97,1641.0,993.0,7229489,10793390000.0,160971,0,0,0,12,7,2021,0,193,3
122,2020-06-09,1020.0,1026.75,979.55,984.45,987.3,1004.5,2503.3,738.75,30922894,31062060000.0,417245,0,0,0,9,6,2020,1,161,2
111,2020-05-22,850.0,870.3,833.05,842.65,838.85,844.52,2503.3,738.75,21025849,17756810000.0,382888,0,0,0,22,5,2020,4,143,2


In [11]:
# Dimensions of the frame now that the six date-based columns have been added.
stock_df.shape

(1009, 21)

### Moving averages

#### `Close`

In [12]:
# Trailing mean of Close for each configured window, rounded to 2 decimals.
# min_periods = 1 means the first rows average over however many sessions exist so far.
close_prices = stock_df['Close']
for window in ROLLING_WINDOWS:
    close_rolling_mean = close_prices.rolling(window = window, min_periods = 1).mean()
    stock_df[f'Close {window}MA'] = close_rolling_mean.round(2)

stock_df.filter(regex = "Close.*")

Unnamed: 0,Close,Close 3MA,Close 7MA,Close 15MA,Close 30MA
0,1278.60,1278.60,1278.60,1278.60,1278.60
1,1286.75,1282.68,1282.68,1282.68,1282.68
2,1268.40,1277.92,1277.92,1277.92,1277.92
3,1240.95,1265.37,1268.68,1268.68,1268.68
4,1260.60,1256.65,1267.06,1267.06,1267.06
...,...,...,...,...,...
1004,1670.85,1671.52,1661.41,1646.06,1582.67
1005,1682.45,1680.00,1666.02,1650.93,1589.04
1006,1703.30,1685.53,1672.70,1656.24,1595.81
1007,1705.25,1697.00,1679.78,1661.40,1603.03


#### `VWAP`

In [13]:
# Trailing mean of VWAP for each configured window, rounded to 2 decimals.
# min_periods = 1 means the first rows average over however many sessions exist so far.
vwap_prices = stock_df['VWAP']
for window in ROLLING_WINDOWS:
    vwap_rolling_mean = vwap_prices.rolling(window = window, min_periods = 1).mean()
    stock_df[f'VWAP {window}MA'] = vwap_rolling_mean.round(2)

stock_df.filter(regex = "VWAP.*")

Unnamed: 0,VWAP,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA
0,1276.64,1276.64,1276.64,1276.64,1276.64
1,1284.56,1280.60,1280.60,1280.60,1280.60
2,1270.48,1277.23,1277.23,1277.23,1277.23
3,1247.24,1267.43,1269.73,1269.73,1269.73
4,1261.48,1259.73,1268.08,1268.08,1268.08
...,...,...,...,...,...
1004,1675.74,1670.28,1660.89,1645.06,1581.69
1005,1678.46,1675.69,1664.81,1650.81,1588.08
1006,1695.87,1683.36,1670.60,1655.46,1594.63
1007,1711.26,1695.20,1678.24,1661.05,1601.98


#### `Volume`

In [14]:
# Trailing mean of Volume for each configured window, truncated to a whole number.
# min_periods = 1 means the first rows average over however many sessions exist so far.
# The cast names int64 explicitly: plain `int` is only 32 bits wide on Windows with
# NumPy < 2, which would make the dtype written to parquet depend on the platform.
for window in ROLLING_WINDOWS:
    stock_df[f'Volume {window}MA'] = (
        stock_df['Volume']
        .rolling(window = window, min_periods = 1)
        .mean()
        .astype('int64')
    )

stock_df.filter(regex = "Volume.*")

Unnamed: 0,Volume,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA
0,1836849,1836849,1836849,1836849,1836849
1,3068583,2452716,2452716,2452716,2452716
2,5427775,3444402,3444402,3444402,3444402
3,5445093,4647150,3944575,3944575,3944575
4,7362247,6078371,4628109,4628109,4628109
...,...,...,...,...,...
1004,24289425,19900101,23959521,20674469,16842909
1005,9022928,17202324,22736199,19587249,16910087
1006,13504539,15605630,14748481,18746524,17294389
1007,22038235,14855234,16610032,18928638,17685565


#### `Value`

In [15]:
# Trailing mean of Value for each configured window, truncated to a whole number.
# min_periods = 1 means the first rows average over however many sessions exist so far.
# The cast names int64 explicitly: these averages are in the billions, which does not
# fit in the 32-bit integer that plain `int` maps to on Windows with NumPy < 2.
for window in ROLLING_WINDOWS:
    stock_df[f'Value {window}MA'] = (
        stock_df['Value']
        .rolling(window = window, min_periods = 1)
        .mean()
        .astype('int64')
    )

stock_df.filter(regex = "Value.*")

Unnamed: 0,Value,Value 3MA,Value 7MA,Value 15MA,Value 30MA
0,2.345001e+09,2345000988,2345000988,2345000988,2345000988
1,3.941792e+09,3143396262,3143396262,3143396262,3143396262
2,6.895886e+09,4394226092,4394226092,4394226092,4394226092
3,6.791348e+09,5876341707,4993506527,4993506527,4993506527
4,9.287302e+09,7658178376,5852265530,5852265530,5852265530
...,...,...,...,...,...
1004,4.070271e+10,33252816061,39779977233,34019236236,26928124028
1005,1.514458e+10,28817261735,37795582215,32339953679,27085677731
1006,2.290188e+10,26249724747,24651270134,31035709963,27750333500
1007,3.771305e+10,25253170191,27905665883,31455274908,28495473480


#### `#Trades`

In [16]:
# Trailing mean of #Trades for each configured window, truncated to a whole number.
# min_periods = 1 means the first rows average over however many sessions exist so far.
# The cast names int64 explicitly: plain `int` is only 32 bits wide on Windows with
# NumPy < 2, which would make the dtype written to parquet depend on the platform.
for window in ROLLING_WINDOWS:
    stock_df[f'#Trades {window}MA'] = (
        stock_df['#Trades']
        .rolling(window = window, min_periods = 1)
        .mean()
        .astype('int64')
    )

stock_df.filter(regex = "#Trades.*")

Unnamed: 0,#Trades,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,46625,46625,46625,46625,46625
1,104570,75597,75597,75597,75597
2,157066,102753,102753,102753,102753
3,155007,138881,115817,115817,115817
4,189026,167033,130458,130458,130458
...,...,...,...,...,...
1004,450284,417161,361824,361372,308156
1005,243596,359931,347304,348166,310241
1006,321373,338417,333095,343117,318093
1007,423062,329343,356645,348588,321565


### Saving processed data

In [17]:
# Persist the feature-engineered frame (absolute prices and volumes, before any
# normalisation) without writing the RangeIndex as a column.
processed_path = cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
stock_df.to_parquet(processed_path, index = False)
stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,2020-01-01,1276.10,1280.00,1270.60,1279.00,1278.60,1276.64,2503.3,1084.00,1836849,2.345001e+09,46625,1,0,0,1,1,2020,2,1,1,1278.60,1278.60,1278.60,1278.60,1276.64,1276.64,1276.64,1276.64,1836849,1836849,1836849,1836849,2345000988,2345000988,2345000988,2345000988,46625,46625,46625,46625
1,2020-01-02,1279.00,1288.00,1279.00,1286.00,1286.75,1284.56,2503.3,1084.00,3068583,3.941792e+09,104570,1,0,0,2,1,2020,3,2,1,1282.68,1282.68,1282.68,1282.68,1280.60,1280.60,1280.60,1280.60,2452716,2452716,2452716,2452716,3143396262,3143396262,3143396262,3143396262,75597,75597,75597,75597
2,2020-01-03,1282.20,1285.00,1263.60,1268.50,1268.40,1270.48,2503.3,1084.00,5427775,6.895886e+09,157066,0,0,0,3,1,2020,4,3,1,1277.92,1277.92,1277.92,1277.92,1277.23,1277.23,1277.23,1277.23,3444402,3444402,3444402,3444402,4394226092,4394226092,4394226092,4394226092,102753,102753,102753,102753
3,2020-01-06,1260.00,1261.80,1236.00,1240.25,1240.95,1247.24,2503.3,1084.00,5445093,6.791348e+09,155007,0,0,0,6,1,2020,0,6,1,1265.37,1268.68,1268.68,1268.68,1267.43,1269.73,1269.73,1269.73,4647150,3944575,3944575,3944575,5876341707,4993506527,4993506527,4993506527,138881,115817,115817,115817
4,2020-01-07,1258.90,1271.45,1252.25,1261.00,1260.60,1261.48,2503.3,1084.00,7362247,9.287302e+09,189026,1,0,0,7,1,2020,1,7,1,1256.65,1267.06,1267.06,1267.06,1259.73,1268.08,1268.08,1268.08,6078371,4628109,4628109,4628109,7658178376,5852265530,5852265530,5852265530,167033,130458,130458,130458
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,2023-12-22,1683.60,1685.90,1667.10,1668.70,1670.85,1675.74,1757.5,1460.25,24289425,4.070271e+10,450284,0,0,0,22,12,2023,4,356,4,1671.52,1661.41,1646.06,1582.67,1670.28,1660.89,1645.06,1581.69,19900101,23959521,20674469,16842909,33252816061,39779977233,34019236236,26928124028,417161,361824,361372,308156
1005,2023-12-26,1673.25,1685.95,1668.55,1684.10,1682.45,1678.46,1757.5,1460.25,9022928,1.514458e+10,243596,1,0,0,26,12,2023,1,360,4,1680.00,1666.02,1650.93,1589.04,1675.69,1664.81,1650.81,1588.08,17202324,22736199,19587249,16910087,28817261735,37795582215,32339953679,27085677731,359931,347304,348166,310241
1006,2023-12-27,1681.50,1706.50,1678.60,1702.10,1703.30,1695.87,1757.5,1460.25,13504539,2.290188e+10,321373,1,0,0,27,12,2023,2,361,4,1685.53,1672.70,1656.24,1595.81,1683.36,1670.60,1655.46,1594.63,15605630,14748481,18746524,17294389,26249724747,24651270134,31035709963,27750333500,338417,333095,343117,318093
1007,2023-12-28,1709.30,1721.40,1702.00,1708.85,1705.25,1711.26,1757.5,1460.25,22038235,3.771305e+10,423062,0,0,0,28,12,2023,3,362,4,1697.00,1679.78,1661.40,1603.03,1695.20,1678.24,1661.05,1601.98,14855234,16610032,18928638,17685565,25253170191,27905665883,31455274908,28495473480,329343,356645,348588,321565


## Data standardization

### Stock price based columns

In [18]:
# Every column measured in price units: the raw quote fields plus Close, VWAP
# and all of their moving averages (selected by name pattern).
raw_price_cols = ['Open', 'High', 'Low', 'LTP', '52W H', '52W L']
close_family_cols = stock_df.filter(regex = "Close.*").columns.to_list()
vwap_family_cols = stock_df.filter(regex = "VWAP.*").columns.to_list()

stock_price_cols = [*raw_price_cols, *close_family_cols, *vwap_family_cols]

stock_price_cols

['Open',
 'High',
 'Low',
 'LTP',
 '52W H',
 '52W L',
 'Close',
 'Close 3MA',
 'Close 7MA',
 'Close 15MA',
 'Close 30MA',
 'VWAP',
 'VWAP 3MA',
 'VWAP 7MA',
 'VWAP 15MA',
 'VWAP 30MA']

In [19]:
# Express each price column as a multiple of the same row's Close, to 3 decimals.
# The ratios are computed from the original Close before anything is written back,
# so Close itself ends up as 1.0.
price_ratios = stock_df[stock_price_cols].div(stock_df['Close'], axis = 'index')
stock_df[stock_price_cols] = price_ratios.round(3)
stock_df[stock_price_cols]

Unnamed: 0,Open,High,Low,LTP,52W H,52W L,Close,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA
0,0.998,1.001,0.994,1.000,1.958,0.848,1.0,1.000,1.000,1.000,1.000,0.998,0.998,0.998,0.998,0.998
1,0.994,1.001,0.994,0.999,1.945,0.842,1.0,0.997,0.997,0.997,0.997,0.998,0.995,0.995,0.995,0.995
2,1.011,1.013,0.996,1.000,1.974,0.855,1.0,1.008,1.008,1.008,1.008,1.002,1.007,1.007,1.007,1.007
3,1.015,1.017,0.996,0.999,2.017,0.874,1.0,1.020,1.022,1.022,1.022,1.005,1.021,1.023,1.023,1.023
4,0.999,1.009,0.993,1.000,1.986,0.860,1.0,0.997,1.005,1.005,1.005,1.001,0.999,1.006,1.006,1.006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,1.008,1.009,0.998,0.999,1.052,0.874,1.0,1.000,0.994,0.985,0.947,1.003,1.000,0.994,0.985,0.947
1005,0.995,1.002,0.992,1.001,1.045,0.868,1.0,0.999,0.990,0.981,0.944,0.998,0.996,0.990,0.981,0.944
1006,0.987,1.002,0.985,0.999,1.032,0.857,1.0,0.990,0.982,0.972,0.937,0.996,0.988,0.981,0.972,0.936
1007,1.002,1.009,0.998,1.002,1.031,0.856,1.0,0.995,0.985,0.974,0.940,1.004,0.994,0.984,0.974,0.939


### Volume based columns

In [20]:
# Volume and all of its moving-average columns, selected by name pattern.
volume_family = stock_df.filter(regex = "Volume.*")
volume_cols = volume_family.columns.to_list()

volume_cols

['Volume', 'Volume 3MA', 'Volume 7MA', 'Volume 15MA', 'Volume 30MA']

In [21]:
# Express each volume column as a multiple of the same row's Volume, to 3 decimals
# (Volume itself becomes 1.0).
volume_ratios = stock_df[volume_cols].div(stock_df['Volume'], axis = 'index')
stock_df[volume_cols] = volume_ratios.round(3)
stock_df[volume_cols]

Unnamed: 0,Volume,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,0.799,0.799,0.799,0.799
2,1.0,0.635,0.635,0.635,0.635
3,1.0,0.853,0.724,0.724,0.724
4,1.0,0.826,0.629,0.629,0.629
...,...,...,...,...,...
1004,1.0,0.819,0.986,0.851,0.693
1005,1.0,1.907,2.520,2.171,1.874
1006,1.0,1.156,1.092,1.388,1.281
1007,1.0,0.674,0.754,0.859,0.802


### Value based columns

In [22]:
# Value and all of its moving-average columns, selected by name pattern.
value_family = stock_df.filter(regex = "Value.*")
value_cols = value_family.columns.to_list()

value_cols

['Value', 'Value 3MA', 'Value 7MA', 'Value 15MA', 'Value 30MA']

In [23]:
# Express each value column as a multiple of the same row's Value, to 3 decimals
# (Value itself becomes 1.0).
value_ratios = stock_df[value_cols].div(stock_df['Value'], axis = 'index')
stock_df[value_cols] = value_ratios.round(3)
stock_df[value_cols]

Unnamed: 0,Value,Value 3MA,Value 7MA,Value 15MA,Value 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,0.797,0.797,0.797,0.797
2,1.0,0.637,0.637,0.637,0.637
3,1.0,0.865,0.735,0.735,0.735
4,1.0,0.825,0.630,0.630,0.630
...,...,...,...,...,...
1004,1.0,0.817,0.977,0.836,0.662
1005,1.0,1.903,2.496,2.135,1.788
1006,1.0,1.146,1.076,1.355,1.212
1007,1.0,0.670,0.740,0.834,0.756


### Trade count based columns

In [24]:
# #Trades and all of its moving-average columns, selected by name pattern.
trade_count_family = stock_df.filter(regex = "#Trades.*")
trade_count_cols = trade_count_family.columns.to_list()

trade_count_cols

['#Trades', '#Trades 3MA', '#Trades 7MA', '#Trades 15MA', '#Trades 30MA']

In [25]:
# Express each trade-count column as a multiple of the same row's #Trades, to
# 3 decimals (#Trades itself becomes 1.0).
trade_count_ratios = stock_df[trade_count_cols].div(stock_df['#Trades'], axis = 'index')
stock_df[trade_count_cols] = trade_count_ratios.round(3)
stock_df[trade_count_cols]

Unnamed: 0,#Trades,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,1.0,1.000,1.000,1.000,1.000
1,1.0,0.723,0.723,0.723,0.723
2,1.0,0.654,0.654,0.654,0.654
3,1.0,0.896,0.747,0.747,0.747
4,1.0,0.884,0.690,0.690,0.690
...,...,...,...,...,...
1004,1.0,0.926,0.804,0.803,0.684
1005,1.0,1.478,1.426,1.429,1.274
1006,1.0,1.053,1.036,1.068,0.990
1007,1.0,0.778,0.843,0.824,0.760


### Dropping unnecessary columns

In [26]:
# Remove the raw date together with the four reference columns, which are
# constant 1.0 after the normalisation cells above and carry no information.
columns_to_drop = ['Date', 'Close', 'Volume', 'Value', '#Trades']
stock_df = stock_df.drop(columns = columns_to_drop)
stock_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,0.998,1.001,0.994,1.000,0.998,1.958,0.848,1,0,0,1,1,2020,2,1,1,1.000,1.000,1.000,1.000,0.998,0.998,0.998,0.998,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000
1,0.994,1.001,0.994,0.999,0.998,1.945,0.842,1,0,0,2,1,2020,3,2,1,0.997,0.997,0.997,0.997,0.995,0.995,0.995,0.995,0.799,0.799,0.799,0.799,0.797,0.797,0.797,0.797,0.723,0.723,0.723,0.723
2,1.011,1.013,0.996,1.000,1.002,1.974,0.855,0,0,0,3,1,2020,4,3,1,1.008,1.008,1.008,1.008,1.007,1.007,1.007,1.007,0.635,0.635,0.635,0.635,0.637,0.637,0.637,0.637,0.654,0.654,0.654,0.654
3,1.015,1.017,0.996,0.999,1.005,2.017,0.874,0,0,0,6,1,2020,0,6,1,1.020,1.022,1.022,1.022,1.021,1.023,1.023,1.023,0.853,0.724,0.724,0.724,0.865,0.735,0.735,0.735,0.896,0.747,0.747,0.747
4,0.999,1.009,0.993,1.000,1.001,1.986,0.860,1,0,0,7,1,2020,1,7,1,0.997,1.005,1.005,1.005,0.999,1.006,1.006,1.006,0.826,0.629,0.629,0.629,0.825,0.630,0.630,0.630,0.884,0.690,0.690,0.690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,1.008,1.009,0.998,0.999,1.003,1.052,0.874,0,0,0,22,12,2023,4,356,4,1.000,0.994,0.985,0.947,1.000,0.994,0.985,0.947,0.819,0.986,0.851,0.693,0.817,0.977,0.836,0.662,0.926,0.804,0.803,0.684
1005,0.995,1.002,0.992,1.001,0.998,1.045,0.868,1,0,0,26,12,2023,1,360,4,0.999,0.990,0.981,0.944,0.996,0.990,0.981,0.944,1.907,2.520,2.171,1.874,1.903,2.496,2.135,1.788,1.478,1.426,1.429,1.274
1006,0.987,1.002,0.985,0.999,0.996,1.032,0.857,1,0,0,27,12,2023,2,361,4,0.990,0.982,0.972,0.937,0.988,0.981,0.972,0.936,1.156,1.092,1.388,1.281,1.146,1.076,1.355,1.212,1.053,1.036,1.068,0.990
1007,1.002,1.009,0.998,1.002,1.004,1.031,0.856,0,0,0,28,12,2023,3,362,4,0.995,0.985,0.974,0.940,0.994,0.984,0.974,0.939,0.674,0.754,0.859,0.802,0.670,0.740,0.834,0.756,0.778,0.843,0.824,0.760


### Saving standardized data

In [27]:
# Persist the ratio-normalised frame without writing the RangeIndex as a column.
standardized_path = cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
stock_df.to_parquet(standardized_path, index = False)
stock_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA
0,0.998,1.001,0.994,1.000,0.998,1.958,0.848,1,0,0,1,1,2020,2,1,1,1.000,1.000,1.000,1.000,0.998,0.998,0.998,0.998,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000
1,0.994,1.001,0.994,0.999,0.998,1.945,0.842,1,0,0,2,1,2020,3,2,1,0.997,0.997,0.997,0.997,0.995,0.995,0.995,0.995,0.799,0.799,0.799,0.799,0.797,0.797,0.797,0.797,0.723,0.723,0.723,0.723
2,1.011,1.013,0.996,1.000,1.002,1.974,0.855,0,0,0,3,1,2020,4,3,1,1.008,1.008,1.008,1.008,1.007,1.007,1.007,1.007,0.635,0.635,0.635,0.635,0.637,0.637,0.637,0.637,0.654,0.654,0.654,0.654
3,1.015,1.017,0.996,0.999,1.005,2.017,0.874,0,0,0,6,1,2020,0,6,1,1.020,1.022,1.022,1.022,1.021,1.023,1.023,1.023,0.853,0.724,0.724,0.724,0.865,0.735,0.735,0.735,0.896,0.747,0.747,0.747
4,0.999,1.009,0.993,1.000,1.001,1.986,0.860,1,0,0,7,1,2020,1,7,1,0.997,1.005,1.005,1.005,0.999,1.006,1.006,1.006,0.826,0.629,0.629,0.629,0.825,0.630,0.630,0.630,0.884,0.690,0.690,0.690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1004,1.008,1.009,0.998,0.999,1.003,1.052,0.874,0,0,0,22,12,2023,4,356,4,1.000,0.994,0.985,0.947,1.000,0.994,0.985,0.947,0.819,0.986,0.851,0.693,0.817,0.977,0.836,0.662,0.926,0.804,0.803,0.684
1005,0.995,1.002,0.992,1.001,0.998,1.045,0.868,1,0,0,26,12,2023,1,360,4,0.999,0.990,0.981,0.944,0.996,0.990,0.981,0.944,1.907,2.520,2.171,1.874,1.903,2.496,2.135,1.788,1.478,1.426,1.429,1.274
1006,0.987,1.002,0.985,0.999,0.996,1.032,0.857,1,0,0,27,12,2023,2,361,4,0.990,0.982,0.972,0.937,0.988,0.981,0.972,0.936,1.156,1.092,1.388,1.281,1.146,1.076,1.355,1.212,1.053,1.036,1.068,0.990
1007,1.002,1.009,0.998,1.002,1.004,1.031,0.856,0,0,0,28,12,2023,3,362,4,0.995,0.985,0.974,0.940,0.994,0.984,0.974,0.939,0.674,0.754,0.859,0.802,0.670,0.740,0.834,0.756,0.778,0.843,0.824,0.760
