In [52]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../src')


In [53]:
from features import (
    calculate_returns,
    calculate_volatility_features,
    calculate_liquidity_features,
    calculate_momentum_features,
    calculate_drawdown
)

# Merged dataset

In [54]:
df = pd.read_csv("../Data/Processed/cleaned_nse.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69754 entries, 0 to 69753
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            69754 non-null  object 
 1   Stock_code      69754 non-null  object 
 2   Name            69754 non-null  object 
 3   12m Low         69754 non-null  float64
 4   12m High        69754 non-null  float64
 5   Day Low         69754 non-null  float64
 6   Day High        69754 non-null  float64
 7   Day Price       69754 non-null  float64
 8   Previous        69754 non-null  float64
 9   Change          69754 non-null  float64
 10  %Change         69754 non-null  float64
 11  Volume          69754 non-null  float64
 12  Adjusted Price  69754 non-null  float64
 13  Sector          69754 non-null  object 
 14  Month           69754 non-null  int64  
 15  Year            69754 non-null  int64  
dtypes: float64(10), int64(2), object(4)
memory usage: 8.5+ MB


In [58]:
# Applying feature engineering to df

print("Calculating returns")
df = df.groupby('Stock_code', group_keys=False).apply(calculate_returns).reset_index(drop=True)

print("Calculating volatility")
df = df.groupby('Stock_code', group_keys=False).apply(calculate_volatility_features).reset_index(drop=True)

print("Calculating liquidity")
df = df.groupby('Stock_code', group_keys=False).apply(calculate_liquidity_features).reset_index(drop=True)

print("Calculating momentum")
df = df.groupby('Stock_code', group_keys=False).apply(calculate_momentum_features).reset_index(drop=True)

print("Calculating drawdown...")
df = df.groupby('Stock_code', group_keys=False).apply(calculate_drawdown).reset_index(drop=True)

print("\nAggregating to stock level")
features_list = []
for stock_code, group in df.groupby('Stock_code'):
    stock_features = aggregate_stock_features(group)
    if stock_features is not None:
        features_list.append(stock_features)

df_features = pd.DataFrame(features_list)

df_features.head()

Calculating returns


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_returns).reset_index(drop=True)


Calculating volatility


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_volatility_features).reset_index(drop=True)


Calculating liquidity


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_liquidity_features).reset_index(drop=True)


Calculating momentum


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_momentum_features).reset_index(drop=True)


Calculating drawdown...


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_drawdown).reset_index(drop=True)



Aggregating to stock level


Unnamed: 0,Stock_code,Sector,Name,trading_days,total_days,trading_frequency,mean_return,std_return,volatility_7d,volatility_30d,max_drawdown,avg_volume,zero_volume_ratio,momentum_30d,current_price
0,ABSA,Banking,ABSA Bank Kenya Plc,988,988,1.0,0.000743,0.013638,0.011135,0.012541,-0.23622,457918.016194,0.0,0.160772,18.05
1,BAMB,Construction and Allied,Bamburi Cement Ltd,974,988,0.98583,0.000901,0.028007,0.021978,0.024157,-0.514563,233655.667351,0.01417,-0.17603,55.0
2,BAT,Manufacturing and Allied,British American Tobacco Kenya Plc,936,988,0.947368,0.000126,0.013762,0.010576,0.012213,-0.348232,19173.611111,0.052632,0.059155,376.0
3,BKG,Banking,BK Group Plc,592,988,0.59919,0.001923,0.038372,0.025778,0.028502,-0.3925,86221.114865,0.40081,-0.01214,32.55
4,BOC,Manufacturing and Allied,BOC Kenya Plc,402,988,0.406883,0.001758,0.042706,0.021544,0.026053,-0.302949,10246.268657,0.593117,-0.002809,88.75


In [59]:
df_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57 entries, 0 to 56
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Stock_code         57 non-null     object 
 1   Sector             57 non-null     object 
 2   Name               57 non-null     object 
 3   trading_days       57 non-null     int64  
 4   total_days         57 non-null     int64  
 5   trading_frequency  57 non-null     float64
 6   mean_return        57 non-null     float64
 7   std_return         57 non-null     float64
 8   volatility_7d      57 non-null     float64
 9   volatility_30d     57 non-null     float64
 10  max_drawdown       57 non-null     float64
 11  avg_volume         57 non-null     float64
 12  zero_volume_ratio  57 non-null     float64
 13  momentum_30d       57 non-null     float64
 14  current_price      57 non-null     float64
dtypes: float64(10), int64(2), object(3)
memory usage: 6.8+ KB


In [60]:
df_features.to_csv('../data/processed/nse_features.csv', index=False)