## Feature Selection

In [None]:
# Delete unwanted features from the dataset.
drop_cols = [
    'TRIMA5', 'MOM20', 'HT_TRENDLINE', 'KAMA20', 'fastdsr', 'fastd', 'CCI5',
    'SAR', 'ROC5', 'TRIMA20', 'MOM10', 'HT_DCPERIOD', 'Trange', 'PPO', 'ADX20',
    'fastksr', 'KAMA30', 'ADX5', 'CCI10', 'slowd', 'TYPPRICE', 'CCI15', 'fastk',
    'ULTOSC', 'ADX10', 'APO', 'slowk', 'BETA', 'WILLR', 'ATR', 'MOM15',
    'TRIMA10', 'KAMA10', 'ROC10', 'ROC20',
]

# errors='ignore' keeps this cell re-runnable: each frame is rebound to its
# trimmed version, so on a second run the columns are already gone and a plain
# drop() would raise KeyError.
hdfc_five_min_df = hdfc_five_min_df.drop(columns=drop_cols, errors='ignore')
reliance_five_min_df = reliance_five_min_df.drop(columns=drop_cols, errors='ignore')
sunpharma_five_min_df = sunpharma_five_min_df.drop(columns=drop_cols, errors='ignore')

# Show the columns that remain (all three frames receive the same drop list).
print(hdfc_five_min_df.columns)

Index(['date', 'open', 'high', 'low', 'close', 'volume', 'sma5', 'sma10',
       'sma15', 'sma20', 'ema5', 'ema10', 'ema15', 'ema20', 'upperband',
       'middleband', 'lowerband', 'macd510', 'macd520', 'macd1020', 'macd1520',
       'macd1226', 'RSI14', 'RSI8'],
      dtype='object')


## Deleting Timezone from Date

In [None]:
# Remove the time-zone part from the 'date' column and use it as the index.


def _strip_timezone_and_index_by_date(frame):
    """Return a copy of ``frame`` indexed by a time-zone-naive ``date``.

    The ``date`` column is parsed with ``pd.to_datetime``. If the parsed values
    are tz-aware, ``Series.dt.tz_convert(None)`` (a pandas accessor method)
    converts them to UTC and removes the time-zone information. The result is
    then moved from a column to the index.

    A frame that is already indexed by ``date`` and has no ``date`` column is
    returned unchanged, so the cell can safely be run more than once.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with ``date`` as its index; the input is not modified.
    """
    if 'date' not in frame.columns and frame.index.name == 'date':
        # Already processed by an earlier run of this cell.
        return frame

    # No `format=` is passed: the timestamps include a time of day, so a
    # date-only format such as '%Y-%m-%d' would not describe them.
    timestamps = pd.to_datetime(frame['date'])
    if timestamps.dt.tz is not None:
        # tz_convert raises on tz-naive data, hence the guard.
        timestamps = timestamps.dt.tz_convert(None)

    indexed = frame.copy()
    indexed['date'] = timestamps
    # set_index returns the re-indexed frame. Do not combine inplace=True with
    # an assignment: the in-place form returns None, and assigning that back to
    # frame['date'] would recreate a 'date' column filled with None.
    return indexed.set_index('date')


# HDFC Bank
hdfc_five_min_df = _strip_timezone_and_index_by_date(hdfc_five_min_df)

# Reliance Industries
reliance_five_min_df = _strip_timezone_and_index_by_date(reliance_five_min_df)

# Sun Pharmaceutical Industries
sunpharma_five_min_df = _strip_timezone_and_index_by_date(sunpharma_five_min_df)

# Sanity check: first index entry of each frame should be a tz-naive timestamp.
print(hdfc_five_min_df.head(1).index, reliance_five_min_df.head(1).index, sunpharma_five_min_df.head(1).index)

DatetimeIndex(['2015-02-02 09:00:00'], dtype='datetime64[ns]', name='date', freq=None) DatetimeIndex(['2015-02-02 09:00:00'], dtype='datetime64[ns]', name='date', freq=None) DatetimeIndex(['2015-02-02 09:00:00'], dtype='datetime64[ns]', name='date', freq=None)


# Data Manipulation

In [None]:
# Resampling data from 5-minute bars to monthly bars.
# Requires each frame to have a DatetimeIndex (resample() groups on the index).

# How each column is collapsed within a month:
#   open -> first value, high -> max, low -> min, close -> last value,
#   volume -> sum, every indicator column -> last value in the month.
# NOTE(review): the indicator columns are carried over as their last value in
# the month; they are not recomputed on the monthly bars - confirm this is the
# intended meaning for downstream use.
columns_to_agg = {
    'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum', 'sma5': 'last', 'sma10': 'last', 'sma15': 'last', 'sma20': 'last',
    'ema5': 'last', 'ema10': 'last', 'ema15': 'last', 'ema20': 'last', 'upperband': 'last', 'middleband': 'last','lowerband': 'last','macd510': 'last',
    'macd520': 'last', 'macd1020': 'last', 'macd1520': 'last', 'macd1226': 'last', 'RSI14': 'last', 'RSI8': 'last'
}

# Columns to resample, derived from the aggregation mapping (same order) so the
# two can never drift apart.
columns_to_resample = list(columns_to_agg)

# 'M' is the month-end frequency alias, so each output row is one calendar month.
# NOTE(review): newer pandas releases deprecate 'M' in favour of 'ME' - check
# the installed version before changing the alias.

# HDFC Bank: monthly bars
monthly_hdfc_data = hdfc_five_min_df[columns_to_resample].resample('M').agg(columns_to_agg)

# Reliance Industries: monthly bars
monthly_reliance_data = reliance_five_min_df[columns_to_resample].resample('M').agg(columns_to_agg)

# Sun Pharmaceutical Industries: monthly bars
monthly_sunpharma_data = sunpharma_five_min_df[columns_to_resample].resample('M').agg(columns_to_agg)


print(monthly_hdfc_data.shape, monthly_reliance_data.shape, monthly_sunpharma_data.shape)

(85, 23) (85, 23) (85, 23)


## Add new features year and month

In [None]:
# Derive calendar 'year' and 'month' columns from each monthly frame's index
# and store them as additional features. The frames are updated in place.
for monthly_frame in (monthly_hdfc_data, monthly_reliance_data, monthly_sunpharma_data):
    period_index = monthly_frame.index
    monthly_frame['year'] = period_index.year
    monthly_frame['month'] = period_index.month

# Each frame should now have two more columns than before.
print(monthly_hdfc_data.shape, monthly_reliance_data.shape, monthly_sunpharma_data.shape)

(85, 25) (85, 25) (85, 25)
