In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import StratifiedKFold
import xgboost as xgb

from sklearn.model_selection import cross_validate
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load the NSE banking-sector CSV from the ../input directory and preview it.
data = pd.read_csv("../input/national-stock-exchange-banking-sectors/NSE_BANKING_SECTOR.csv")
data

In [None]:
# Display the DATE column to see which period the data covers.
data['DATE']
# Date range noted from the output: 2016-01-01 to 2021-05-28

In [None]:
# List the distinct SYMBOL values present in the dataset.
data['SYMBOL'].unique()

In [None]:
# Keep only the rows whose SYMBOL equals 'HDFC'.
data1 = data.loc[data['SYMBOL'].eq('HDFC')]

In [None]:
# Remove the SYMBOL and SERIES columns from the HDFC frame.
data1 = data1.drop(columns=['SYMBOL', 'SERIES'])

In [None]:
# Number of HDFC rows; reused further down to position the train/test split.
n = data1.shape[0]
print(n)

In [None]:
# Count the rows whose DATE string starts with the year 2021.
t = sum(1 for item in data1['DATE'] if item[0:4] == '2021')
print(t)

In [None]:
# Keep the last 99 rows, taken before DATE is removed by the preprocessing
# step, so predictions can later be shown next to their dates.
# .copy() makes this an independent frame: a column is added to it further
# down, and writing into a bare slice raises pandas' SettingWithCopyWarning.
# NOTE(review): 99 is presumably the 2021 row count printed above - confirm.
testx = data1.iloc[n-99:].copy()
testx

In [None]:
def dummy_date(df):
    """Replace the ``DATE`` column with string ``year``, ``month`` and ``day`` columns.

    Each ``DATE`` value is split on ``"-"``; pieces 0, 1 and 2 become the
    ``year``, ``month`` and ``day`` columns, appended after the remaining
    columns in that order.

    The input frame is not modified. The previous implementation added the
    new columns to, and dropped ``DATE`` from, the caller's frame, so running
    the preprocessing a second time on the same frame raised ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``DATE`` column of ``"-"``-separated strings.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``DATE`` and with ``year``, ``month``, ``day``
        (as strings) appended.

    Raises
    ------
    AttributeError
        If a ``DATE`` value has no ``split`` method (e.g. a missing value).
    IndexError
        If a ``DATE`` value has fewer than three ``"-"``-separated pieces.
    """
    # Split once and reuse the pieces for all three columns.
    pieces = [value.split("-") for value in df["DATE"]]
    out = df.drop(columns="DATE")
    for position, column in enumerate(("year", "month", "day")):
        out[column] = [piece[position] for piece in pieces]
    return out

def LabelEncord_categorical(df):
    """Integer-encode the ``year``, ``month`` and ``day`` columns of ``df``.

    A fresh ``LabelEncoder`` is fitted on each column and its output
    overwrites that column on the frame that was passed in; the same frame
    object is returned.
    """
    for column in ("year", "month", "day"):
        df[column] = LabelEncoder().fit_transform(df[column])
    return df

def dummies(df):
    """Append one-hot indicator columns for ``year``, ``month`` and ``day``.

    The indicator columns are prefixed with their source column name
    (``year_<value>``, ``month_<value>``, ``day_<value>``). Without a prefix
    the three groups share labels (for example the value ``0`` or ``"01"``
    occurring in more than one of them), which yields duplicate column names
    in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``year``, ``month`` and ``day`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame: the original columns followed by the year, month and
        day indicator columns, in that order. ``df`` itself is not modified.
    """
    categorical_params = ["year", "month", "day"]
    encoded = [pd.get_dummies(df[param], prefix=param) for param in categorical_params]
    return pd.concat([df, *encoded], axis=1)

def pre_processing(df):
    """Run the date feature pipeline on ``df`` and return the result.

    ``dummy_date`` splits ``DATE`` into year/month/day, then
    ``LabelEncord_categorical`` integer-encodes those three columns.
    The one-hot step (``dummies``) is currently disabled and not applied.
    """
    return LabelEncord_categorical(dummy_date(df))

# Build the modelling frame: DATE is replaced by integer-coded year/month/day columns.
data2 = pre_processing(data1)

In [None]:
# Print the column summary (dtypes, non-null counts) of the processed frame.
data2.info()

In [None]:
# Positional split: the final 99 rows are the test set, all earlier rows the training set.
train, test = data2.iloc[:n-99], data2.iloc[n-99:]

In [None]:
# Preview the training portion.
train

In [None]:
# CLOSE is the regression target; every other column is used as a feature.
X_train, y_train = train.drop(columns="CLOSE").to_numpy(), train["CLOSE"].to_numpy()
X_test, y_test = test.drop(columns="CLOSE").to_numpy(), test["CLOSE"].to_numpy()

### Grid search for the best hyperparameters

In [None]:
# Exhaustive search over a small XGBoost hyperparameter grid; no `cv` or
# `scoring` is passed, so GridSearchCV falls back to its defaults.
# NOTE(review): the rows appear to be time-ordered; a time-aware splitter
# (e.g. TimeSeriesSplit) may be more appropriate than the default - confirm.
gbm = xgb.XGBRegressor()
reg_cv = GridSearchCV(
    estimator=gbm,
    param_grid={
        "colsample_bytree": [1.0],
        "min_child_weight": [1.0, 1.2],
        "max_depth": [3, 4, 6],
        "n_estimators": [500, 1000],
    },
    verbose=1,
)
reg_cv.fit(X_train, y_train)

In [None]:
# Show the hyperparameter combination selected by the grid search.
reg_cv.best_params_

### Train XGBRegressor with the best parameters

In [None]:
# GridSearchCV was created with its default refit=True, so after fit() it
# already holds a copy of the estimator refitted on all of X_train with the
# best parameters. Reuse it instead of training an identical model again.
gbm = reg_cv.best_estimator_
gbm

### Predict

In [None]:
# Predict CLOSE for the held-out test rows.
predictions = gbm.predict(X_test)

In [None]:
# Score on the held-out rows.
# NOTE(review): for scikit-learn style regressors score() is R^2 - confirm for the installed xgboost.
gbm.score(X_test,y_test)

In [None]:
# Same score on the training rows, for comparison with the test score.
gbm.score(X_train,y_train)

In [None]:
# Attach the predictions to the held-out rows and keep only the columns
# needed for the comparison.
# assign() returns a new frame instead of writing a column into the existing
# `testx` object, so no SettingWithCopyWarning is raised even when `testx`
# was obtained by slicing, and re-running this cell gives the same result.
testx = testx.assign(pred_close=predictions)
testx2 = testx[['DATE', 'CLOSE', 'pred_close']]
testx2

In [None]:
# Overlay the actual and the predicted closing prices of the held-out rows on one axis.
fig = make_subplots(specs=[[{"secondary_y": False}]])
for column, label in (("CLOSE", "real close values"), ("pred_close", "predicted close values")):
    fig.add_trace(
        go.Scatter(x=testx2["DATE"], y=testx2[column], name=label),
        secondary_y=False,
    )

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    title_text="HDFC predicted Close values compared to real Close values",
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Value", secondary_y=False)
fig.show()