# Time Series Forecasting with *scikit-learn* 

In [42]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import plotly.express as pe
from sklearn.model_selection import train_test_split
import xgboost as xgb

## 1. Get the data 

In [25]:
df = pd.read_csv("/home/harshit/Desktop/OFSSAIBanking/AAPL(2).csv", index_col='Date', parse_dates=True, usecols=['Date', 'Close'])
df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2022-01-12,175.529999
2022-01-13,172.190002
2022-01-14,173.070007
2022-01-18,169.800003
2022-01-19,166.229996
...,...
2023-01-05,125.019997
2023-01-06,129.619995
2023-01-09,130.149994
2023-01-10,130.729996


In [26]:
pe.line(y='Close', data_frame=df, title='Closing price value',height=720, width=1280)

# Feature engineering

In [27]:
df['Month'] = df.index.month
df['WeekDay'] = df.index.dayofweek
df['Quarter'] = df.index.quarter
df['Year'] = df.index.year
df['DayOfYear'] = df.index.dayofyear

In [28]:
df.sample(5)

Unnamed: 0_level_0,Close,Month,WeekDay,Quarter,Year,DayOfYear
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-09-20,156.899994,9,1,3,2022,263
2022-05-24,140.360001,5,1,2,2022,144
2022-09-14,155.309998,9,2,3,2022,257
2022-06-28,137.440002,6,1,2,2022,179
2022-10-10,140.419998,10,0,4,2022,283


In [49]:
df.columns

Index(['Close', 'Month', 'WeekDay', 'Quarter', 'Year', 'DayOfYear'], dtype='object')

# Visualize the relationship

In [36]:
for col in ['Month', 'WeekDay', 'Quarter', 'Year', 'DayOfYear']:
    display(  pe.strip(y='Close', x=col, data_frame=df) )

# Create the model

In [None]:
features = ['Month', 'WeekDay', 'Quarter', 'Year', 'DayOfYear']
target = ['Close']

In [None]:
x_train, x_test, y_train, y_test = train_test_split(df[features], df[target],test_size=0.2, random_state=10)

In [45]:
reg_model = xgb.XGBRegressor(n_estimators=1000, early_stopping_rounds=50)

In [48]:
reg_model.fit(x_train, y_train, eval_set=[    (x_train, y_train) ,(x_test, y_test)            ], verbose=20)

[0]	validation_0-rmse:108.03933	validation_1-rmse:108.83288
[20]	validation_0-rmse:1.13632	validation_1-rmse:3.32321
[40]	validation_0-rmse:0.49880	validation_1-rmse:3.35123
[60]	validation_0-rmse:0.25907	validation_1-rmse:3.36733
[74]	validation_0-rmse:0.19187	validation_1-rmse:3.38266


In [52]:
prediction_df = pd.read_csv("/home/harshit/Desktop/Learning/Python/forecast_prediction_data.csv")
prediction_df

Unnamed: 0,Month,WeekDay,Quarter,Year,DayOfYear
0,8,3,3,2023,204
1,12,2,4,2023,355
2,11,4,4,2023,332


In [53]:
reg_model.predict(prediction_df)

array([150.06462, 134.43762, 145.67921], dtype=float32)