In [None]:
import tensorflow as tf
import pandas as pd
import yfinance as yf
import os
import numpy as np
from sklearn.model_selection import train_test_split


checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

In [None]:
data = yf.download(tickers='^NSEI', period='60d', interval='5m')
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-31 09:15:00+05:30,17210.349609,17262.550781,17209.449219,17246.300781,17246.300781,0
2023-03-31 09:20:00+05:30,17246.199219,17274.949219,17246.199219,17270.900391,17270.900391,0
2023-03-31 09:25:00+05:30,17270.750000,17276.300781,17255.199219,17269.900391,17269.900391,0
2023-03-31 09:30:00+05:30,17270.449219,17278.550781,17253.300781,17253.849609,17253.849609,0
2023-03-31 09:35:00+05:30,17254.300781,17268.599609,17254.300781,17263.500000,17263.500000,0
...,...,...,...,...,...,...
2023-06-27 15:05:00+05:30,18816.900391,18824.300781,18808.400391,18824.300781,18824.300781,0
2023-06-27 15:10:00+05:30,18824.849609,18829.150391,18819.550781,18824.800781,18824.800781,0
2023-06-27 15:15:00+05:30,18824.949219,18825.449219,18813.500000,18817.150391,18817.150391,0
2023-06-27 15:20:00+05:30,18815.800781,18819.199219,18811.099609,18817.199219,18817.199219,0


In [None]:
df = pd.DataFrame()
df['Close'] = (data['Close'].diff())*10000/data['Close']
# df['Target'] = df['Close'].shift(-1)
print(df)
df['index'] = range(len(df))
df.set_index('index', inplace=True)
df

                               Close
Datetime                            
2023-03-31 09:15:00+05:30        NaN
2023-03-31 09:20:00+05:30  14.243386
2023-03-31 09:25:00+05:30  -0.579042
2023-03-31 09:30:00+05:30  -9.302725
2023-03-31 09:35:00+05:30   5.590055
...                              ...
2023-06-27 15:05:00+05:30   4.064103
2023-06-27 15:10:00+05:30   0.265607
2023-06-27 15:15:00+05:30  -4.065648
2023-06-27 15:20:00+05:30   0.025949
2023-06-27 15:25:00+05:30   0.744152

[4425 rows x 1 columns]


Unnamed: 0_level_0,Close
index,Unnamed: 1_level_1
0,
1,14.243386
2,-0.579042
3,-9.302725
4,5.590055
...,...
4420,4.064103
4421,0.265607
4422,-4.065648
4423,0.025949


In [None]:
cdf = pd.DataFrame()
cdf = df.dropna()
cdf = cdf.astype(float)
cdf

Unnamed: 0_level_0,Close
index,Unnamed: 1_level_1
1,14.243386
2,-0.579042
3,-9.302725
4,5.590055
5,8.536744
...,...
4420,4.064103
4421,0.265607
4422,-4.065648
4423,0.025949


In [None]:
X_sequences = np.array(cdf.Close[0:cdf.size - 2])
X_sequences

array([14.24338559, -0.57904214, -9.30272468, ...,  4.06410348,
        0.26560706, -4.06564781])

In [None]:
y_sequences = np.array(cdf.Close[1:cdf.size - 1])
y_sequences

array([-0.57904214, -9.30272468,  5.59005452, ...,  0.26560706,
       -4.06564781,  0.02594867])

In [None]:
data = pd.DataFrame()
data['X'] = X_sequences
data['y'] = y_sequences
data

Unnamed: 0,X,y
0,14.243386,-0.579042
1,-0.579042,-9.302725
2,-9.302725,5.590055
3,5.590055,8.536744
4,8.536744,-2.055481
...,...,...
4417,-0.132998,10.283238
4418,10.283238,4.064103
4419,4.064103,0.265607
4420,0.265607,-4.065648


In [None]:
X = np.transpose([data['X']])
y = data['y']
np.shape(X)

(4422, 1)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [None]:
from sklearn.svm import SVR
from sklearn import ensemble

In [None]:
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train, y_train)

In [None]:
y_predict = regressor.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error
score = regressor.score(X_test,y_test)
print("R-squared:", score)
print("MSE:", mean_squared_error(y_test, y_predict))

R-squared: -0.010468362475610116
MSE: 44.930826629310666


In [None]:
reg = ensemble.GradientBoostingRegressor(learning_rate=0.001, n_estimators=150)
reg.fit(X_train, y_train)

mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

The mean squared error (MSE) on test set: 44.6156
