## Imports

In [1]:
import numpy as np
import pandas as pd

import math,datetime
import time

import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split 

In [2]:
dataset = pd.read_csv("datasets/NAS.csv", index_col='Date', parse_dates=True)
dataset.dropna() #drops NaN rows

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-12-18,19.482599,19.596901,19.025499,19.139799,19.139799,4978496.0
2003-12-19,19.368299,19.425501,18.282801,18.454201,18.454201,1410901.0
2003-12-22,18.739901,18.739901,17.997101,18.054300,18.054300,137047.0
2003-12-23,17.997101,17.997101,17.368700,17.425800,17.425800,229418.0
2003-12-29,17.425800,17.425800,16.854500,17.254400,17.254400,196206.0
...,...,...,...,...,...,...
2020-10-12,0.709000,0.710000,0.650000,0.676800,0.676800,48320475.0
2020-10-13,0.676800,0.676800,0.600000,0.600600,0.600600,47786200.0
2020-10-14,0.601000,0.640000,0.596200,0.626000,0.626000,37534949.0
2020-10-15,0.626000,0.626000,0.585000,0.605000,0.605000,26737615.0


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4253 entries, 2003-12-18 to 2020-10-16
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       4218 non-null   float64
 1   High       4218 non-null   float64
 2   Low        4218 non-null   float64
 3   Close      4218 non-null   float64
 4   Adj Close  4218 non-null   float64
 5   Volume     4218 non-null   float64
dtypes: float64(6)
memory usage: 232.6 KB


### Calculate change in percentage

In [4]:
#change in percentage
dataset['Open-Close']= (dataset.Close - dataset.Open) / dataset['Open']*100
dataset['High-Low']  = (dataset.High - dataset.Low) /dataset['Low']*100
X= dataset[['Open-Close', 'High-Low']]
X.dropna()
X.head()

Unnamed: 0_level_0,Open-Close,High-Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2003-12-18,-1.759519,3.003348
2003-12-19,-4.719557,6.250136
2003-12-22,-3.658509,4.127331
2003-12-23,-3.174406,3.618008
2003-12-24,,


In [5]:
dataset = dataset[['Close','High-Low','Open-Close','Volume']]

In [6]:
forecast_Col = 'Close'
forecast_out = int(math.ceil(0.01*len(dataset)))
dataset['label'] = dataset[forecast_Col].shift(-forecast_out)
dataset.dropna(inplace=True)
print(dataset['label'])

Date
2003-12-18    16.2831
2003-12-19    16.4545
2003-12-22    16.5116
2003-12-23    15.7118
2003-12-29    16.1117
               ...   
2020-08-12     0.6768
2020-08-13     0.6006
2020-08-14     0.6260
2020-08-17     0.6050
2020-08-18     0.6106
Name: label, Length: 4146, dtype: float64


In [7]:
input = np.array(dataset.drop(['label'], axis = 1))
target = np.array(dataset['label'])
dataset.tail()

Unnamed: 0_level_0,Close,High-Low,Open-Close,Volume,label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-08-12,1.44,6.142857,-0.689655,49840531.0,0.6768
2020-08-13,1.32,13.643411,-9.805261,55693758.0,0.6006
2020-08-14,1.251,9.362224,-5.227273,40517551.0,0.626
2020-08-17,1.3675,8.907363,7.719575,25817961.0,0.605
2020-08-18,1.3,8.649706,-6.340058,30726005.0,0.6106


### Splitting data to test and train

In [20]:
X_train, X_test, Y_train, Y_test = train_test_split(input,target, test_size=0.20)

In [21]:
np.shape(X_train)

(3316, 4)

In [57]:
startknn = time.time()

knn = KNeighborsRegressor(24)
knn.fit(X_train, Y_train)

endknn = time.time()

time_knn = endknn - startknn

print('Training Time:- {}'.format(time_knn))
print('Accuracy:- {}'.format(knn.score(X_test, Y_test)))


Training Time:- 0.004001140594482422
Accuracy:- 0.38682446099524226


### Accuracy ends up at around 39% with k=24

In [58]:
X = input[:-forecast_out]
X_old = input[-forecast_out:]

Forecast_set = knn.predict(X_old)
print(Forecast_set)

[ 1.69056667  1.788875    1.709975    2.13915     2.36087083  2.49949583
  9.15170404  9.15170404  2.37607917  2.40679583 13.90306237 10.60435404
 12.41306254 10.36899571 15.60331233  2.38449583 26.96590471 13.90306237
 10.37328737 10.48424571  2.40679583  1.73401667  1.788875   10.57995404
  2.37712083  2.30289167  2.40679583  2.49949583  2.49949583  2.26062917
 10.56078737  2.37482917  2.37607917  2.26062917  1.93170833  1.56746667
  1.69056667  1.63087917  1.87483333  1.74495833  1.88241667  2.40679583
  1.85523333]
