In [36]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [37]:
# load time series dataset
series = pd.read_csv('https://raw.githubusercontent.com/dhamvi01/Univariate-Time-Series-using-LSTM/master/airline-passengers.csv', header=0, index_col=0)
print(series)

         Passengers
Month              
1949-01         112
1949-02         118
1949-03         132
1949-04         129
1949-05         121
...             ...
1960-08         606
1960-09         508
1960-10         461
1960-11         390
1960-12         432

[144 rows x 1 columns]


In [38]:
# convert series to NumPy array
data = series.to_numpy()

# print the shape of the array
print(data.shape)


(144, 1)


In [39]:
# Specify subsequence length and number of subsequences
n_steps = 3
n_samples = len(data) - n_steps + 1

# Split data into input and output variables
X = np.zeros((n_samples, n_steps))
y = np.zeros(n_samples)
for i in range(n_samples):
    X[i] = data[i:i+n_steps].reshape(-1)
    y[i] = data[i+n_steps-1]

In [40]:
print(X)

[[112. 118. 132.]
 [118. 132. 129.]
 [132. 129. 121.]
 [129. 121. 135.]
 [121. 135. 148.]
 [135. 148. 148.]
 [148. 148. 136.]
 [148. 136. 119.]
 [136. 119. 104.]
 [119. 104. 118.]
 [104. 118. 115.]
 [118. 115. 126.]
 [115. 126. 141.]
 [126. 141. 135.]
 [141. 135. 125.]
 [135. 125. 149.]
 [125. 149. 170.]
 [149. 170. 170.]
 [170. 170. 158.]
 [170. 158. 133.]
 [158. 133. 114.]
 [133. 114. 140.]
 [114. 140. 145.]
 [140. 145. 150.]
 [145. 150. 178.]
 [150. 178. 163.]
 [178. 163. 172.]
 [163. 172. 178.]
 [172. 178. 199.]
 [178. 199. 199.]
 [199. 199. 184.]
 [199. 184. 162.]
 [184. 162. 146.]
 [162. 146. 166.]
 [146. 166. 171.]
 [166. 171. 180.]
 [171. 180. 193.]
 [180. 193. 181.]
 [193. 181. 183.]
 [181. 183. 218.]
 [183. 218. 230.]
 [218. 230. 242.]
 [230. 242. 209.]
 [242. 209. 191.]
 [209. 191. 172.]
 [191. 172. 194.]
 [172. 194. 196.]
 [194. 196. 196.]
 [196. 196. 236.]
 [196. 236. 235.]
 [236. 235. 229.]
 [235. 229. 243.]
 [229. 243. 264.]
 [243. 264. 272.]
 [264. 272. 237.]
 [272. 237

In [41]:
# Scale the input variables using MinMaxScaler
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
train_size = int(0.67 * len(X_scaled))
X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [42]:
# Train the KNN model on the training data
k = 3  # number of neighbors
knn = KNeighborsRegressor(n_neighbors=k)
knn.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = knn.predict(X_test)

In [43]:
print(y_test)

[301. 356. 348. 355. 422. 465. 467. 404. 347. 305. 336. 340. 318. 362.
 348. 363. 435. 491. 505. 404. 359. 310. 337. 360. 342. 406. 396. 420.
 472. 548. 559. 463. 407. 362. 405. 417. 391. 419. 461. 472. 535. 622.
 606. 508. 461. 390. 432.]


In [44]:
# Calculate and print the MSE
mse = mean_squared_error(y_test, y_pred)
print("MSE: ", mse)

MSE:  6225.01891252955
