In [9]:
from google.colab import drive

# Mount Google Drive so the dataset and notebook paths under /content/drive resolve.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

ValueError: Mountpoint must not already contain files

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
import pandas as pd

# Đọc dữ liệu từ Google Drive
CSV_PATH = "/content/drive/MyDrive/IS403/DoAnCuoiKy/Dataset/VIX_Cleaned.csv"

# Read the CSV, parse the Date column, and order rows chronologically
df = (
    pd.read_csv(CSV_PATH)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Input feature columns and the prediction target
features = ['Open', 'High', 'Low', 'Volume']
target = 'Close'
X, y, dates = df[features], df[target], df['Date']



# Huấn luyện KNN và Biểu đồ

In [None]:
def _regression_metrics(y_true, y_pred):
    """Return MSE, RMSE, MAE, R2 and MAPE (in percent) for one set of predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        'mse': mse,
        'rmse': np.sqrt(mse),
        'mae': mean_absolute_error(y_true, y_pred),
        'r2': r2_score(y_true, y_pred),
        # MAPE divides by the true values, so zeros in y_true yield inf/NaN terms.
        'mape': np.mean(np.abs((y_true - y_pred) / y_true)) * 100,
    }


def train_evaluate_knn(train_ratio, val_ratio, test_ratio, X, y, dates, n_neighbors=5):
    """Fit a KNN regressor on a chronological split, report metrics, and plot the result.

    The rows are split in their given order: the first ``int(n * train_ratio)``
    rows are the training set, the next ``int(n * val_ratio)`` rows are the
    validation set, and every remaining row is the test set.

    Parameters
    ----------
    train_ratio, val_ratio : float
        Fractions of the rows used for training and validation.
    test_ratio : float
        Used only for the printed/plotted split label; the test set is always
        the remainder. A warning is issued if the three ratios do not sum to 1.
    X : feature table, sliced by row position.
    y : pandas Series of target values (``.values`` is read from it).
    dates : sequence of dates aligned with ``X`` and ``y``; used for the x-axis.
    n_neighbors : int, default 5
        Number of neighbours for ``KNeighborsRegressor``.

    Returns
    -------
    tuple
        ``(model, scaler_X, scaler_y)``: the fitted regressor and the scalers
        fitted on the training features and training target.
    """
    import warnings

    # test_ratio does not drive the split, so an inconsistent value would only
    # make the reported label wrong; flag it instead of silently mislabelling.
    if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
        warnings.warn(
            "train_ratio + val_ratio + test_ratio != 1; the test set is the "
            "remainder after the train/val split, so the split label may be inaccurate."
        )

    # Split sizes
    n = len(X)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)
    val_end = n_train + n_val

    # Chronological split (no shuffling)
    X_train, X_val, X_test = X[:n_train], X[n_train:val_end], X[val_end:]
    y_train, y_val, y_test = y[:n_train], y[n_train:val_end], y[val_end:]
    date_train, date_val, date_test = dates[:n_train], dates[n_train:val_end], dates[val_end:]

    # Standardise features and target using statistics from the training set only
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).ravel()

    # Fit the KNN model in the scaled space
    model = KNeighborsRegressor(n_neighbors=n_neighbors)
    model.fit(X_train_scaled, y_train_scaled)

    def predict_original_scale(X_part):
        """Predict for raw features and map the output back to the target's units."""
        scaled_pred = model.predict(scaler_X.transform(X_part))
        return scaler_y.inverse_transform(scaled_pred.reshape(-1, 1)).ravel()

    # Evaluate on the test set
    y_test_pred = predict_original_scale(X_test)
    test_metrics = _regression_metrics(y_test, y_test_pred)

    # Evaluate on the validation set as well; skip it when the slice is empty,
    # because the scaler cannot transform zero rows.
    val_metrics = None
    if len(X_val) > 0:
        val_metrics = _regression_metrics(y_val, predict_original_scale(X_val))

    # round() avoids float truncation in the label (e.g. 0.57 * 100 -> 56.999...)
    split_label = "/".join(
        str(int(round(ratio * 100))) for ratio in (train_ratio, val_ratio, test_ratio)
    )

    # Report results (test-set lines first, then the validation summary)
    print(f"KNN ({split_label}):")
    print(f"  - MSE   = {test_metrics['mse']:.4f}")
    print(f"  - RMSE  = {test_metrics['rmse']:.4f}")
    print(f"  - MAE   = {test_metrics['mae']:.4f}")
    print(f"  - MAPE  = {test_metrics['mape']:.2f}%")
    print(f"  - R2    = {test_metrics['r2']:.4f}")
    if val_metrics is not None:
        print(
            f"  - Val   : RMSE = {val_metrics['rmse']:.4f}, MAE = {val_metrics['mae']:.4f}, "
            f"MAPE = {val_metrics['mape']:.2f}%, R2 = {val_metrics['r2']:.4f}"
        )

    # Plot the true series for each split plus the test-set predictions
    plt.figure(figsize=(15, 6))
    plt.plot(date_train, y_train, label='Train True', color='blue', alpha=0.7)
    plt.plot(date_val, y_val, label='Val True', color='green')
    plt.plot(date_test, y_test, label='Test True', color='red')
    plt.plot(date_test, y_test_pred, label='Test Predicted', color='orange', linestyle='--')

    plt.title(f'KNN Regression - Split {split_label}')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    return model, scaler_X, scaler_y

In [None]:
model_75, scX_75, scy_75 = train_evaluate_knn(
    train_ratio=0.75, val_ratio=0.10, test_ratio=0.15, X=X, y=y, dates=dates
)

In [None]:
model_70, scX_70, scy_70 = train_evaluate_knn(
    train_ratio=0.70, val_ratio=0.10, test_ratio=0.20, X=X, y=y, dates=dates
)

In [None]:
model_65, scX_65, scy_65 = train_evaluate_knn(
    train_ratio=0.65, val_ratio=0.10, test_ratio=0.25, X=X, y=y, dates=dates
)

# Dự đoán 30 ngày tiếp theo

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def predict_next_days(model, scaler_X, scaler_y, last_X_df, start_date, num_days=30,
                      split_label='65/10/25'):
    """Forecast ``num_days`` values from one feature row, plot them, and return a table.

    The input features are never updated between steps, so the model returns
    the same value for every future day; the forecast is a flat line. The
    prediction is therefore computed once and repeated.

    Parameters
    ----------
    model : fitted regressor exposing ``predict``.
    scaler_X, scaler_y : fitted scalers for the features and the target
        (``transform`` / ``inverse_transform`` are called on them).
    last_X_df : single-row feature table used as the model input.
    start_date : last known date; the forecast starts one calendar day later.
    num_days : int, default 30
        Number of future dates to generate.
    split_label : str, default '65/10/25'
        Text shown in parentheses in the plot title to identify the model.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Date'`` and ``'Predicted Price (INR)'``.
    """
    # Loop-invariant work hoisted: scale -> predict -> unscale once.
    scaled_features = scaler_X.transform(last_X_df)
    scaled_prediction = model.predict(scaled_features)
    y_pred = scaler_y.inverse_transform(scaled_prediction.reshape(-1, 1)).ravel()[0]
    predictions = [y_pred] * num_days

    # Future dates are consecutive calendar days (weekends included).
    future_dates = pd.date_range(start=start_date + pd.Timedelta(days=1), periods=num_days)

    # Plot the forecast
    plt.figure(figsize=(12, 5))
    plt.plot(future_dates, predictions, linestyle='-', color='blue', label='Predicted')
    plt.title(f'{num_days}-Day Forecast Using KNN ({split_label})')
    plt.xlabel('Date')
    plt.ylabel('Predicted Close Price')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # NOTE(review): the column name mentions INR; kept unchanged so existing
    # consumers of the returned frame keep working. Confirm the intended unit.
    result_df = pd.DataFrame({'Date': future_dates, 'Predicted Price (INR)': predictions})
    return result_df

# Chuẩn bị đầu vào cho dự đoán
latest_features = X.iloc[-1].values
last_X_sample_df = pd.DataFrame([latest_features], columns=X.columns)
last_date = dates.iloc[-1]

# Forecast with the 65/10/25 model, using predict_next_days' default horizon
future_result_df = predict_next_days(
    model=model_65,
    scaler_X=scX_65,
    scaler_y=scy_65,
    last_X_df=last_X_sample_df,
    start_date=last_date,
)


In [None]:
!pip install nbconvert

In [None]:
!jupyter nbconvert --to html "/content/drive/MyDrive/IS403/DoAnCuoiKy/Code/KNN/KNN.ipynb" --output-dir="/content/drive/MyDrive/IS403/DoAnCuoiKy/Code/KNN"