In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor

In [None]:


# Load time series data
# NOTE(review): each CSV is assumed to hold exactly one numeric column (the
# series itself). If the files carry extra columns (e.g. a timestamp), select
# the value column first; the asserts below fail loudly otherwise.
X = pd.read_csv('data/train.csv')
# NOTE(review): `test_data` was used further down but never defined in the
# visible notebook. This path mirrors the training file and must be confirmed.
test_data = pd.read_csv('data/test.csv')
assert X.shape[1] == 1, 'expected a single-column training series'
assert test_data.shape[1] == 1, 'expected a single-column test series'

# Number of past observations used as features for one prediction.
# This belongs to the feature construction, not to DecisionTreeRegressor, so
# it cannot be read from grid_search.best_params_ (it is not in param_grid).
# NOTE(review): 10 is a placeholder value; choose one that suits the data.
window_size = 10
assert len(X) > window_size, 'training series is shorter than one window'
assert len(test_data) > window_size, 'test series is shorter than one window'


# Define sliding window function
def sliding_window(X, window_size):
    """Return every run of `window_size` consecutive items of `X`.

    Windows are taken at stride 1 along the first axis of `X`.

    Parameters
    ----------
    X : sequence or array supporting ``len()`` and slicing
        The data to cut into windows.
    window_size : int
        Number of consecutive items per window.

    Returns
    -------
    numpy.ndarray
        ``len(X) - window_size + 1`` windows stacked into one array; an empty
        array when `X` is shorter than `window_size`.
    """
    n_windows = len(X) - window_size + 1
    return np.array([X[start:start + window_size] for start in range(n_windows)])


# Scale training data
# The scaler is fitted on the training series only, so nothing about the test
# series influences training.
scaler = MinMaxScaler()
train_X = scaler.fit_transform(X.values.reshape(-1, 1))

# Apply sliding window approach to training data
# Each window holds window_size + 1 points: the first window_size points are
# the features, the last point is the value the model has to predict.
X_windows = sliding_window(train_X.ravel(), window_size + 1)
X_train = X_windows[:, :-1]
y = X_windows[:, -1]

# Define hyperparameter grid
param_grid = {
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Define decision tree model
model = DecisionTreeRegressor(random_state=42)

# Perform hyperparameter tuning
# TimeSeriesSplit always validates on samples that come after the training
# samples; plain k-fold would score the model on data older than what it was
# trained on.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=TimeSeriesSplit(n_splits=5),
    scoring='neg_mean_squared_error',
    verbose=2,
)
grid_search.fit(X_train, y)

# Print best hyperparameters
print('Best hyperparameters:', grid_search.best_params_)

# Train final decision tree model
final_model = DecisionTreeRegressor(**grid_search.best_params_, random_state=42)
final_model.fit(X_train, y)

# Scale test data
test_X = scaler.transform(test_data.values.reshape(-1, 1))

# Apply sliding window approach to test data
test_windows = sliding_window(test_X.ravel(), window_size + 1)
test_X_windows = test_windows[:, :-1]

# Predict on test data
test_y_pred_scaled = final_model.predict(test_X_windows)

# Evaluate performance
# Predictions are mapped back to the original units. The matching true values
# are the raw test observations that follow each window, i.e. everything from
# index window_size onward, so both arrays have the same length.
test_y_pred = scaler.inverse_transform(test_y_pred_scaled.reshape(-1, 1)).flatten()
test_y_true = test_data.values.reshape(-1)[window_size:]
mse = mean_squared_error(test_y_true, test_y_pred)
print('Mean Squared Error:', mse)