In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.linear_model import Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# Load the dataset from CSV file
data = pd.read_csv('TIMESERIESFORECASTING.csv')  # Replace 'your_dataset.csv' with the actual file name

# Splitting features and target variable
X = data[['Advertising Spend', 'Temperature', 'Holiday']]
y = data['Sales']

# Define machine learning models to test
models = {
    "Linear Regression": LinearRegression(),
    "Logistic Regression": LogisticRegression(),  # Added logistic regression
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "KNN": KNeighborsRegressor(),
    "SVM": SVR(),
}

# Initialize time series cross-validator
tscv = TimeSeriesSplit(n_splits=5)

# Initialize variables to store best model and its score
best_model = None
best_score = float('inf')  # For regression models, lower is better

# Model Training and Evaluation Loop
for name, model in models.items():
    scores = []
    for train_index, test_index in tscv.split(X):
        X_train_cv, X_test_cv = X.iloc[train_index], X.iloc[test_index]
        y_train_cv, y_test_cv = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train_cv, y_train_cv)
        y_pred_cv = model.predict(X_test_cv)
        score = mean_squared_error(y_test_cv, y_pred_cv)
        scores.append(score)

    avg_score = np.mean(scores)
    print(f"{name}: Average Score: {avg_score}")

    if avg_score < best_score:  # For regression models, lower is better
        best_score = avg_score
        best_model = name

# Selecting the Best Model
print("\nBest Model:", best_model)
print("Best Model Average Score:", best_score)


Linear Regression: Average Score: 54.15326117530102
Logistic Regression: Average Score: 40.0
Lasso: Average Score: 1.1064998647991158
Ridge: Average Score: 1.8761738542609359
Decision Tree: Average Score: 265.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Random Forest: Average Score: 139.3255
Gradient Boosting: Average Score: 99.29470446147683
KNN: Average Score: 257.2
SVM: Average Score: 377.79309616042417

Best Model: Lasso
Best Model Average Score: 1.1064998647991158
