In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Load data
data = pd.read_csv('data-sets/alcohol_sales.csv')

# Convert date column to datetime object
data['date'] = pd.to_datetime(data['date'])

# Set date column as the index of the DataFrame
data = data.set_index('date')

# Split data into features and target
X = data.index.values.astype(float).reshape(-1, 1)
y = data['sales']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define linear regression model
model = LinearRegression()

# Define time series cross-validation strategy
cv = TimeSeriesSplit(n_splits=5)

# Define range of learning rates to try
param_grid = {'eta0': np.logspace(-5, 0, 6)}

# Perform grid search over learning rates
grid_search = GridSearchCV(model, param_grid=param_grid, cv=cv, scoring='neg_mean_squared_error')
grid_search.fit(X, y)

# Get best learning rate and corresponding MSE
best_lr = grid_search.best_params_['eta0']
best_mse = -grid_search.best_score_

# Train final model with best learning rate
final_model = LinearRegression(eta0=best_lr)
final_model.fit(X, y)


ValueError: Invalid parameter 'eta0' for estimator LinearRegression(). Valid parameters are: ['copy_X', 'fit_intercept', 'n_jobs', 'positive'].