In [1]:
#This notebook uses the Wholesale Customers dataset, which is publicly available from the UCI Machine Learning Repository.
#The dataset contains sales data for a wholesale distributor and includes the following features: 'Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', and 'Delicatessen' (spelled 'Delicassen' in the CSV header).
#The code drops the 'Region' and 'Channel' columns, uses 'Fresh' as the target and the remaining columns as predictors, and then splits the data into training and test sets.
#The model is a LinearRegression(), and GridSearchCV with 5-fold cross-validation selects the best value of its 'fit_intercept' hyperparameter.
#We then fit the grid search object to the training data, make predictions on the test set, and evaluate the model's performance (mean absolute error and R-squared).
#Finally, we visualize the results.



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [3]:
# Fetch the Wholesale Customers CSV directly from the UCI repository into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="https://archive.ics.uci.edu/ml/machine-learning-databases/00292/Wholesale%20customers%20data.csv",
)

# Preprocessing

In [4]:
# Drop the 'Region' and 'Channel' columns so that only the remaining columns
# are used for modelling.
# Reassigning the result (rather than inplace=True) together with
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# have already been removed no longer raises a KeyError.
df = df.drop(columns=['Region', 'Channel'], errors='ignore')

In [5]:
# Separate the target ('Fresh') from the predictors (every other column),
# then hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
y = df['Fresh']
X = df.drop(columns='Fresh')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Initialize the model: a linear regressor created with its default settings.
# It is the base estimator that is handed to GridSearchCV further down.
lm = LinearRegression()

In [14]:
# Hyperparameter candidates for the grid search: fit with and without an intercept term
param_grid = dict(fit_intercept=[True, False])

In [15]:
# Build the grid search: every candidate in param_grid is scored with 5-fold cross-validation
grid = GridSearchCV(estimator=lm, param_grid=param_grid, cv=5)

In [None]:
# Run the cross-validated search on the training split only (the test split stays unseen)
grid.fit(X=X_train, y=y_train)

In [17]:
# Predict the target for the held-out test rows using the fitted grid search object
y_pred = grid.predict(X=X_test)

In [None]:
# Score the test-set predictions with two metrics and report them:
# R-squared and mean absolute error, both computed against the true test targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("Mean Absolute Error: ", mae)
print("R-squared: ", r2)

In [None]:
# Visualize the results: predicted vs. actual 'Fresh' values on the test set.
# The explicit fig/ax interface is used instead of the implicit pyplot state,
# and the figure gets a title so it can be understood on its own.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)

# Dashed y = x diagonal: points on this line are perfect predictions, so the
# vertical distance of a point from the line is its prediction error.
axis_min = min(y_test.min(), y_pred.min())
axis_max = max(y_test.max(), y_pred.max())
ax.plot([axis_min, axis_max], [axis_min, axis_max],
        linestyle="--", color="gray", label="Perfect prediction")

ax.set_xlabel("Actual Fresh Sales")
ax.set_ylabel("Predicted Fresh Sales")
ax.set_title("Linear Regression: Actual vs. Predicted Fresh Sales")
ax.legend()
plt.show()

In [None]:
#References:
#1. Wholesale customers dataset: https://archive.ics.uci.edu/ml/datasets/Wholesale+customers
#2. scikit-learn LinearRegression documentation: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html