# Luggage Bags Cost Prediction

## Import necessary libraries

In [1]:
# Data handling and plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Modelling utilities
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
# MiniBatchKMeans and GradientBoostingRegressor are not part of
# sklearn.linear_model; importing them from there raises ImportError.
from sklearn.cluster import MiniBatchKMeans
from sklearn.ensemble import GradientBoostingRegressor

## Load the dataset

In [2]:
# Read the project CSV into a DataFrame and preview its first rows.
dataset_path = '../data/Mini Project Dataset.csv'
data = pd.read_csv(dataset_path)
data.head()

## Perform EDA

In [3]:
# Print a concise summary of the DataFrame: per-column dtypes and non-null
# counts, which is a quick first check for missing values.
data.info()

In [4]:
# Summary statistics for the dataset's columns. Left as the cell's final
# expression so the notebook renders the resulting table.
data.describe()

In [5]:
# Count the missing entries in each column.
data.isna().sum()

## Plot various graphs

In [6]:
# One 50-bin histogram per column, laid out on a single large figure.
data.hist(figsize=(20, 15), bins=50)
plt.show()

In [7]:
# Box plots of every column, drawn side by side on one wide set of axes.
box_fig, box_ax = plt.subplots(figsize=(20, 15))
sns.boxplot(data=data, ax=box_ax)
plt.show()

In [8]:
# Grid of pairwise relationships between the dataset's columns.
pair_grid = sns.pairplot(data=data)
plt.show()

In [9]:
# Annotated heatmap of the pairwise column correlations.
corr_matrix = data.corr()
corr_fig, corr_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=corr_ax)
plt.show()

## Prepare data for model building

In [10]:
# Separate the 'Cost' target from the predictor columns.
y = data['Cost']
X = data.drop(columns='Cost')

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into it.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Build and evaluate models

In [11]:
# Seed shared by every stochastic estimator in this cell so that re-running it
# prints the same metrics (the train/test split is already seeded with 42).
RANDOM_STATE = 42


def _fit_and_score(model):
    """Fit ``model`` on the scaled training data and score it on the test split.

    Args:
        model: An unfitted scikit-learn regressor; it is fitted in place.

    Returns:
        A ``(predictions, r2, rmse, mse)`` tuple computed from
        ``X_test_scaled`` and ``y_test``.
    """
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, predictions)
    # RMSE is derived from the MSE already computed instead of re-scoring.
    rmse = np.sqrt(mse)
    return predictions, r2_score(y_test, predictions), rmse, mse


# Linear Regression
lin_reg = LinearRegression()
y_pred_lin, r2_lin, rmse_lin, mse_lin = _fit_and_score(lin_reg)

# SGD Regressor (library-default learning-rate schedule)
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, random_state=RANDOM_STATE)
y_pred_sgd, r2_sgd, rmse_sgd, mse_sgd = _fit_and_score(sgd_reg)

# Gradient Descent Regressor
# NOTE(review): SGDRegressor updates the weights one sample at a time, so this
# is stochastic gradient descent with a constant step size (eta0), not
# full-batch gradient descent.
grad_reg = SGDRegressor(
    max_iter=1000,
    tol=1e-3,
    learning_rate='constant',
    eta0=0.01,
    random_state=RANDOM_STATE,
)
y_pred_grad, r2_grad, rmse_grad, mse_grad = _fit_and_score(grad_reg)

# Mini Batch Gradient Descent
# NOTE(review): this estimator is configured exactly like grad_reg, so with the
# shared seed it reproduces the same metrics. SGDRegressor has no batch-size
# option; a genuine mini-batch comparison needs a different implementation.
mini_batch_reg = SGDRegressor(
    max_iter=1000,
    tol=1e-3,
    learning_rate='constant',
    eta0=0.01,
    random_state=RANDOM_STATE,
)
y_pred_mini_batch, r2_mini_batch, rmse_mini_batch, mse_mini_batch = _fit_and_score(mini_batch_reg)

print("Linear Regression: R2 =", r2_lin, ", RMSE =", rmse_lin, ", MSE =", mse_lin)
print("SGD Regressor: R2 =", r2_sgd, ", RMSE =", rmse_sgd, ", MSE =", mse_sgd)
print("Gradient Descent Regressor: R2 =", r2_grad, ", RMSE =", rmse_grad, ", MSE =", mse_grad)
print("Mini Batch Gradient Descent: R2 =", r2_mini_batch, ", RMSE =", rmse_mini_batch, ", MSE =", mse_mini_batch)

## Compare results

The models are compared on the held-out test set using R², RMSE, and MSE. The model with the highest R² and the lowest RMSE is considered the best for predicting the cost of luggage bags; because RMSE is simply the square root of MSE, the two error metrics always rank the models in the same order.