In [3]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Rocinante 36 Model

In [4]:
file_path = 'Exhibit+2.xlsx'
roc = pd.read_excel(file_path, sheet_name='Rocinante Models')
roc

Unnamed: 0,Cars,"Sales \n(in 1,000 units)",Price\n(in lakh rupees),Mileage\n(Km/ltr),Top speed (Km/hr)
0,Rocinante 1,171.877,6.1,15.8,168.2
1,Rocinante 2,139.796,6.1,12.1,149.6
2,Rocinante 3,178.947,9.9,17.0,173.4
3,Rocinante 4,140.022,5.8,11.6,170.6
4,Rocinante 5,186.476,10.0,17.2,175.0
5,Rocinante 6,192.123,6.5,17.6,173.1
6,Rocinante 7,175.085,5.5,16.0,184.6
7,Rocinante 8,146.882,8.4,13.0,175.7
8,Rocinante 9,202.847,6.6,19.3,166.7
9,Rocinante 10,149.933,8.8,13.3,175.4


In [6]:
X_rocinante = roc[['Price\n(in lakh rupees)', 'Mileage\n(Km/ltr)', 'Top speed (Km/hr)']]
y_rocinante = roc['Sales \n(in 1,000 units)']

### Step-1: Split the data into training and testing sets (70% training, 30% testing)

In [7]:
X_train_roc, X_test_roc, y_train_roc, y_test_roc = train_test_split(X_rocinante, y_rocinante, test_size=0.3, random_state=42)

# Initialize and train a Linear Regression model
model_rocinante = LinearRegression()
model_rocinante.fit(X_train_roc, y_train_roc)

# Make predictions on the test set
y_pred_roc = model_rocinante.predict(X_test_roc)

### Step-2: Calculation of RMSE ( Root Mean Squared Error)

In [9]:
RMSE_roc = mean_squared_error(y_test_roc, y_pred_roc, squared=False)

# Print the RMSE result
print(f'Root Mean Squared Error(RMSE) for Rocinante Model : {RMSE_roc}')

Root Mean Squared Error(RMSE) for Rocinante Model : 2.284066588336076


#### The Root Mean Squared Error (RMSE) for the Rocinante Model is 2.28. This means that, on average, the model's predictions for Rocinante vehicle sales are off by about 2.28 units (in thousands) compared to the actual sales.

#### The model's typical prediction error is around 2,280 sales units. An RMSE of 2.28 suggests that the model is reasonably accurate, but there might still be some room for improvement to make the predictions closer to the actual sales figures.

### Predicting both training and testing data

In [10]:
y_train_pred = model_rocinante.predict(X_train_roc)
y_test_pred = model_rocinante.predict(X_test_roc)

### Calculation of RMSE for both training and testing data

In [11]:
train_rmse_roc = mean_squared_error(y_train_roc, y_train_pred)
test_rmse_roc = mean_squared_error(y_test_roc, y_test_pred)

In [12]:
print("Training RMSE:", train_rmse_roc)
print("Testing RMSE:", test_rmse_roc)

Training RMSE: 4.990464394910194
Testing RMSE: 5.216960179953201


### Step-3: Determining whether the model is Overfitting or Underfitting

In [13]:
if train_rmse_roc < test_rmse_roc:
    print("The model may be overfitting.")
elif train_rmse_roc > test_rmse_roc:
    print("The model may be underfitting.")
else:
    print("The model is performing consistently on both training and testing data.")

The model may be overfitting.


### The Training RMSE (4.99) is lower than the Testing RMSE (5.22).
#### The difference between the two values is relatively small, indicating that the model is not dramatically overfitting or underfitting.
### Interpretation:
#### Slight Overfitting: The model may be experiencing slight overfitting, as it performs better on the training data than on the testing data. This suggests it has learned the training data well but does not generalize perfectly to new, unseen data.

# Marengo32 Model

In [14]:
mar = pd.read_excel(file_path, sheet_name='Marengo Models')
mar

Unnamed: 0,Cars,"Sales \n(in 1,000 units)",Price\n(in lakh rupees),Mileage\n(Km/ltr),Top speed (Km/hr)
0,Marengo 1,20.896,42.5,9.3,199.4
1,Marengo 2,31.048,36.0,9.7,235.2
2,Marengo 3,29.904,54.7,16.6,240.8
3,Marengo 4,28.792,42.7,11.7,232.5
4,Marengo 5,16.776,44.9,13.7,188.8
5,Marengo 6,18.928,35.5,9.6,184.2
6,Marengo 7,22.776,51.3,13.7,207.7
7,Marengo 8,36.824,30.4,12.6,249.5
8,Marengo 9,22.216,38.4,16.2,175.8
9,Marengo 10,35.456,32.2,9.6,245.6


In [15]:
X_marengo  = mar[['Price\n(in lakh rupees)', 'Mileage\n(Km/ltr)', 'Top speed (Km/hr)']]
y_marengo  = mar['Sales \n(in 1,000 units)']

### Step-1: Split the data into training and testing sets (70% training, 30% testing)

In [17]:
X_train_mar, X_test_mar, y_train_mar, y_test_mar = train_test_split(X_marengo, y_marengo, test_size=0.3, random_state=42)

# Initialize and train a Linear Regression model
model_marengo = LinearRegression()
model_marengo.fit(X_train_mar, y_train_mar)

# Make predictions on the test set
y_pred_mar = model_marengo.predict(X_test_mar)

### Step-2: Calculation of RMSE ( Root Mean Squared Error)

In [18]:
RMSE_mar = mean_squared_error(y_test_mar, y_pred_mar, squared=False)
print(f'Root Mean Squared Error(RMSE) for Marengo Model : {RMSE_mar}')

Root Mean Squared Error(RMSE) for Marengo Model : 2.7322921310468495


#### The Root Mean Squared Error (RMSE) of 2.73 indicates that, on average, the predictions made by the Marengo Model deviate from the actual values by about 2.73 units. A lower RMSE generally reflects better predictive accuracy, but the acceptable level depends on the context and scale of the data.

### Predicting both training and testing data

In [19]:
y_train_pred = model_marengo.predict(X_train_mar)
y_test_pred = model_marengo.predict(X_test_mar)

### Calculation of RMSE for both training and testing data

In [20]:
train_rmse_mar = mean_squared_error(y_train_mar, y_train_pred)
test_rmse_mar = mean_squared_error(y_test_mar, y_test_pred)

In [21]:
print("Training RMSE:", train_rmse_mar)
print("Testing RMSE:", test_rmse_mar)

Training RMSE: 3.9768018689111426
Testing RMSE: 7.465420289380534


### Step-3: Determining whether the model is Overfitting or Underfitting

In [22]:
if train_rmse_mar < test_rmse_mar:
    print("The model may be overfitting.")
elif train_rmse_mar > test_rmse_mar:
    print("The model may be underfitting.")
else:
    print("The model is performing consistently on both training and testing data.")

The model may be overfitting.


### Interpretation:
#### The model shows a significantly lower RMSE on the training set (3.98) compared to the testing set (7.47). This indicates overfitting, where the model has learned the training data very well but fails to generalize to new, unseen data. The large gap between training and testing RMSE suggests that the model is too complex, capturing noise or patterns specific to the training set rather than underlying trends. Reducing model complexity or using regularization techniques could help mitigate overfitting.