In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pickle

# ---------------------------------------------------------------------------
# 1. Load the dataset and take a first look at it
# ---------------------------------------------------------------------------
file_path = '/content/multiple_linear_regression_dataset.csv'  # Path to the dataset
data = pd.read_csv(file_path)

# Each heading and its table are emitted by one print call, separated by a
# newline, so the heading still sits on its own line above the table.
print("Dataset Preview:", data.head(), sep="\n")
print("\nMissing Values Count:", data.isna().sum(), sep="\n")

# ---------------------------------------------------------------------------
# 2. Clean the data and separate features from the target
# ---------------------------------------------------------------------------
# Discard every row that has at least one missing value.
data = data.dropna(axis=0, how="any")

# The last column of the CSV is used as the regression target; all other
# columns are used as features.
target_column = data.columns[-1]
X = data.drop(columns=[target_column])
y = data[target_column]

# ---------------------------------------------------------------------------
# 3. Hold out 20% of the rows for testing, then fit the model
# ---------------------------------------------------------------------------
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# ---------------------------------------------------------------------------
# 4. Evaluate on the held-out rows
# ---------------------------------------------------------------------------
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike the squared MSE
r2 = r2_score(y_test, y_pred)

print(
    "\nModel Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R2): {r2:.2f}",
    sep="\n",
)

# ---------------------------------------------------------------------------
# 5. Persist the fitted model, then reload it as a round-trip check
# ---------------------------------------------------------------------------
model_filename = 'AI-22015_Lab1_ML.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(model, file)

print(f"\nModel has been saved to {model_filename}")

# The pickle read here is the file written just above by this same script.
# Do not point this at a pickle from an untrusted source.
with open(model_filename, 'rb') as file:
    loaded_model = pickle.load(file)

# The slice yields at most five values; fewer are shown when the test split
# has fewer than five rows.
loaded_model_pred = loaded_model.predict(X_test)
print(
    "\nPredictions with loaded model (first 5):",
    loaded_model_pred[:5],
    sep="\n",
)



Dataset Preview:
   age  experience  income
0   25           1   30450
1   30           3   35670
2   47           2   31580
3   32           5   40130
4   43          10   47830

Missing Values Count:
age           0
experience    0
income        0
dtype: int64

Model Evaluation:
Mean Squared Error (MSE): 753796.77
Root Mean Squared Error (RMSE): 868.21
R-squared (R2): 0.94

Model has been saved to AI-22015_Lab1_ML.pkl

Predictions with loaded model (first 5):
[31093.38107376 31295.49954076 40250.46080162 34897.6958918 ]
