<img src="https://lstms-brainybits.s3.ap-south-1.amazonaws.com/green+logo.png" alt="BrainyBits logo" width="300" height="200" style="float: left; margin-right: 8px;">

We believe in empowering individuals with the
knowledge and skills they need to become lifelong
learners. Our self-learning website offers a diverse
range of high-quality, interactive courses.
**All rights reserved to BrainyBits. Unauthorized reproduction or distribution of this content is prohibited.** 

# Multiple Linear Regression

### 1. Importing the libraries

In [None]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning for the rest of the session. Note that this blanket
# filter also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

### 2. Importing the dataset

In [None]:
# Load the startup data from a CSV file (path is relative to the working directory).
dataset = pd.read_csv('50_Startups.csv')

### 3. Exploratory Data Analysis

In [None]:
# Preview the first rows of the dataset.
dataset.head()

In [None]:
# Size of the dataset as (rows, columns).
dataset.shape

In [None]:
# Count the missing values in each column.
dataset.isna().sum()

In [None]:
# True if at least one row duplicates an earlier row.
dataset.duplicated().any()

In [None]:
# Box plots of the three spend columns, stacked in a single figure.
fig, axs = plt.subplots(3, figsize = (5,5))
# Pass each column through the explicit x= keyword. Recent seaborn releases treat
# the first positional argument as `data`, not `x`, so a positional Series would
# change the plot orientation and drop the axis label depending on the version.
plt1 = sns.boxplot(x=dataset['R&D Spend'], ax = axs[0])
plt2 = sns.boxplot(x=dataset['Administration'], ax = axs[1])
plt3 = sns.boxplot(x=dataset['Marketing Spend'], ax = axs[2])
plt.tight_layout()

In [None]:
# Distribution of R&D Spend: density-normalised histogram with a KDE curve.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat="density" is its documented replacement.
sns.histplot(dataset['R&D Spend'], kde=True, stat="density");

In [None]:
# Distribution of Administration: density-normalised histogram with a KDE curve.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat="density" is its documented replacement.
sns.histplot(dataset['Administration'], kde=True, stat="density");

In [None]:
# Distribution of Marketing Spend: density-normalised histogram with a KDE curve.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat="density" is its documented replacement.
sns.histplot(dataset['Marketing Spend'], kde=True, stat="density");

In [None]:
# Plot the pairwise relationships between the dataset's variables.
sns.pairplot(dataset)

In [None]:
# Scatter each spend column against Profit, one panel per column.
sns.pairplot(
    dataset,
    y_vars='Profit',
    x_vars=["R&D Spend", "Administration", "Marketing Spend"],
    kind='scatter',
    height=4,
    aspect=1,
)
plt.show()

In [None]:
# Annotated heat map of the correlations between the three spend columns.
df1 = dataset.loc[:, ["R&D Spend", "Administration", "Marketing Spend"]]
sns.heatmap(data=df1.corr(), annot=True)
plt.show()

### 4. Data Preprocessing 

In [None]:
# Convert the categorical 'State' column into one-hot indicator columns.
from sklearn.preprocessing import OneHotEncoder

# The `sparse=False` argument was deprecated in scikit-learn 1.2 and removed in 1.4.
# Keeping the encoder's default sparse output and densifying it with .toarray()
# works on both older and newer releases.
encoder = OneHotEncoder()
df_encoded = pd.DataFrame(
    encoder.fit_transform(dataset[['State']]).toarray(),
    columns=encoder.get_feature_names_out(['State']),
    index=dataset.index,  # align explicitly with the source rows for the concat below
)
# Append the indicator columns to the original data, then drop the raw text column.
df_encoded = pd.concat([dataset, df_encoded], axis=1)
df_encoded.drop(['State'], axis=1, inplace=True)
# NOTE(review): every State category is kept, so the indicator columns always sum
# to 1 and are collinear with the model intercept. Consider drop='first'; that
# would also change the number of features later predict() calls must supply.

In [None]:
# Display the encoded DataFrame.
df_encoded

### 5. Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Separate the target column (Profit) from the predictor columns.
y = df_encoded['Profit']
X = df_encoded.drop(columns=['Profit'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Report the shape of each part of the train/test split.
print('X_train dimension= ', X_train.shape)
print('X_test dimension= ', X_test.shape)
print('y_train dimension= ', y_train.shape)
# This line previously reused the 'y_train' label while printing y_test's shape.
print('y_test dimension= ', y_test.shape)

### 6. MLR Model Building with the sklearn package

In [None]:
# Create the linear regression estimator and fit it on the training split.
mlr = LinearRegression() 
mlr.fit(X_train, y_train) 

In [None]:
# Show the fitted intercept first...
print(mlr.intercept_)
# ...then each feature name alongside its fitted coefficient.
list(zip(X.columns, mlr.coef_))

In [None]:
# Model predictions for both splits.
# Despite its x_ prefix, x_pred_mlr holds the predicted target values for the training rows.
x_pred_mlr = mlr.predict(X_train)
# Predicted target values for the held-out test rows.
y_pred_mlr = mlr.predict(X_test)

In [None]:
# Side-by-side table of the true test targets and the model's predictions for them.
mlr_diff = pd.DataFrame(
    data={
        'Actual value': y_test,
        'Predicted value': y_pred_mlr,
    }
)
mlr_diff

In [None]:
# Predict the target for a single hand-written observation.
# The model was fitted on a DataFrame, so wrap the row in a DataFrame that reuses
# the training column names; a bare nested list carries no feature names and
# triggers scikit-learn's "X does not have valid feature names" warning.
# NOTE(review): the values are assumed to be in X's column order - verify against X.columns.
mlr.predict(pd.DataFrame([[134544, 90022, 362443, 1, 0, 0]], columns=X.columns))

### Observation

#### The number above is the predicted profit for the input values

### 7. Model Evaluation with Regression Metrics

In [None]:
# Error metrics on the test split. Each is 0 for a perfect fit, so lower is better.
meanAbErr = metrics.mean_absolute_error(y_test, y_pred_mlr)
meanSqErr = metrics.mean_squared_error(y_test, y_pred_mlr)
# RMSE is the square root of the MSE computed just above.
rootMeanSqErr = np.sqrt(meanSqErr)

print('Mean Absolute Error:', meanAbErr)
print('Mean Square Error:', meanSqErr)
print('Root Mean Square Error:', rootMeanSqErr)

In [None]:
# Print the model's R-squared on the held-out test split, scaled to a percentage.
# Scoring on the full data (X, y) would include the rows the model was trained on
# and overstate how well it generalises, so only the test rows are used here.
print('R squared value of the model: {:.2f}'.format(mlr.score(X_test, y_test)*100))