# Multiple Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the CSV, then split it column-wise: every column except the last
# becomes the feature matrix X, and the last column becomes the target y.
dataset = pd.read_csv('MuscleGain_FatLoss.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [3]:
# Inspect the raw feature matrix before any encoding is applied.
print(X)

[[18 152.4 30 ... 50.0 'Fat loss' 1.3]
 [19 152.6 31 ... 50.18 'Fat loss' 1.5]
 [20 152.79999999999998 32 ... 50.36 'Fat loss' 1.3]
 ...
 [40 174.99999999999872 104 ... 65.96 'Muscle Gain' 1.9]
 [41 175.1999999999987 105 ... 66.15 'Muscle Gain' 1.7]
 [42 175.3999999999987 106 ... 66.33 'Muscle Gain' 1.9]]


## Encoding labels

In [4]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode columns 3 and 7 of X in place.
# Each column gets its own encoder: a single shared instance would be refit on
# column 7 and lose the column-3 mapping, so that column could no longer be
# decoded through `classes_` / `inverse_transform`.
label_encoders = {}
for col_idx in (3, 7):
    le = LabelEncoder()
    X[:, col_idx] = le.fit_transform(X[:, col_idx])
    label_encoders[col_idx] = le
# After the loop, `le` refers to the column-7 encoder.

In [5]:
# Show the first 15 rows (all columns) to check the label-encoded values.
print(X[:15])

[[18 152.4 30 1 12.92 'Severe Thinness' 50.0 0 1.3]
 [19 152.6 31 1 13.31 'Severe Thinness' 50.18 0 1.5]
 [20 152.79999999999998 32 1 13.71 'Severe Thinness' 50.36 0 1.3]
 [21 152.99999999999997 33 1 14.1 'Severe Thinness' 50.54 0 1.5]
 [22 153.19999999999996 34 1 14.49 'Severe Thinness' 50.72 0 1.3]
 [23 153.39999999999995 35 1 14.87 'Severe Thinness' 50.91 0 1.5]
 [24 153.59999999999994 36 1 15.26 'Severe Thinness' 51.09 0 1.3]
 [25 153.79999999999993 37 1 15.64 'Severe Thinness' 51.27 0 1.5]
 [26 153.99999999999991 38 1 16.02 'Moderate Thinness' 51.45 0 1.3]
 [27 154.1999999999999 39 1 16.4 'Moderate Thinness' 51.63 0 1.5]
 [28 154.3999999999999 40 1 16.78 'Moderate Thinness' 51.81 0 1.3]
 [29 154.59999999999988 41 1 17.15 'Mild Thinness' 51.99 0 1.5]
 [30 154.79999999999987 42 1 17.53 'Mild Thinness' 52.17 0 1.3]
 [31 154.99999999999986 43 1 17.9 'Mild Thinness' 52.35 0 1.5]
 [32 155.19999999999985 44 1 18.27 'Mild Thinness' 52.54 0 1.3]]


## Encoding categorical data

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode column 5 of X; every other column passes through unchanged
# and is placed after the generated indicator columns.
# sparse_threshold=0 forces a dense result. With the default threshold, a
# sufficiently sparse output is returned as a SciPy sparse matrix, which
# np.array() would wrap in a 0-d object array instead of a 2-D feature matrix.
# NOTE: this cell rebinds X; re-running it would apply the encoder to the
# already-expanded matrix, so run it once per fresh kernel.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [5])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))

In [10]:
# Show the first 15 rows (all columns) of the one-hot encoded feature matrix.
print(X[:15])

[[0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 18 152.4 30 1 12.92 50.0 0 1.3]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 19 152.6 31 1 13.31 50.18 0 1.5]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 20 152.79999999999998 32 1 13.71 50.36
  0 1.3]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 21 152.99999999999997 33 1 14.1 50.54 0
  1.5]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 22 153.19999999999996 34 1 14.49 50.72
  0 1.3]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 23 153.39999999999995 35 1 14.87 50.91
  0 1.5]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 24 153.59999999999994 36 1 15.26 51.09
  0 1.3]
 [0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 25 153.79999999999993 37 1 15.64 51.27
  0 1.5]
 [0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 26 153.99999999999991 38 1 16.02 51.45
  0 1.3]
 [0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 27 154.1999999999999 39 1 16.4 51.63 0
  1.5]
 [0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 28 154.3999999999999 40 1 16.78 51.81 0
  1.3]
 [1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 29 154.59999999999988 41 1 17.15 51.99
  0 1.5]
 [1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 30 154.7999999999

## Splitting the dataset into the Training set and Test set

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Multiple Linear Regression model on the Training set

In [12]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression model on the training split only.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

## Predicting the Test set results

In [13]:
# Predict on the held-out rows, then print the predictions (left column)
# next to the true targets (right column), rounded to two decimals.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[2990.8  3025.16]
 [2310.58 2303.94]
 [2567.53 2575.91]
 ...
 [1905.33 1920.23]
 [1883.14 1900.17]
 [2123.4  2014.33]]


In [15]:
# Show the true target values of the test split.
print(y_test)

[3025.16 2303.94 2575.91 ... 1920.23 1900.17 2014.33]


## Evaluating performance

In [14]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.9902959782983927