### Load libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Load data from admission.csv

In [3]:
# Read the admissions data; the path is relative to the notebook's working directory.
# Later cells use the columns Gre, Toefl, Cgpa and Chance.
df = pd.read_csv("admission.csv")

### Build Machine Learning Model 

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

### Model with Normalization

In [5]:
# Inputs: the three score columns. Target: Chance multiplied by 100
# (presumably turning a 0-1 fraction into a percentage - confirm against the data).
feature_cols = ['Gre', 'Toefl', 'Cgpa']
X = df[feature_cols]
y = df['Chance'].mul(100)

In [6]:
# Hold out 20% of the rows as a test set; random_state pins the shuffle so the
# split is repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [7]:
## Fit model or Training Model
# `normalize=True` is no longer passed: the parameter was deprecated in
# scikit-learn 1.0 and removed in 1.2, where passing it raises a TypeError.
# It was also not min-max scaling - it centred each feature and divided it by
# its L2 norm. For plain least squares the coefficients (reported in the
# original feature units) and the predictions do not depend on that rescaling,
# so the fitted model is unchanged. Explicit feature scaling with
# StandardScaler is shown in the standardization section of this notebook.
model = LinearRegression()
model.fit(X_train,y_train)

LinearRegression(normalize=True)

In [8]:
# Fitted slopes (in feature order Gre, Toefl, Cgpa) and the intercept.
model.coef_, model.intercept_

(array([ 0.2530053 ,  0.31789215, 14.25427517]), -164.24396080709766)

In [9]:
# R^2 of the fitted model on the training split.
model.score(X_train,y_train)

0.8122343451664386

In [10]:
# Predict the target (Chance * 100) for the held-out test rows.
y_pred = model.predict(X_test)

In [11]:
## Regression metrics used to evaluate the test predictions.
## NOTE(review): mean_absolute_error is imported, but no MAE is computed in the cells shown here.
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [12]:
# Evaluate the test predictions: MSE, its square root (RMSE) and R^2.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2score = r2_score(y_test, y_pred)

# Report all three metrics to two decimals, in the same order as before.
print(f"Mean Squared Error : {mse:0.2f}")
print(f"RMSE   : {rmse:0.2f}")
print(f"R2 Score: {r2score:0.2f}")

Mean Squared Error : 41.34
RMSE   : 6.43
R2 Score: 0.76


## Standardization of DataSet

In [13]:
# Rebuild X and y exactly as in the previous section so this part of the
# notebook starts from the raw, unscaled columns.
feature_cols = ['Gre', 'Toefl', 'Cgpa']
X = df[feature_cols]
y = df['Chance'].mul(100)

In [14]:
# StandardScaler standardizes each feature column to zero mean and unit variance.
# NOTE(review): MinMaxScaler is imported but not used in the cells shown here.
from sklearn.preprocessing import StandardScaler, MinMaxScaler
ss = StandardScaler()

In [15]:
# Same 80/20 split and seed as before, so the scaled and unscaled models are
# trained and tested on identical rows.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

In [16]:
# Learn each feature's mean and variance from the training rows only, then
# standardize those same rows.
X_train_scaled = ss.fit(X_train).transform(X_train)

In [17]:
# First 10 standardized values of feature column 0 (Gre).
X_train_scaled[:10,0]

array([ 1.87138145,  0.19003625,  1.69439775, -0.78337412,  0.36701996,
       -0.25242301,  0.27852811,  0.72098737, -1.04884968, -0.42940671])

In [18]:
# First 10 standardized values of feature column 1 (Toefl).
X_train_scaled[:10,1]

array([ 1.56490551,  0.43325896,  0.75658655, -0.37506   ,  0.27159517,
       -0.86005137,  0.43325896,  0.75658655,  0.75658655,  0.27159517])

In [19]:
# Sanity check on column 0 (Gre): scaled data has 0 mean and 1 std.
gre_scaled = X_train_scaled[:, 0]
print(f"{gre_scaled.mean():f} {gre_scaled.std()}")

-0.000000 1.0


In [20]:
# Sanity check on column 1 (Toefl): scaled data has 0 mean and 1 std.
toefl_scaled = X_train_scaled[:, 1]
print(f"{toefl_scaled.mean():f} {toefl_scaled.std()}")

0.000000 1.0


In [19]:
## Fit model or Training Model
# Train on the standardized features. This rebinds `model`, replacing the
# estimator that was fitted on the raw features earlier in the notebook.
model = LinearRegression()
model.fit(X_train_scaled,y_train)

LinearRegression()

In [22]:
# Training R^2 for the model fitted on standardized features. Ordinary least
# squares is unaffected by rescaling the inputs, so this matches the training
# score of the model fitted on the raw features.
model.score(X_train_scaled,y_train)

0.8122343451664386

In [20]:
# Per-feature mean and variance learned from the training split (order: Gre, Toefl, Cgpa).
ss.mean_, ss.var_

(array([316.8525 , 107.32   ,   8.60215]),
 array([127.70074375,  38.2626    ,   0.37071688]))

In [26]:
# Standardize the test rows with the statistics already fitted on the training
# rows (transform only - the scaler is not re-fitted on the test data).
X_test_scaled = ss.transform(X_test)

In [27]:
# Predict from the standardized test features; the model was trained on standardized inputs.
y_pred = model.predict(X_test_scaled)

In [28]:
# R^2 on the test split, printed to two decimals.
score = r2_score(y_test,y_pred)
print(f"R2 Score: {score:0.2f}")

R2 Score: 0.76


In [29]:
# Mean squared error on the test split, printed to two decimals.
mse = mean_squared_error(y_test,y_pred)
print(f"Mean Squared Error : {mse:0.2f}")

Mean Squared Error : 41.34


### Scaled vs. non-scaled data

In [30]:
# Wrap the scaled array in a DataFrame for display. No column names or index are
# passed, so the columns are labelled 0, 1, 2 (Gre, Toefl, Cgpa order) and the
# rows are renumbered from 0 rather than keeping X_train's index.
scaled_df = pd.DataFrame(X_train_scaled)

In [31]:
# First 10 standardized training rows.
scaled_df.head(10)

Unnamed: 0,0,1,2
0,1.871381,1.564906,1.408932
1,0.190036,0.433259,0.308525
2,1.694398,0.756587,1.901652
3,-0.783374,-0.37506,-1.071091
4,0.36702,0.271595,-0.660491
5,-0.252423,-0.860051,-1.186059
6,0.278528,0.433259,-0.069227
7,0.720987,0.756587,0.932637
8,-1.04885,0.756587,0.078589
9,-0.429407,0.271595,0.144285


In [27]:
# The same first 10 training rows before scaling, for comparison with the scaled values.
X_train.head(10)

Unnamed: 0,Gre,Toefl,Cgpa
107,338,117,9.46
336,319,110,8.79
71,336,112,9.76
474,308,105,7.95
6,321,109,8.2
412,314,102,7.88
113,320,110,8.56
236,325,112,9.17
299,305,112,8.65
155,312,109,8.69


#### Scale input features

In [37]:
# Score a new applicant (Gre=320, Toefl=110, Cgpa=8.5). The model was trained on
# standardized features, so the input must pass through the already-fitted
# scaler first. The row is built as a DataFrame carrying the training column
# names because the scaler was fitted on a DataFrame; recent scikit-learn
# versions warn when feature names are missing at transform time.
applicant = pd.DataFrame([[320, 110, 8.5]], columns=X_train.columns)
chances = model.predict(ss.transform(applicant))

In [33]:
# Prediction for the scaled input, on the same scale as the target (Chance * 100).
chances

array([72.84721094])

In [35]:
# Counter-example: raw (unscaled) values are passed to a model that was trained
# on standardized features, so the resulting prediction is not meaningful.
chances = model.predict([[320,110,8.5]])

In [36]:
# Result for the unscaled input; compare with the prediction obtained from the scaled input.
chances

array([1277.63318018])