### Load libraries

In [9]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Load data from admission.csv

In [10]:
# Read the raw admissions table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="admission.csv")

In [11]:
# Remove the 'Sno' column (presumably a serial number, not a predictor).
# Reassign rather than mutate in place, and ignore the column if it is
# already gone, so that re-running this cell does not raise KeyError.
df = df.drop(columns=['Sno'], errors='ignore')

In [12]:
# Cast Rating to string so it is handled as a label, not as a number.
df = df.assign(Rating=df['Rating'].astype('str'))

In [13]:
# One-hot encode the non-numeric columns of df; numeric columns pass through.
df = pd.get_dummies(data=df)

In [14]:
# Display the column labels currently present in df.
df.columns

Index(['Gre', 'Toefl', 'Sop', 'Lor', 'Cgpa', 'Research', 'Chance', 'Rating_1',
       'Rating_2', 'Rating_3', 'Rating_4', 'Rating_5'],
      dtype='object')

### Build Machine Learning Model 

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [16]:
# Target: the 'Chance' column multiplied by 100.
y = df['Chance'].mul(100)
# Features: every column except the target.
X = df.drop('Chance', axis=1)

In [17]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [18]:
# Shapes of the four pieces produced by the split, in the order
# (X_train, X_test, y_train, y_test).
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((400, 11), (100, 11), (400,), (100,))

In [19]:
# Create a linear regression estimator and train it on the training split.
lr_model = LinearRegression()
lr_model.fit(X=X_train, y=y_train)

LinearRegression()

In [20]:
# Pair every feature name with its fitted coefficient and print one
# aligned line per feature.
coef_lines = [
    f"{c:20}  {s:10.4f}"
    for c, s in zip(X_train.columns, lr_model.coef_)
]
for coef_line in coef_lines:
    print(coef_line)

Gre                       0.2125
Toefl                     0.2781
Sop                       0.1104
Lor                       2.0385
Cgpa                     11.4622
Research                  2.4491
Rating_1                 -0.4260
Rating_2                 -0.5559
Rating_3                 -0.6595
Rating_4                  0.0260
Rating_5                  1.6154


In [21]:
# Display the intercept term of the fitted linear model.
lr_model.intercept_

-131.98199418690507

### Evaluate Model with Train data

In [22]:
# R2 score of the model on the training split.
lr_model.score(X=X_train, y=y_train)

0.8332135424266612

In [23]:
from sklearn.metrics import mean_squared_error

In [24]:
# Predictions of the fitted model for the training rows.
y_pred = lr_model.predict(X=X_train)

In [25]:
# Mean squared error on the training split, and its square root (RMSE).
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
print("MSE   :", mse)
print("RMSE  :", np.sqrt(mse))

MSE   : 33.9728961599095
RMSE  : 5.828627296363141


In [26]:
# Side-by-side table of actual targets and model predictions for the
# training rows; show the first five.
result = y_train.to_frame(name="Actual").assign(Predicted=y_pred)
result.head(5)

Unnamed: 0,Actual,Predicted
107,91.0,92.856703
336,72.0,71.925583
71,96.0,97.254007
474,67.0,61.7036
6,75.0,70.816672


### Evaluate with Test data

In [27]:
# Predictions of the fitted model for the held-out test rows.
y_pred = lr_model.predict(X=X_test)

In [28]:
# R2 score of the model on the test split.
lr_model.score(X=X_test, y=y_test)

0.7674568403325926

In [29]:
# Side-by-side table of actual targets and model predictions for the
# test rows; show the first five.
test_res = y_test.to_frame(name="Actual").assign(Predicted=y_pred)
test_res.head(5)

Unnamed: 0,Actual,Predicted
90,64.0,66.349366
254,85.0,83.137649
283,80.0,77.302756
445,91.0,88.816781
461,68.0,59.684921


In [30]:
# Mean squared error on the test split, and its square root (RMSE).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE   :", mse)
print("RMSE  :", np.sqrt(mse))

MSE   : 40.61308147801352
RMSE  : 6.372839357618668
