# LOAD LIBRARIES

In [30]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### LOAD DATA FROM ADMISSION.CSV

In [31]:
# Read the admissions dataset (a CSV in the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="admission.csv")

In [32]:
# Show the column labels of the loaded frame to see which fields are available.
df.columns

Index(['Sno', 'Gre', 'Toefl', 'Rating', 'Sop', 'Lor', 'Cgpa', 'Research',
       'Chance'],
      dtype='object')

### BUILD MACHINE LEARNING MODEL USING ALL COLUMNS EXCEPT CHANCE (THE TARGET) AND SNO (IRRELEVANT TO THE PREDICTION)

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [34]:
# Target: the "Chance" column multiplied by 100
# (presumably turning a 0-1 value into a percentage -- confirm against the data).
y = df["Chance"].mul(100)
# Features: every column except the target and the "Sno" column.
X = df.drop(["Chance", "Sno"], axis=1)

In [35]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [36]:
# Shapes of each split, in the order: X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((400, 7), (100, 7), (400,), (100,))

In [37]:
# Create a linear regression estimator and fit it on the training split.
# The fit call is kept as the last expression so the cell still echoes the estimator.
lr_model = LinearRegression()
lr_model.fit(X=X_train, y=y_train)

LinearRegression()

In [38]:
# Print one "feature = coefficient" line per training column, pairing by position.
for i, c in enumerate(X_train.columns):
    s = lr_model.coef_[i]
    print(f"{c:15} = {s:10.4f}")

Gre             =     0.2141
Toefl           =     0.2770
Rating          =     0.5557
Sop             =     0.0510
Lor             =     2.0413
Cgpa            =    11.5054
Research        =     2.4126


In [39]:
# Intercept term learned by the fitted model (displayed as the cell's output).
lr_model.intercept_

-134.41673411950725

### EVALUATE WITH TRAIN DATA

In [40]:
# Coefficient of determination (R^2) of the model on the training split.
lr_model.score(X=X_train, y=y_train)

0.8320945273130533

In [41]:
from sklearn.metrics import mean_squared_error

In [42]:
# Predictions for the training rows.
# NOTE: `y_pred` is rebound to test-set predictions later in the notebook, so cells
# that read it depend on execution order.
y_pred = lr_model.predict(X=X_train)

In [43]:
# Training error: mean squared error between the training targets and `y_pred`,
# followed by its square root (RMSE).
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
print("MSE :", mse)
print("RMSE :", np.sqrt(mse))

MSE : 34.200829439440035
RMSE : 5.848147522031232


In [44]:
# Side-by-side table of true targets and model predictions for the training split.
# `y_train` is a Series, so its index labels the rows; `y_pred` must still hold the
# training-set predictions when this cell runs.
# The column is spelled "Predicted" to match the test-set table built later.
result = pd.DataFrame({"Actual": y_train, "Predicted": y_pred})

In [45]:
# Display the training comparison table (pandas abbreviates long frames when rendering).
result

Unnamed: 0,Actual,Prdeicted
107,91.0,93.217626
336,72.0,72.424599
71,96.0,96.508678
474,67.0,61.987576
6,75.0,71.262233
...,...,...
323,62.0,59.585293
192,86.0,82.563495
117,45.0,49.754748
47,89.0,93.920628


In [46]:
# First five rows of the training comparison table.
result.head(n=5)

Unnamed: 0,Actual,Prdeicted
107,91.0,93.217626
336,72.0,72.424599
71,96.0,96.508678
474,67.0,61.987576
6,75.0,71.262233


### EVALUATE WITH TEST DATA

In [47]:
# Predict on the held-out test rows; this rebinds `y_pred`, replacing the
# training-set predictions computed earlier.
y_pred = lr_model.predict(X=X_test)

In [48]:
# Coefficient of determination (R^2) of the model on the held-out test split.
lr_model.score(X=X_test, y=y_test)

0.7664048993199386

In [49]:
# Side-by-side table of true targets and predictions for the test split;
# rows are labelled by the index of `y_test`.
test_res = pd.DataFrame(
    data={
        "Actual": y_test,
        "Predicted": y_pred,
    }
)

In [50]:
# First five rows of the test comparison table.
test_res.head(n=5)

Unnamed: 0,Actual,Predicted
90,64.0,66.06252
254,85.0,83.467888
283,80.0,77.803235
445,91.0,88.054515
461,68.0,60.126564


In [51]:
# Test error: mean squared error between the test targets and `y_pred`,
# followed by its square root (RMSE). This rebinds `mse` from the training evaluation.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE :", mse)
print("RMSE :", np.sqrt(mse))

MSE : 40.79680034602103
RMSE : 6.387237301527244
