In [1]:
from google.colab import drive
drive.mount('drive')

Mounted at drive


In [2]:
ROOT = '/content/drive/'
drive.mount(ROOT)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# Importing Libraries

In [3]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# Reading Input

In [4]:
PATH = '/content/drive/My Drive/predicting graduate admission/'

In [5]:
df = pd.read_csv(PATH + 'input/train.csv')

In [6]:
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [7]:
df.shape

(500, 9)

In [8]:
# Dropping Serial No. as it has no sinificance in modelling
df = df.drop(['Serial No.'], axis=1)

In [9]:
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


# Algorithm

To solve this problem, we will train 5 models, to predict the chance of admit in each of the universities. 

Input will be the data of each university and the features will be:
1. GRE Score
2. TOEFL Score
3. SOP
4. LOR
5. CGPA
6. Research

The Y-value will be Chance of Admit

In [None]:
def get_training_data(df):
  
  X = df.drop(columns=["University Rating", "Chance of Admit "], axis=1)
  y = df["Chance of Admit "]

  return X, y

In [None]:
def train_model(University_rating):

  # Select data according to university rating
  data = df[df['University Rating'] == University_rating]

  X, y = get_training_data(data)

  reg = LinearRegression()
  reg.fit(X, y)

  metric = cross_val_score(reg, X, y, cv=5)

  return reg, metric

In [None]:
university_ratings = df["University Rating"].unique()

university_recommendations = {}

for u in university_ratings:
    regressor, metric = train_model(u)
    university_recommendations["University ranking " + str(u)] = {'model': regressor, 'metric': metric}

In [None]:
university_recommendations

{'University ranking 1': {'metric': array([ 0.91791431, -0.03123308,  0.5077833 ,  0.58622725,  0.77048993]),
  'model': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)},
 'University ranking 2': {'metric': array([-0.64325416,  0.32028836,  0.48696248,  0.34106336,  0.65715079]),
  'model': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)},
 'University ranking 3': {'metric': array([0.04196118, 0.22399362, 0.14130896, 0.58430267, 0.78058286]),
  'model': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)},
 'University ranking 4': {'metric': array([0.70503399, 0.88116779, 0.43667585, 0.94462783, 0.90363293]),
  'model': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)},
 'University ranking 5': {'metric': array([0.89144242, 0.85894631, 0.87964782, 0.78529031, 0.85653355]),
  'model': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)}}

# Getting Recommendation

In [None]:
test = df.sample(20)
test = test.drop(columns = ["Chance of Admit ", "University Rating"])
test.head()

Unnamed: 0,GRE Score,TOEFL Score,SOP,LOR,CGPA,Research
265,313,102,2.5,2.5,8.68,0
18,318,110,4.0,3.0,8.8,0
182,299,100,3.0,3.5,7.88,0
482,328,113,4.0,2.5,8.77,1
302,322,105,3.0,3.0,8.45,1


In [None]:
predictions = {}

for uni in university_recommendations.keys():
    model = university_recommendations[uni]["model"]
    
    predictions[uni] = model.predict(test)
    
pred = pd.DataFrame(predictions)
pred.head(10)


Unnamed: 0,University ranking 4,University ranking 3,University ranking 2,University ranking 5,University ranking 1
0,0.623332,0.690761,0.686104,0.658774,0.68594
1,0.736268,0.722788,0.732461,0.747376,0.724969
2,0.546825,0.576191,0.55963,0.598772,0.6073
3,0.799183,0.760963,0.755739,0.838544,0.710049
4,0.692676,0.717116,0.689481,0.753266,0.690637
5,0.803116,0.789511,0.777364,0.845076,0.738852
6,0.489045,0.521541,0.49781,0.551607,0.537637
7,0.505933,0.524233,0.475614,0.62097,0.47385
8,0.638818,0.652489,0.613894,0.713722,0.60964
9,0.605242,0.665792,0.6721,0.638099,0.663432


The above model predicts the chance of each of the 10 students, getting in the different universities.