In [94]:
# Build a regression model that estimates a student's chance of admission from their scores.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

In [95]:
# Display setting: render at most 9 columns when a frame is shown.
pd.set_option('display.max_columns', 9)

# Read the admissions CSV and discard the 'Serial No.' column in one chained expression.
dataSet = (
    pd.read_csv('C:/curso.de.machine.learning/src/DataSets/Admission_Predict.csv')
    .drop(columns='Serial No.')
)

In [96]:
# Show the (rows, columns) dimensions of the loaded frame.
dataSet.shape

(400, 8)

In [97]:
# List the dtype pandas assigned to each column.
dataSet.dtypes

GRE Score              int64
TOEFL Score            int64
University Rating      int64
SOP                  float64
LOR                  float64
CGPA                 float64
Research               int64
Chance of Admit      float64
dtype: object

In [98]:
# Tally the missing values in every column and print the per-column counts.
isNullData = dataSet.isna().sum()
print(isNullData)

GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64


In [99]:
# Separate the frame into the feature matrix (x) and the regression target (y).
x = dataSet.drop(columns='Chance of Admit')
y = dataSet['Chance of Admit']

In [100]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=45,
)

In [101]:
# Fit a plain linear regression on the training split, then score it on the held-out split.
linearRegressionModel = LinearRegression().fit(x_train, y_train)
linearRegressionResult = linearRegressionModel.score(X=x_test, y=y_test)
print('Linear:', linearRegressionResult)

Linear: 0.8093428763333979


In [102]:
# Fit a Ridge regression (constructor defaults) on the training split and score it on the held-out split.
ridgeRegression = Ridge().fit(x_train, y_train)
ridgeResult = ridgeRegression.score(X=x_test, y=y_test)
print('Ridge:', ridgeResult)

Ridge: 0.8085345281885273


In [103]:
# Fit an ElasticNet regression (constructor defaults) on the training split and score it on the held-out split.
elasticNetModel = ElasticNet().fit(x_train, y_train)
elasticNetResult = elasticNetModel.score(X=x_test, y=y_test)
print('ElasticNet:', elasticNetResult)

ElasticNet: 0.5647447563640624


In [104]:
# Fit a Lasso regression (constructor defaults) on the training split and score it on the held-out split.
lassoModel = Lasso().fit(x_train, y_train)
lassoResult = lassoModel.score(X=x_test, y=y_test)
print('Lasso:', lassoResult)

Lasso: 0.2855728845758697


In [105]:
# K-fold cross-validation (5 splits, no shuffling requested) of the linear model
# over the full dataset; the reported figure is the mean of the per-fold scores.
kfold = KFold(n_splits=5)
kfoldResult = cross_val_score(estimator=linearRegressionModel, X=x, y=y, cv=kfold)
linearKFoldResult = np.mean(kfoldResult)
print('Linear-KFold:', linearKFoldResult)

Linear-KFold: 0.7711794121066361


In [106]:
# K-fold cross-validation (5 splits, no shuffling requested) of the Ridge model
# over the full dataset; the reported figure is the mean of the per-fold scores.
kfold = KFold(n_splits=5)
kfoldResult = cross_val_score(estimator=ridgeRegression, X=x, y=y, cv=kfold)
ridgeKFoldResult = np.mean(kfoldResult)
print('Ridge-KFold:', ridgeKFoldResult)

Ridge-KFold: 0.7708707501243893


In [107]:
# K-fold cross-validation (5 splits, no shuffling requested) of the ElasticNet model
# over the full dataset; the reported figure is the mean of the per-fold scores.
kfold = KFold(n_splits=5)
kfoldResult = cross_val_score(estimator=elasticNetModel, X=x, y=y, cv=kfold)
elasticNetKFoldResult = np.mean(kfoldResult)
print('ElasticNet-KFold:', elasticNetKFoldResult)

ElasticNet-KFold: 0.5018924119815464


In [108]:
# K-fold cross-validation (5 splits, no shuffling requested) of the Lasso model
# over the full dataset; the reported figure is the mean of the per-fold scores.
kfold = KFold(n_splits=5)
kfoldResult = cross_val_score(estimator=lassoModel, X=x, y=y, cv=kfold)
lassoKFoldResult = np.mean(kfoldResult)
print('Lasso-KFold:', lassoKFoldResult)

Lasso-KFold: 0.2120832416398803


In [109]:
# Choose the regression model with the highest hold-out score.
objectResult = {
    'Linear': linearRegressionResult,
    'Ridge': ridgeResult,
    'ElasticNet': elasticNetResult,
    'Lasso': lassoResult,
}
# Compare on the stored score and keep only the winning label.
maxResult = max(objectResult.items(), key=lambda entry: entry[1])[0]
print('Regression Model:', maxResult, '-', 'Value:', objectResult[maxResult])

Regression Model: Linear - Value: 0.8093428763333979


In [110]:
# Choose the model with the highest mean K-fold cross-validation score.
objectResultLKfold = {
    'Linear-KFold': linearKFoldResult,
    'Ridge-KFold': ridgeKFoldResult,
    'ElasticNet-KFold': elasticNetKFoldResult,
    'Lasso-KFold': lassoKFoldResult,
}
# Compare on the stored score and keep only the winning label.
maxResultKfold = max(objectResultLKfold.items(), key=lambda entry: entry[1])[0]
print('cross validation:', maxResultKfold, '-', 'Value:', objectResultLKfold[maxResultKfold])

cross validation: Linear-KFold - Value: 0.7711794121066361


In [111]:
# Predict the target for every held-out row with the fitted linear model.
predictPriceTest = linearRegressionModel.predict(x_test)

# Work on an explicit copy of the test features. pd.DataFrame(x_test) defaults to
# copy=False for DataFrame input, so the inserted column could otherwise act on data
# shared with x_test, which would corrupt later predict/score calls and make this
# cell fail when re-run.
newData = x_test.copy()

# Put the predictions in the first column, ahead of the features they were derived from.
newData.insert(0, 'Chance of Admit', predictPriceTest)
newData.head(120)

Unnamed: 0,Chance of Admit,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
255,0.692759,307,110,4,4.0,4.5,8.37,0
346,0.515812,304,97,2,1.5,2.0,7.64,0
228,0.729467,318,112,3,4.0,3.5,8.67,0
384,0.965937,340,113,4,5.0,5.0,9.74,1
269,0.696963,308,108,4,4.5,5.0,8.34,0
...,...,...,...,...,...,...,...,...
399,0.926974,333,117,4,5.0,4.0,9.66,1
216,0.855838,322,112,4,4.5,4.5,9.26,1
397,0.903676,330,116,4,5.0,4.5,9.45,1
271,0.526897,299,96,2,1.5,2.0,7.86,0
