In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, cross_validate, KFold, LeaveOneOut, GridSearchCV

In [2]:
data = pd.read_csv('Admission_Predict.csv', sep=',')
data.dtypes

Serial No.             int64
GRE Score              int64
TOEFL Score            int64
University Rating      int64
SOP                  float64
LOR                  float64
CGPA                 float64
Research               int64
Chance of Admit      float64
dtype: object

In [3]:
data.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [6]:
Positive = data[data['Research'] == 1]['Research'].count()
Negative = data.shape[0] - Positive
print('Распределение классов:')
print('    Положительные: {} % ({})'.format(round(Positive/data.shape[0]*100, 4),Positive))
print('    Отрицательные: {} % ({})'.format(round(Negative/data.shape[0]*100, 4), Negative))

Распределение классов:
    Положительные: 54.75 % (219)
    Отрицательные: 45.25 % (181)


In [10]:
TrainX,TestX,TrainY,TestY = train_test_split(data[['Serial No.','GRE Score','TOEFL Score','University Rating','SOP','CGPA']],data['Research'], test_size=0.3, random_state = 1)

In [11]:
LogReg = LogisticRegression(C=10, solver='liblinear')
SVCT = SVC(gamma = 10)
DecTree = DecisionTreeClassifier(max_depth = 4)

In [12]:
LogReg.fit(TrainX, TrainY)
SVCT.fit(TrainX, TrainY)
DecTree.fit(TrainX, TrainY)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [13]:
Result = []
Result.append(LogReg.predict(TestX))
Result.append(SVCT.predict(TestX))
Result.append(DecTree.predict(TestX))
for res in Result:
    print(accuracy_score(TestY,res))

0.7416666666666667
0.575
0.7166666666666667


In [14]:
for res in Result:
    print(confusion_matrix(TestY,res), '\n')

[[38 13]
 [18 51]] 

[[ 0 51]
 [ 0 69]] 

[[44  7]
 [27 42]] 



In [15]:
for res in Result:
    print(classification_report(TestY,res), '\n')

              precision    recall  f1-score   support

           0       0.68      0.75      0.71        51
           1       0.80      0.74      0.77        69

   micro avg       0.74      0.74      0.74       120
   macro avg       0.74      0.74      0.74       120
weighted avg       0.75      0.74      0.74       120
 

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        51
           1       0.57      1.00      0.73        69

   micro avg       0.57      0.57      0.57       120
   macro avg       0.29      0.50      0.37       120
weighted avg       0.33      0.57      0.42       120
 

              precision    recall  f1-score   support

           0       0.62      0.86      0.72        51
           1       0.86      0.61      0.71        69

   micro avg       0.72      0.72      0.72       120
   macro avg       0.74      0.74      0.72       120
weighted avg       0.76      0.72      0.72       120
 



  'precision', 'predicted', average, warn_for)


In [16]:
CTP = [0.001,0.01,0.1,1,10,100,1000]
GammaTP = [0.001,0.01,0.1,1,10,100,1000]
DepthTP = [1,2,3,4,5,6,7,8,9,10]
tuned_parameters = [{'C':CTP}]
LogRegGS = GridSearchCV(LogisticRegression(solver='liblinear'), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
LogRegGS.fit(data[['Serial No.','GRE Score','TOEFL Score','University Rating','SOP','CGPA']]
                 ,data['Research'])
LogRegCBP = LogRegGS.best_params_.get('C')

tuned_parameters = [{'gamma':GammaTP}]
SVCGS = GridSearchCV(SVC(), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
SVCGS.fit(data[['Serial No.','GRE Score','TOEFL Score','University Rating','SOP','CGPA']]
                 ,data['Research'])
SVCGammaBP = SVCGS.best_params_.get('gamma')

tuned_parameters = [{'max_depth':DepthTP}]
DecTreeGS = GridSearchCV(DecisionTreeClassifier(), tuned_parameters, cv=LeaveOneOut(), scoring='accuracy')
DecTreeGS.fit(data[['Serial No.','GRE Score','TOEFL Score','University Rating','SOP','CGPA']]
                 ,data['Research'])
DecTreeDepthBP = DecTreeGS.best_params_.get('max_depth')

In [17]:
print('Подобранные гиперпараметры:')
print('    Логистическая регрессия: параметр С: ',LogRegCBP)
print('    Метод Опорных Векторов: параметр gamma: ',SVCGammaBP)
print('    Дерево решений: параметр глубина: ',DecTreeDepthBP)

Подобранные гиперпараметры:
    Логистическая регрессия: параметр С:  1000
    Метод Опорных Векторов: параметр gamma:  0.001
    Дерево решений: параметр глубина:  1


In [18]:
LogReg = LogisticRegression(C=LogRegCBP, solver='liblinear')
SVCT = SVC(gamma = SVCGammaBP)
DecTree = DecisionTreeClassifier(max_depth = DecTreeDepthBP)

In [19]:
LogReg.fit(TrainX, TrainY)
SVCT.fit(TrainX, TrainY)
DecTree.fit(TrainX, TrainY)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [20]:
Result = []
Result.append(LogReg.predict(TestX))
Result.append(SVCT.predict(TestX))
Result.append(DecTree.predict(TestX))
for res in Result:
    print(accuracy_score(TestY,res))

0.775
0.75
0.7416666666666667


In [21]:
for res in Result:
    print(confusion_matrix(TestY,res), '\n')

[[39 12]
 [15 54]] 

[[40 11]
 [19 50]] 

[[47  4]
 [27 42]] 



In [22]:
for res in Result:
    print(classification_report(TestY,res), '\n')

              precision    recall  f1-score   support

           0       0.72      0.76      0.74        51
           1       0.82      0.78      0.80        69

   micro avg       0.78      0.78      0.78       120
   macro avg       0.77      0.77      0.77       120
weighted avg       0.78      0.78      0.78       120
 

              precision    recall  f1-score   support

           0       0.68      0.78      0.73        51
           1       0.82      0.72      0.77        69

   micro avg       0.75      0.75      0.75       120
   macro avg       0.75      0.75      0.75       120
weighted avg       0.76      0.75      0.75       120
 

              precision    recall  f1-score   support

           0       0.64      0.92      0.75        51
           1       0.91      0.61      0.73        69

   micro avg       0.74      0.74      0.74       120
   macro avg       0.77      0.77      0.74       120
weighted avg       0.79      0.74      0.74       120
 

