In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Load the dataset from the CSV file located in the working directory.
df = pd.read_csv(filepath_or_buffer='binary.csv')

In [3]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [4]:
# Feature matrix: the three predictor columns.
X = df.loc[:, ['gre', 'gpa', 'rank']]
# Target vector: the label column the classifiers are trained to predict.
y = df.loc[:, 'admit']

In [5]:
# Hold out 30% of the rows for testing. Stratifying on y preserves its class
# proportions in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42,
)

In [6]:
# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits so the test data never influences the fit.
stdsc = StandardScaler()
stdsc.fit(X_train)
X_train_std = stdsc.transform(X_train)
X_test_std = stdsc.transform(X_test)

### Sem regularização

In [7]:
# Baseline: linear classifier trained with SGD and no regularization penalty.
# `penalty=None` replaces the string 'none', which was deprecated and later
# removed in newer scikit-learn releases. The fixed `random_state` makes this
# cell repeatable and uses the same seed as the regularized models below, so
# the comparison is not affected by a different shuffling of the data.
model = SGDClassifier(penalty=None, random_state=0)
model.fit(X_train_std, y_train)

# Score the baseline on the held-out, standardized test split.
y_pred = model.predict(X_test_std)
acc = accuracy_score(y_test, y_pred)

print('Coef: {}\nIntercept: {}\nAccuracy: {}'.format(model.coef_, model.intercept_, acc))

Coef: [[ 4.51662719 -5.51408178 -4.22153869]]
Intercept: [-8.48670366]
Accuracy: 0.65




In [8]:
# Regularization strengths to compare for every penalty type.
values_alpha = [0.001, 0.01, 1, 10, 100]


def _fit_and_report(label, penalty, alpha, trailer='\n'):
    """Fit an SGDClassifier with the given penalty and print a short summary.

    Trains on the standardized training split (`X_train_std`, `y_train`),
    predicts on the standardized test split (`X_test_std`) and prints the
    coefficients, intercept and test accuracy under the given heading.
    The data splits are read from the notebook's global namespace.

    Args:
        label: Heading printed above the results (e.g. 'L1').
        penalty: Value passed to SGDClassifier's `penalty` argument.
        alpha: Value passed to SGDClassifier's `alpha` argument.
        trailer: Text appended after the accuracy; controls the blank lines
            that separate one report from the next.

    Returns:
        Tuple `(fitted_model, test_predictions, test_accuracy)`.
    """
    # Same seed for every configuration so runs differ only in penalty/alpha.
    clf = SGDClassifier(penalty=penalty, random_state=0, alpha=alpha)
    clf.fit(X_train_std, y_train)
    predictions = clf.predict(X_test_std)
    accuracy = accuracy_score(y_test, predictions)

    print('{}\nCoef: {}\nIntercept: {}\nAccuracy: {}{}'.format(
        label, clf.coef_, clf.intercept_, accuracy, trailer))
    return clf, predictions, accuracy


for alpha in values_alpha:
    print('------ ****** Alpha {} ****** ------'.format(alpha))

    # The fitted models and the last predictions/accuracy stay available as
    # notebook-level variables after the loop, exactly as before.
    model_l1, y_pred, acc = _fit_and_report('L1', 'l1', alpha)
    model_l2, y_pred, acc = _fit_and_report('L2', 'l2', alpha)
    # The last report of each alpha gets an extra blank line as a separator.
    model_en, y_pred, acc = _fit_and_report('Elastic Net', 'elasticnet', alpha, trailer='\n\n')

------ ****** Alpha 0.001 ****** ------
L1
Coef: [[ 0.         0.        -1.1531502]]
Intercept: [-0.05432506]
Accuracy: 0.5666666666666667

L2
Coef: [[ 0.42014238  0.70804928 -0.65946165]]
Intercept: [-0.85912656]
Accuracy: 0.6916666666666667

Elastic Net
Coef: [[ 0.40307689  0.         -0.67888559]]
Intercept: [-0.86542035]
Accuracy: 0.7083333333333334


------ ****** Alpha 0.01 ****** ------
L1
Coef: [[ 0.          0.         -0.03984293]]
Intercept: [-0.9252897]
Accuracy: 0.6833333333333333

L2
Coef: [[-0.04731102  0.10569639 -0.22124028]]
Intercept: [-0.99591171]
Accuracy: 0.6833333333333333

Elastic Net
Coef: [[ 0.          0.         -0.06396977]]
Intercept: [-0.90474453]
Accuracy: 0.6833333333333333


------ ****** Alpha 1 ****** ------
L1
Coef: [[0. 0. 0.]]
Intercept: [-0.99925628]
Accuracy: 0.6833333333333333

L2
Coef: [[-0.00151273  0.00050209 -0.00411768]]
Intercept: [-0.99883472]
Accuracy: 0.6833333333333333

Elastic Net
Coef: [[0. 0. 0.]]
Intercept: [-0.99927491]
Accuracy



### Elastic Net com alpha 0.001 é o que possui a maior acurácia, com 0.7083