# Imports

In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV

# Train

In [7]:
train_set = pd.read_csv('train/train_encoded.csv')

In [8]:
X, y = train_set.iloc[:,:-1],train_set.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Perceptron

In [13]:
model = Perceptron()
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.319953


In [9]:
model = Perceptron(class_weight='balanced')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.774677


### L1

In [14]:
model = Perceptron(penalty='l1')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.383131


In [10]:
model = Perceptron(penalty='l1', class_weight='balanced')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.675365


In [48]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.1)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.775326


In [46]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.1, eta0=2)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.775326


In [19]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.01)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.457306


In [22]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.001)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.000000


In [21]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.0001)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.675365


In [None]:
model = Perceptron(penalty='l1', class_weight='balanced', alpha=0.1)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

## L2

In [15]:
model = Perceptron(penalty='l2')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.262865


In [11]:
model = Perceptron(penalty='l2', class_weight='balanced')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.772840


## Elasticnet

In [17]:
model = Perceptron(penalty='elasticnet')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.262865


In [12]:
model = Perceptron(penalty='elasticnet', class_weight='balanced')
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.772840


# Grid Search

In [36]:
params = {
    'penalty': ['l2', 'l1', 'elasticnet'],
    'alpha': [1, 0.1, 0.01, 0.001],
    'fit_intercept': [True, False],
    'max_iter': [1000, 2000, 5000, 10000],
    'eta0': [1, 2, 5, 10, 50],
    'n_jobs': [-1],
    'n_iter_no_change': [5, 10, 20],
    'class_weight': ['balanced']
}

In [37]:
grid_model = GridSearchCV(estimator=Perceptron(), param_grid=params)
result = grid_model.fit(X_train, y_train)

In [38]:
grid_model.best_params_

{'alpha': 0.001,
 'class_weight': 'balanced',
 'eta0': 1,
 'fit_intercept': False,
 'max_iter': 1000,
 'n_iter_no_change': 20,
 'n_jobs': -1,
 'penalty': 'l1'}

In [39]:
preds = grid_model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F-Mean Score: %f" % (f1))

F-Mean Score: 0.681401


In [40]:
model = Perceptron(penalty='l1', n_iter_no_change=20, max_iter=1000, eta0=1, alpha=0.001,\
                   class_weight='balanced', fit_intercept=False)
model.fit(X_train, y_train)
preds = model.predict(X_test)
f1 = np.sqrt(f1_score(y_test, preds))
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.681401


# Test

In [49]:
test_set = pd.read_csv('test/test_encoded.csv')

In [50]:
col_id = test_set['id']
test_set.drop('id', axis=1, inplace=True)

In [51]:
preds = model.predict(test_set)
preds

array([1, 1, 1, ..., 1, 1, 1])

In [52]:
test_set['id'] = col_id

In [53]:
test_set['target'] = preds.astype('int64')
test_set[['id', 'target']]

Unnamed: 0,id,target
0,0,1
1,2,1
2,3,1
3,9,1
4,11,1
...,...,...
3258,10861,1
3259,10865,1
3260,10868,1
3261,10874,1


In [54]:
test_set[['id', 'target']].to_csv('result.csv', header=True, index=False)