In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.grid_search import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


# Seed NumPy's global random number generator.
np.random.seed(42)

# --- Training data ----------------------------------------------------------
# Every column except 'id' and the 'species' label is used as a feature.
train = pd.read_csv('train.csv')
x_train = train.drop(['id', 'species'], axis=1).values

# Encode the species names as integer class labels. `le.classes_` is reused
# further down as the column headers of the submission file.
le = LabelEncoder().fit(train['species'])
y_train = le.transform(train['species'])

# The scaler is fitted on the training features only and the same fitted
# transform is applied to the test features later.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# --- Hyper-parameter search -------------------------------------------------
# Grid over C and the solver's stopping tolerance, 10-fold CV.
params = {'C': [100, 500, 1000, 10000], 'tol': [0.001, 0.0001, 0.00001]}
log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial')
# `refit` is a boolean flag: pass True, not the string 'True' (which only
# worked because any non-empty string is truthy -- 'False' would have too).
# NOTE(review): `sklearn.grid_search`, scoring='log_loss' and `grid_scores_`
# belong to the legacy scikit-learn API; later releases use
# `sklearn.model_selection`, 'neg_log_loss' and `cv_results_` -- confirm
# against the installed version before upgrading.
clf = GridSearchCV(log_reg, params, scoring='log_loss', refit=True,
                   n_jobs=-1, cv=10)
clf.fit(x_train, y_train)

# --- Report -----------------------------------------------------------------
# The recorded run prints negative scores, so the scorer appears to report
# negated log loss (closer to zero is better) -- confirm for your version.
print("best params: " + str(clf.best_params_))
# Distinct loop names so the module-level `params` grid is not overwritten.
for cv_params, mean_score, fold_scores in clf.grid_scores_:
    print("%0.3f (+/-%0.03f) for %r"
          % (mean_score, fold_scores.std(), cv_params))
    print(fold_scores)

# --- Test-set prediction ----------------------------------------------------
test = pd.read_csv('test.csv')
# pop() removes 'id' from the frame, so only feature columns remain in
# `test.values`; the ids are kept to index the submission.
test_ids = test.pop('id')
x_test = test.values
x_test = scaler.transform(x_test)

# Per-class probabilities from the grid search's predict_proba.
y_test = clf.predict_proba(x_test)

# One row per test id, one probability column per species name.
submission = pd.DataFrame(y_test, index=test_ids, columns=le.classes_)
submission.to_csv('submission10.csv')

best params: {'C': 10000, 'tol': 0.0001}
-0.033 (+/-0.022) for {'C': 100, 'tol': 0.001}
[-0.03890676 -0.06810568 -0.00726642 -0.03960359 -0.03890152 -0.03990723
 -0.06514724 -0.01044165 -0.00688661 -0.0130237 ]
-0.033 (+/-0.022) for {'C': 100, 'tol': 0.0001}
[-0.03878382 -0.0696209  -0.00719814 -0.03988442 -0.03934758 -0.04012912
 -0.06514724 -0.01052128 -0.00653116 -0.01296899]
-0.033 (+/-0.022) for {'C': 100, 'tol': 1e-05}
[-0.03878382 -0.0696209  -0.00719814 -0.03988442 -0.03934758 -0.04012912
 -0.06514724 -0.01052128 -0.00653116 -0.01296899]
-0.029 (+/-0.022) for {'C': 500, 'tol': 0.001}
[-0.0423761  -0.06162847 -0.00364373 -0.02864003 -0.03966322 -0.0325184
 -0.06695879 -0.00663288 -0.00419572 -0.00789675]
-0.030 (+/-0.023) for {'C': 500, 'tol': 0.0001}
[-0.03827506 -0.0651127  -0.00382025 -0.03376723 -0.03902954 -0.03549898
 -0.06821205 -0.00634474 -0.00367696 -0.00766159]
-0.030 (+/-0.023) for {'C': 500, 'tol': 1e-05}
[-0.03827506 -0.0651127  -0.00382025 -0.03376723 -0.03902954 