-
Notifications
You must be signed in to change notification settings - Fork 0
/
gradient_boosting.py
37 lines (28 loc) · 1.08 KB
/
gradient_boosting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
from sklearn import svm
from sklearn import cross_validation
import pandas as pd
from pprint import pprint
import sklearn.ensemble as ens
from sklearn.cross_validation import cross_val_score
# Gradient-boosting benchmark script: load the prepared feature tables, fit a
# GradientBoostingClassifier, report a cross-validated score and write the
# predicted probabilities for the test set to 'gbmv2_benchmark.csv'.
#
# NOTE(review): `cross_val_score` is bound by the file-level import from
# `sklearn.cross_validation`. That module was deprecated in scikit-learn 0.18
# and removed in 0.20; the file-level imports need to move to
# `sklearn.model_selection` before this script can run on a current release.
#
# Every print below is a single-argument call, which emits exactly the same
# text under Python 2 (parenthesised expression) and Python 3 (function call).
print(" GRADIENT BOOSTING METHOD!\n")

print("Starting to load data...")
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0.
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults:
# the first CSV column becomes the index and date-like index values are parsed.
test = pd.read_csv('test_cz.csv', index_col=0, parse_dates=True)
train = pd.read_csv('train_cz.csv', index_col=0, parse_dates=True)
train_labels = pd.read_csv('TrainLabels.csv')
submission = pd.read_csv('SampleSubmission.csv')
print("Data loaded successfully!\n")

# Targets are taken from the second column of TrainLabels.csv and cast to int.
# (Selecting one column of a 2-D array is already 1-D, so no ravel is needed.)
sp = np.asarray(train_labels.values[:, 1], dtype=int)

# NOTE: despite the historical name `rfc`, this is a gradient-boosting model.
rfc = ens.GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_features=0.25,
)

print("Starting to train...")
rfc.fit(train.values, sp)
print("Training finished!\n")

print("Starting cross-validation...")
# Uses the library's default fold count and the estimator's default scorer.
# The message below labels the result as accuracy.
scores = cross_val_score(rfc, train.values, sp)
print("Cross-validation accuracy: {}".format(scores.mean()))

print("Predicting ...")
preds = rfc.predict_proba(test.values)
print("Predicted!\n")

# Keep only the second probability column, i.e. the class at index 1 of
# `rfc.classes_` -- presumably the positive label; confirm against the data.
preds = preds[:, 1]

# Rows of SampleSubmission.csv are assumed to be in the same order as the
# rows of test_cz.csv -- TODO confirm; the assignment is purely positional.
submission['Prediction'] = preds
submission.to_csv('gbmv2_benchmark.csv', index=False)