In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from xgboost import XGBRFClassifier

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

In [5]:
df = pd.read_csv('train.csv', index_col = 'ID')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
df['Class'].value_counts()

0    197982
1       383
Name: Class, dtype: int64

In [None]:
model = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 5678)
# 85.92% second shortest

In [6]:
model = XGBRFClassifier(n_estimators = 10, subsample = 0.9, solsample_bynode = 0.2)
# 85.1, fastest model took less than 5 minutes, used alot of computing power though

In [None]:
model = DecisionTreeClassifier()
# 78%

In [None]:
model = BaggingClassifier(n_estimators=10)
# 85.9 long asf

In [None]:
model = Pipeline([('s',StandardScaler()),('m',KNeighborsClassifier())])
# 85.41% long asf

In [None]:
model = ExtraTreesClassifier(n_estimators=10)
# 85.81 pretty long but not as fast as random forest

In [7]:
rskf = RepeatedStratifiedKFold(n_splits=10, random_state=1)
scores = []
counter = 1
for train_index, test_index in rskf.split(X, y):
    print("ITERATION: " + str(counter) + " ## TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    #score = cross_val_score(model, X_train, y_train, cv=rskf)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores.append(f1_score(y_test, y_pred))
    counter += 1

ITERATION: 1 ## TRAIN: [     0      1      2 ... 198361 198362 198364] TEST: [     4      9     27 ... 198315 198329 198363]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 2 ## TRAIN: [     0      2      3 ... 198362 198363 198364] TEST: [     1      6     15 ... 198351 198353 198358]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 3 ## TRAIN: [     1      2      3 ... 198361 198363 198364] TEST: [     0     45     71 ... 1

ITERATION: 16 ## TRAIN: [     0      1      2 ... 198361 198362 198363] TEST: [     8     10     13 ... 198351 198355 198364]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 17 ## TRAIN: [     0      1      2 ... 198361 198362 198364] TEST: [    15     17     38 ... 198345 198347 198363]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 18 ## TRAIN: [     0      2      3 ... 198362 198363 198364] TEST: [     1     18     21 ..

ITERATION: 31 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    18     37     66 ... 198330 198355 198356]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 32 ## TRAIN: [     0      1      2 ... 198361 198363 198364] TEST: [    21     22     40 ... 198337 198346 198362]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 33 ## TRAIN: [     0      3      4 ... 198361 198362 198363] TEST: [     1      2      7 ..

ITERATION: 46 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [     4     43     56 ... 198357 198358 198359]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 47 ## TRAIN: [     0      1      2 ... 198360 198362 198364] TEST: [    64     65     85 ... 198353 198361 198363]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 48 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    16     40     42 ..

ITERATION: 61 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [     5     17     19 ... 198333 198335 198340]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 62 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    13     25     30 ... 198349 198350 198354]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 63 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [     8     20     23 ..

ITERATION: 76 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    26     33     39 ... 198340 198341 198359]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 77 ## TRAIN: [     0      1      2 ... 198361 198363 198364] TEST: [     3     12     18 ... 198353 198354 198362]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 78 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    11     19     66 ..

ITERATION: 91 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    14     26     30 ... 198321 198348 198354]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 92 ## TRAIN: [     0      2      3 ... 198362 198363 198364] TEST: [     1     12     19 ... 198328 198342 198357]
Parameters: { "solsample_bynode" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


ITERATION: 93 ## TRAIN: [     0      1      2 ... 198362 198363 198364] TEST: [    24     40     49 ..

In [8]:
print(np.round(np.mean(scores),4) * 100)

85.1


In [None]:
type(model)

In [None]:
test = pd.read_csv('test.csv', index_col = 'ID')
pred = model.predict(test)
ans = pd.DataFrame(index = test.index)
ans = ans.assign(Class = pred)
ans.to_csv('Submission.csv')
ans

In [None]:
ans.value_counts()