In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
import glob
import os
import math
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn import preprocessing
pd.set_option('display.max_columns', 200)

In [2]:
train_df = pd.read_csv('train.csv')

# 基礎確認

In [3]:
train_df.head()

Unnamed: 0,index,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,native-country,Y
0,322,21,Private,132652,Some-college,10,Divorced,Adm-clerical,Own-child,White,Female,United-States,0
1,11968,29,Private,132652,HS-grad,9,Divorced,Adm-clerical,Not-in-family,White,Female,United-States,0
2,10868,19,Private,132652,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Female,United-States,0
3,3394,17,Private,132652,HS-grad,9,Never-married,Craft-repair,Own-child,White,Male,United-States,0
4,15993,47,Private,132652,Some-college,10,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States,0


In [4]:
train_df.shape

(11900, 13)

In [5]:
train_df.describe()

Unnamed: 0,index,age,fnlwgt,education-num,Y
count,11900.0,11900.0,11900.0,11900.0,11900.0
mean,8474.659076,27.958319,130706.714118,10.38437,0.256134
std,4927.91637,9.563625,14218.695201,2.209696,0.436515
min,0.0,17.0,24694.0,3.0,0.0
25%,4181.75,20.0,132652.0,9.0,0.0
50%,8453.5,24.0,132652.0,10.0,0.0
75%,12761.25,34.0,132652.0,13.0,1.0
max,16997.0,64.0,132652.0,16.0,1.0


In [6]:
train_df["workclass"].value_counts()

Private             9237
Self-emp-not-inc     854
Local-gov            583
?                    527
State-gov            298
Self-emp-inc         237
Federal-gov          164
Name: workclass, dtype: int64

In [7]:
train_df["fnlwgt"].value_counts()

132652    11681
24694       169
34572        50
Name: fnlwgt, dtype: int64

In [8]:
train_df['fnlwgt'] = train_df['fnlwgt'].astype(str)
train_df['fnlwgt'].value_counts()

132652    11681
24694       169
34572        50
Name: fnlwgt, dtype: int64

In [9]:
train_df["education"].value_counts()

HS-grad         4034
Some-college    2813
Bachelors       2235
Masters          805
Assoc-voc        641
Assoc-acdm       305
11th             302
Prof-school      234
10th             173
7th-8th          128
9th              107
12th              71
5th-6th           51
1st-4th            1
Name: education, dtype: int64

In [10]:
train_df["marital-status"].value_counts()

Married-civ-spouse    6004
Never-married         3392
Divorced              1911
Separated              312
Widowed                281
Name: marital-status, dtype: int64

In [11]:
train_df["occupation"].value_counts()

Adm-clerical         1730
Craft-repair         1616
Sales                1546
Prof-specialty       1536
Exec-managerial      1493
Other-service         924
Machine-op-inspct     626
Handlers-cleaners     603
?                     601
Tech-support          403
Farming-fishing       333
Transport-moving      286
Protective-serv       203
Name: occupation, dtype: int64

In [12]:
train_df["relationship"].value_counts()

Husband           5400
Not-in-family     2502
Own-child         2017
Unmarried         1249
Wife               529
Other-relative     203
Name: relationship, dtype: int64

In [13]:
train_df["race"].value_counts()

White                 10978
Black                   677
Asian-Pac-Islander      245
Name: race, dtype: int64

In [14]:
train_df["native-country"].value_counts()

United-States    11869
Mexico              25
Philippines          6
Name: native-country, dtype: int64

# 前処理

In [15]:
train_df.shape

(11900, 13)

In [16]:
train_df['fnlwgt']=train_df['fnlwgt'].astype(str)

In [17]:
train_df.dtypes

index              int64
age                int64
workclass         object
fnlwgt            object
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
native-country    object
Y                  int64
dtype: object

In [18]:
test_df = pd.read_csv('test.csv')
test_df['fnlwgt']=test_df['fnlwgt'].astype(str)

In [19]:
test_df.dtypes

index              int64
age                int64
workclass         object
fnlwgt            object
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
native-country    object
dtype: object

In [20]:
test_df.head()

Unnamed: 0,index,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,native-country
0,3873,17,Local-gov,132652,Masters,14,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
1,3625,23,Private,132652,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,United-States
2,3028,19,Private,132652,11th,7,Never-married,Handlers-cleaners,Own-child,White,Female,United-States
3,13814,30,State-gov,132652,HS-grad,9,Never-married,Protective-serv,Unmarried,Black,Female,United-States
4,15398,60,Private,132652,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States


In [21]:
test_df.shape

(5100, 12)

In [22]:
concat_df = pd.concat([train_df.iloc[:, 0:12], test_df])
concat_df.head()

Unnamed: 0,index,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,native-country
0,322,21,Private,132652,Some-college,10,Divorced,Adm-clerical,Own-child,White,Female,United-States
1,11968,29,Private,132652,HS-grad,9,Divorced,Adm-clerical,Not-in-family,White,Female,United-States
2,10868,19,Private,132652,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Female,United-States
3,3394,17,Private,132652,HS-grad,9,Never-married,Craft-repair,Own-child,White,Male,United-States
4,15993,47,Private,132652,Some-college,10,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States


In [23]:
concat_df.dtypes

index              int64
age                int64
workclass         object
fnlwgt            object
education         object
education-num      int64
marital-status    object
occupation        object
relationship      object
race              object
sex               object
native-country    object
dtype: object

In [24]:
concat_df['fnlwgt']

0       132652
1       132652
2       132652
3       132652
4       132652
5       132652
6       132652
7       132652
8       132652
9       132652
10      132652
11      132652
12      132652
13      132652
14      132652
15      132652
16      132652
17      132652
18       24694
19      132652
20      132652
21      132652
22      132652
23      132652
24      132652
25      132652
26      132652
27      132652
28      132652
29      132652
         ...  
5070    132652
5071    132652
5072    132652
5073    132652
5074    132652
5075    132652
5076    132652
5077     24694
5078    132652
5079    132652
5080    132652
5081    132652
5082    132652
5083    132652
5084    132652
5085    132652
5086    132652
5087    132652
5088    132652
5089    132652
5090    132652
5091    132652
5092    132652
5093    132652
5094    132652
5095    132652
5096    132652
5097    132652
5098    132652
5099    132652
Name: fnlwgt, Length: 17000, dtype: object

In [25]:
for val in concat_df['fnlwgt'].unique():
    print(f':{val}:')

:132652:
:24694:
:34572:


In [26]:
concat_df = pd.get_dummies(concat_df)

In [27]:
train_rev_df = concat_df.iloc[0:11900, :]
train_rev_df.head()

Unnamed: 0,index,age,education-num,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,fnlwgt_132652,fnlwgt_24694,fnlwgt_34572,education_10th,education_11th,education_12th,education_1st-4th,education_5th-6th,education_7th-8th,education_9th,education_Assoc-acdm,education_Assoc-voc,education_Bachelors,education_HS-grad,education_Masters,education_Prof-school,education_Some-college,marital-status_Divorced,marital-status_Married-civ-spouse,marital-status_Never-married,marital-status_Separated,marital-status_Widowed,occupation_?,occupation_Adm-clerical,occupation_Craft-repair,occupation_Exec-managerial,occupation_Farming-fishing,occupation_Handlers-cleaners,occupation_Machine-op-inspct,occupation_Other-service,occupation_Prof-specialty,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,relationship_Husband,relationship_Not-in-family,relationship_Other-relative,relationship_Own-child,relationship_Unmarried,relationship_Wife,race_Asian-Pac-Islander,race_Black,race_White,sex_Female,sex_Male,native-country_Mexico,native-country_Philippines,native-country_United-States
0,322,21,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1
1,11968,29,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1
2,10868,19,13,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1
3,3394,17,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1
4,15993,47,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1


In [28]:
train_rev_df.shape

(11900, 59)

In [29]:
train_rev_df['Y'] = train_df['Y']
train_rev_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,index,age,education-num,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,fnlwgt_132652,fnlwgt_24694,fnlwgt_34572,education_10th,education_11th,education_12th,education_1st-4th,education_5th-6th,education_7th-8th,education_9th,education_Assoc-acdm,education_Assoc-voc,education_Bachelors,education_HS-grad,education_Masters,education_Prof-school,education_Some-college,marital-status_Divorced,marital-status_Married-civ-spouse,marital-status_Never-married,marital-status_Separated,marital-status_Widowed,occupation_?,occupation_Adm-clerical,occupation_Craft-repair,occupation_Exec-managerial,occupation_Farming-fishing,occupation_Handlers-cleaners,occupation_Machine-op-inspct,occupation_Other-service,occupation_Prof-specialty,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,relationship_Husband,relationship_Not-in-family,relationship_Other-relative,relationship_Own-child,relationship_Unmarried,relationship_Wife,race_Asian-Pac-Islander,race_Black,race_White,sex_Female,sex_Male,native-country_Mexico,native-country_Philippines,native-country_United-States,Y
0,322,21,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0
1,11968,29,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0
2,10868,19,13,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0
3,3394,17,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0
4,15993,47,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0


In [30]:
test_rev_df = concat_df.iloc[11900:, :]
test_rev_df.head()

Unnamed: 0,index,age,education-num,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,fnlwgt_132652,fnlwgt_24694,fnlwgt_34572,education_10th,education_11th,education_12th,education_1st-4th,education_5th-6th,education_7th-8th,education_9th,education_Assoc-acdm,education_Assoc-voc,education_Bachelors,education_HS-grad,education_Masters,education_Prof-school,education_Some-college,marital-status_Divorced,marital-status_Married-civ-spouse,marital-status_Never-married,marital-status_Separated,marital-status_Widowed,occupation_?,occupation_Adm-clerical,occupation_Craft-repair,occupation_Exec-managerial,occupation_Farming-fishing,occupation_Handlers-cleaners,occupation_Machine-op-inspct,occupation_Other-service,occupation_Prof-specialty,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,relationship_Husband,relationship_Not-in-family,relationship_Other-relative,relationship_Own-child,relationship_Unmarried,relationship_Wife,race_Asian-Pac-Islander,race_Black,race_White,sex_Female,sex_Male,native-country_Mexico,native-country_Philippines,native-country_United-States
0,3873,17,14,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1
1,3625,23,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1
2,3028,19,7,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1
3,13814,30,9,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1
4,15398,60,13,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1


In [31]:
train_rev_df.shape

(11900, 60)

In [32]:
# 0がやや多いので比率は調整する必要あり
train_rev_df['Y'].value_counts()

0    8852
1    3048
Name: Y, dtype: int64

In [33]:
train_rev_df.iloc[:, :-1].head()

Unnamed: 0,index,age,education-num,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,fnlwgt_132652,fnlwgt_24694,fnlwgt_34572,education_10th,education_11th,education_12th,education_1st-4th,education_5th-6th,education_7th-8th,education_9th,education_Assoc-acdm,education_Assoc-voc,education_Bachelors,education_HS-grad,education_Masters,education_Prof-school,education_Some-college,marital-status_Divorced,marital-status_Married-civ-spouse,marital-status_Never-married,marital-status_Separated,marital-status_Widowed,occupation_?,occupation_Adm-clerical,occupation_Craft-repair,occupation_Exec-managerial,occupation_Farming-fishing,occupation_Handlers-cleaners,occupation_Machine-op-inspct,occupation_Other-service,occupation_Prof-specialty,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,relationship_Husband,relationship_Not-in-family,relationship_Other-relative,relationship_Own-child,relationship_Unmarried,relationship_Wife,race_Asian-Pac-Islander,race_Black,race_White,sex_Female,sex_Male,native-country_Mexico,native-country_Philippines,native-country_United-States
0,322,21,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1
1,11968,29,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1
2,10868,19,13,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1
3,3394,17,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1
4,15993,47,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1


# 学習

In [34]:
import lightgbm
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, precision_score, recall_score

In [35]:
x_train, x_val, y_train, y_val = train_test_split(train_rev_df.iloc[:, 1:-1], train_rev_df.iloc[:, -1], test_size=0.3, random_state=42)

In [36]:
print(f'x_train:shape: {x_train.shape}, x_val:shape: {x_val.shape}, y_train:{y_train.shape}, y_val:{y_val.shape}')

x_train:shape: (8330, 58), x_val:shape: (3570, 58), y_train:(8330,), y_val:(3570,)


In [37]:
x_train.head()

Unnamed: 0,age,education-num,workclass_?,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,fnlwgt_132652,fnlwgt_24694,fnlwgt_34572,education_10th,education_11th,education_12th,education_1st-4th,education_5th-6th,education_7th-8th,education_9th,education_Assoc-acdm,education_Assoc-voc,education_Bachelors,education_HS-grad,education_Masters,education_Prof-school,education_Some-college,marital-status_Divorced,marital-status_Married-civ-spouse,marital-status_Never-married,marital-status_Separated,marital-status_Widowed,occupation_?,occupation_Adm-clerical,occupation_Craft-repair,occupation_Exec-managerial,occupation_Farming-fishing,occupation_Handlers-cleaners,occupation_Machine-op-inspct,occupation_Other-service,occupation_Prof-specialty,occupation_Protective-serv,occupation_Sales,occupation_Tech-support,occupation_Transport-moving,relationship_Husband,relationship_Not-in-family,relationship_Other-relative,relationship_Own-child,relationship_Unmarried,relationship_Wife,race_Asian-Pac-Islander,race_Black,race_White,sex_Female,sex_Male,native-country_Mexico,native-country_Philippines,native-country_United-States
2602,17,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1
6621,28,10,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,1
511,32,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1
11235,23,9,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1
6189,36,13,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1


In [65]:
lgb_clf = lightgbm.LGBMClassifier(max_depth=3)
lgb_clf.fit(x_train, y_train)

LGBMClassifier(max_depth=3)

In [66]:
y_pred = lgb_clf.predict(x_val)
accuracy_score(y_val, y_pred)

0.8436974789915966

In [67]:
y_pred = lgb_clf.predict(test_rev_df.iloc[:, 1:])
y_pred

array([1, 0, 0, ..., 0, 0, 1], dtype=int64)

In [68]:
len(y_pred)

5100

In [69]:
submit_df = pd.read_csv('sample_submit.csv', header=None)
submit_df.head()

Unnamed: 0,0,1
0,3873,0
1,3625,0
2,3028,0
3,13814,0
4,15398,0


In [70]:
submit_df['2'] = y_pred
submit_df.head()

Unnamed: 0,0,1,2
0,3873,0,1
1,3625,0,0
2,3028,0,0
3,13814,0,0
4,15398,0,1


In [71]:
submit_df = submit_df.drop(submit_df.columns[[1]], axis=1)

In [72]:
submit_df.to_csv("submit3.csv", sep=",", header=None, index=0)

# サンプリング数の変更

In [77]:
from imblearn.over_sampling import SMOTE

In [78]:
x_train.shape

(8330, 58)

In [79]:
smote = SMOTE()
x_train_resampled, y_train_resampled = smote.fit_sample(x_train, y_train)

In [80]:
x_train_resampled.shape, y_train_resampled.shape

((12388, 58), (12388,))

In [81]:
y_train_resampled.value_counts()

1    6194
0    6194
Name: Y, dtype: int64

In [82]:
lgb_clf = lightgbm.LGBMClassifier(max_depth=20)
lgb_clf.fit(x_train_resampled, y_train_resampled)
y_pred = lgb_clf.predict(x_val)
accuracy_score(y_val, y_pred)



0.8243697478991596

In [83]:
y_pred = lgb_clf.predict(test_rev_df.iloc[:, 1:])
y_pred

array([1, 1, 0, ..., 0, 0, 1], dtype=int64)

In [84]:
submit_df = pd.read_csv('sample_submit.csv', header=None)
submit_df['2'] = y_pred
submit_df = submit_df.drop(submit_df.columns[[1]], axis=1)

In [85]:
submit_df.to_csv("submit_smote.csv", sep=",", header=None, index=0)