# Prediction with different classifiers

**Plan :**

[1. Loading the libraries and the data](#1)  
[2. Predictions with different classifiers](#2)  
> [A - SVM](#2a)  
> [B - Random forest](#2b)  
> [C - Random forest with AdaBoost](#2c)   
> [D - Extra tree classifier](#2d)    
> [E - Extra gradient boosting](#2e)    
> [F - LightGBM classifier](#2f)    
> [G - Neural network](#2g)  
> [H - Convolutional neural network](#2h)  

<a id="1"></a>
## 1. Loading the libraries and the data

In [1]:
import random
import numpy as np
import igraph
from sklearn import svm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn import preprocessing
import nltk
import csv
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [2]:
random.seed(0)

### Loading the features created in the feature engineering code

In [3]:
train = pd.read_csv('train_complete.csv', header = 0)
test = pd.read_csv('test_complete.csv', header = 0)

In [4]:
features = ['Title overlap', 'Abstract overlap', 'Temporal difference', 'Common authors', 'Common journal',
            'Cosine similarity', 'Authors in abstract', 'LSA distance', # Semantic features
            'Betweenness centrality', 'Same cluster', 'Page rank', 'Ressource allocation', 'Jaccard coefficient',
            'Adamic Adar', 'Preferential attachment', 'Target_indegree', 'Target_outdegree',
            'Source_indegree', 'Source_outdegree', 'Common_in', 'Common_out' #Topological features
           ]

In [5]:
training_features = train[features]
testing_features = test[features]

In [6]:
labels_array = train['Edge']

In [7]:
training_features.head()

Unnamed: 0,Title overlap,Abstract overlap,Temporal difference,Common authors,Common journal,Cosine similarity,Authors in abstract,LSA distance,Betweenness centrality,Same cluster,...,Ressource allocation,Jaccard coefficient,Adamic Adar,Preferential attachment,Target_indegree,Target_outdegree,Source_indegree,Source_outdegree,Common_in,Common_out
0,2,4,0,0,1,0.039132,0,0.176262,8166.884091,0,...,0.142857,0.058824,0.513898,72,3.0,3.0,8.0,4.0,0.0,0.0
1,1,7,1,0,0,0.015247,0,0.185291,31162.082411,1,...,0.226401,0.097087,4.320366,11613,11.0,68.0,124.0,23.0,0.0,0.0
2,0,6,-2,0,0,0.008888,0,0.281976,-10559.734281,1,...,0.0,0.0,0.0,5,1.0,0.0,2.0,3.0,0.0,0.0
3,0,8,-4,0,0,0.00474,0,0.306004,611.223395,0,...,0.0,0.0,0.0,280,4.0,16.0,2.0,12.0,0.0,0.0
4,0,8,-5,0,0,0.027379,0,0.218788,-501.379284,1,...,0.0,0.0,0.0,168,7.0,0.0,2.0,22.0,0.0,0.0


In [8]:
testing_features.head()

Unnamed: 0,Title overlap,Abstract overlap,Temporal difference,Common authors,Common journal,Cosine similarity,Authors in abstract,LSA distance,Betweenness centrality,Same cluster,...,Ressource allocation,Jaccard coefficient,Adamic Adar,Preferential attachment,Target_indegree,Target_outdegree,Source_indegree,Source_outdegree,Common_in,Common_out
0,0,7,0,0,0,0.055452,0,0.137422,213620.5,0,...,0.0,0.0,0.0,1062,49.0,10.0,3.0,15.0,0.0,0.0
1,2,6,1,0,1,0.11067,0,0.246302,1121793.0,1,...,0.311535,0.074303,5.377973,13590,100.0,203.0,39.0,6.0,0.0,0.0
2,1,4,2,0,1,0.043831,0,0.281266,230572.7,0,...,1.342594,0.065338,15.053612,164797,209.0,14.0,726.0,13.0,0.0,0.0
3,1,13,0,0,1,0.054856,0,0.251033,698948.6,1,...,0.298419,0.221053,4.899424,3315,11.0,40.0,16.0,49.0,0.0,0.0
4,0,4,5,0,0,0.147222,0,0.199901,-26005.92,0,...,0.0,0.0,0.0,1050,1.0,6.0,144.0,6.0,0.0,0.0


In [9]:
print training_features.shape
print testing_features.shape

(615512, 21)
(32648, 21)


In [10]:
nltk.download('punkt') # for tokenization
nltk.download('stopwords')
stpwds = set(nltk.corpus.stopwords.words("english"))
stemmer = nltk.stem.PorterStemmer()

with open("data/testing_set.txt", "r") as f:
    reader = csv.reader(f)
    testing_set  = list(reader)

testing_set = [element[0].split(" ") for element in testing_set]

###################
# random baseline #
###################

random_predictions = np.random.choice([0, 1], size=len(testing_set))
random_predictions = zip(range(len(testing_set)),random_predictions)

with open("data/random_predictions.csv","wb") as pred:
    csv_out = csv.writer(pred)
    for row in random_predictions:
        csv_out.writerow(row)
        
# note: Kaggle requires that you add "ID" and "category" column headers

###############################
# beating the random baseline #
###############################

# the following script gets an F1 score of approximately 0.66

# data loading and preprocessing 

# the columns of the data frame below are: 
# (1) paper unique ID (integer)
# (2) publication year (integer)
# (3) paper title (string)
# (4) authors (strings separated by ,)
# (5) name of journal (optional) (string)
# (6) abstract (string) - lowercased, free of punctuation except intra-word dashes

with open("data/training_set.txt", "r") as f:
    reader = csv.reader(f)
    training_set  = list(reader)

training_set = [element[0].split(" ") for element in training_set]

with open("data/node_information.csv", "r") as f:
    reader = csv.reader(f)
    node_info  = list(reader)

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/delavergne/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/delavergne/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


<a id="2"></a>
## 2. Predictions with different classifiers

### Scaling the data

In [11]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
training_features = min_max_scaler.fit_transform(training_features)
testing_features = min_max_scaler.transform(testing_features)

<a id="2a"></a>
## A - SVM

In [12]:
# initialize basic SVM
classifier = svm.LinearSVC()

In [15]:
from sklearn import cross_validation
np.mean(cross_validation.cross_val_score(classifier, training_features, labels_array, cv=3, scoring ="f1"))



0.96441523689045106

In [16]:
%%time
#train
classifier.fit(training_features, labels_array)

CPU times: user 12.4 s, sys: 333 ms, total: 12.7 s
Wall time: 13.1 s


LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [17]:
# issue predictions
predictions_SVM = list(classifier.predict(testing_features))

# write predictions to .csv file suitable for Kaggle (just make sure to add the column names)
predictions_SVM = zip(range(len(testing_set)), predictions_SVM)
test = pd.DataFrame(predictions_SVM)
sub = test.copy()
sub['id']=sub.index
sub['category'] = sub[1]
sub = sub[['id', 'category']]

In [18]:
sub.head(10)

Unnamed: 0,id,category
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_SVM.csv', index=False)

<a id="2b"></a>
## B - Random forest 

In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [22]:
#%%time
# Random Forest
#for i in np.linspace(300, 400, num = 10).tolist():
#    RF = RandomForestClassifier(n_estimators=int(i), 
#                            max_features = 5,
#                            max_depth = 7)
#    scores = cross_validation.cross_val_score(RF, training_features, labels_array, cv=5, scoring = "f1")
#
#    print 'trees: ', i, ' score: ', np.mean(scores)

In [23]:
%%time
RF = RandomForestClassifier(max_features = 5,
                            n_estimators = 400,
                            max_depth = 7)
RF.fit(training_features, labels_array)

CPU times: user 10min 5s, sys: 5.17 s, total: 10min 10s
Wall time: 10min 12s


In [24]:
predictions_RF_pr = RF.predict_proba(testing_features)

In [25]:
predictions_RF_cutoff = []
for pred in predictions_RF_pr:
    if pred[1] >=0.7:
        predictions_RF_cutoff.append(1)
    else:
        predictions_RF_cutoff.append(0)

In [26]:
sub['category'] = predictions_RF_cutoff

In [27]:
sub.head(10)

Unnamed: 0,id,category
0,0,0
1,1,1
2,2,1
3,3,1
4,4,0
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_RF.csv', index=False)

<a id="2c"></a>
## C - Random forest with AdaBoost

In [28]:
#for i in np.linspace(10, 200, num = 20).tolist():
#    clf = AdaBoostClassifier(base_estimator = RandomForestClassifier(n_estimators = 400,
#                            max_features = 5,
#                            max_depth = 7), n_estimators=int(i))
#    scores = cross_validation.cross_val_score(clf, training_features, labels_array, cv=3, scoring = 'f1')
#    print 'estimators: ', i, ' score: ', np.mean(scores)

In [29]:
%%time
clf = AdaBoostClassifier(base_estimator = RandomForestClassifier(n_estimators = 170, max_depth = 7, max_features = 5),
                         n_estimators=50)
clf.fit(training_features, labels_array)

KeyboardInterrupt: 

In [None]:
predictions_Ada = clf.predict(testing_features)

In [None]:
sub['category'] = predictions_Ada

In [None]:
sub.head(10)

In [None]:
#sub.to_csv('sub/sub_RFwAdaB.csv', index=False)

<a id="2d"></a>
## D- Extra tree classifier

In [30]:
from sklearn.ensemble import ExtraTreesClassifier
xrf = ExtraTreesClassifier(max_features=None, min_samples_leaf= 20, n_estimators = 300, n_jobs= 3)

In [31]:
%%time
xrf.fit(training_features, labels_array)

CPU times: user 14min 31s, sys: 4.92 s, total: 14min 36s
Wall time: 4min 54s


ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features=None, max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=20, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=300, n_jobs=3,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [32]:
proba_xrf = xrf.predict_proba(testing_features)

In [33]:
predictions_xrf_cutoff = []
for pred in proba_xrf:
    if pred[1] >=0.7:
        predictions_xrf_cutoff.append(1)
    else:
        predictions_xrf_cutoff.append(0)

In [36]:
sub['category'] = predictions_xrf_cutoff

In [37]:
sub.head(10)

Unnamed: 0,id,category
0,0,0
1,1,1
2,2,1
3,3,1
4,4,0
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_XRF.csv', index=False)

<a id="2e"></a>
## E - Extra Gradient Boosting

In [75]:
from sklearn.model_selection import StratifiedKFold
kfold = 3
skf = StratifiedKFold(n_splits=kfold, random_state=10)

def xgb_score(preds, dtrain):
    labels = dtrain.get_label()
    return 'log_loss', metrics.log_loss(labels, preds)

In [77]:
import xgboost as xgb

In [None]:
%%time
X= training_features
#X = X_train_normalized.values 
y= labels_array
pred =0

for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    params = {
        'eta': 0.01, 
        'max_depth': 5,
        'min_child_weight':0,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'gamma':0,
        'subsample':0.8,
        'colsample_bytree':0.8,
        'seed': i,
        'silent': True
    }
    
    print('[Fold %d/%d]' % (i + 1, kfold))
    X_train, X_valid = X[train_index], X[test_index]
    y_train, y_valid = y[train_index], y[test_index]
    # Convert our data into XGBoost format
    d_train = xgb.DMatrix(X_train, y_train)
    d_valid = xgb.DMatrix(X_valid, y_valid)
    d_test = xgb.DMatrix(testing_features)
    watchlist = [(d_train, 'train'), (d_valid, 'valid')]


    mdl = xgb.train(params, d_train, 5000, watchlist, early_stopping_rounds=50, feval=xgb_score, maximize=False, verbose_eval=50)

    print('[Fold %d/%d Prediciton:]' % (i + 1, kfold))
    # Predict on our test data
    pred += mdl.predict(d_test, ntree_limit=mdl.best_ntree_limit)

[Fold 1/3]


In [None]:
pred_xgb = pred/kfold
pred_xgb = [int(round(prediction)) for prediction in pred]
pred_xgb

In [None]:
sub['category'] = pred_xgb

In [None]:
#sub.to_csv('sub/sub_XGB.csv', index=False)

<a id="2f"></a>
## F - LightGBM

In [38]:
lgb_params = {
    'learning_rate': 0.07,
        'max_depth': 5,
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'feature_fraction': 0.8,
        'num_leaves':30,
        'min_data_in_leaf':600,
        'lambda_l1': 0.7,
        'lambda_l2':0.6}

In [39]:
import lightgbm as lgb
lgb_class = lgb.LGBMClassifier(n_estimators=2252)

In [40]:
from sklearn.model_selection import KFold
K = 5
cv = KFold(n_splits = K, shuffle = True, random_state=0)

X = training_features
X_test = testing_features
y = labels_array.values

In [43]:
%%time
from sklearn.metrics import f1_score
sumf1=0
pred_test=0
scores=[]
for i, (idx_train, idx_val) in enumerate(cv.split(X)):
    print("Fold ", i )
    X_train = X[idx_train]
    y_train = y[idx_train]
    X_valid = X[idx_val]
    y_valid = y[idx_val]
    lgb_class.fit(X_train, y_train, verbose=50, eval_set=(X_valid, y_valid))
    pred=lgb_class.predict_proba(X_valid)
    pred = np.argmax(pred, axis=1)
    pred_test_fold = lgb_class.predict_proba(X_test)
    pred_test+=pred_test_fold
    score=f1_score(pred,y_valid)
    scores.append(score)
    print(score)
    sumf1 +=score
sumf1 = sumf1/K
print("Total score ")
print(sumf1)
print(scores)

('Fold ', 0)
[50]	valid_0's binary_logloss: 0.0712975
[100]	valid_0's binary_logloss: 0.0661358
[150]	valid_0's binary_logloss: 0.0860903
[200]	valid_0's binary_logloss: 0.0941979
[250]	valid_0's binary_logloss: 0.095925
[300]	valid_0's binary_logloss: 0.0962043
[350]	valid_0's binary_logloss: 0.0976698
[400]	valid_0's binary_logloss: 0.0978682
[450]	valid_0's binary_logloss: 0.0977961
[500]	valid_0's binary_logloss: 0.0976426
[550]	valid_0's binary_logloss: 0.0976045
[600]	valid_0's binary_logloss: 0.0973372
[650]	valid_0's binary_logloss: 0.0973786
[700]	valid_0's binary_logloss: 0.0974113
[750]	valid_0's binary_logloss: 0.0974302
[800]	valid_0's binary_logloss: 0.097426
[850]	valid_0's binary_logloss: 0.0974098
[900]	valid_0's binary_logloss: 0.0974206
[950]	valid_0's binary_logloss: 0.0974725
[1000]	valid_0's binary_logloss: 0.0974947
[1050]	valid_0's binary_logloss: 0.0975055
[1100]	valid_0's binary_logloss: 0.0974636
[1150]	valid_0's binary_logloss: 0.0974773
[1200]	valid_0's bin

[700]	valid_0's binary_logloss: 0.104868
[750]	valid_0's binary_logloss: 0.104705
[800]	valid_0's binary_logloss: 0.104742
[850]	valid_0's binary_logloss: 0.104766
[900]	valid_0's binary_logloss: 0.104876
[950]	valid_0's binary_logloss: 0.104814
[1000]	valid_0's binary_logloss: 0.104845
[1050]	valid_0's binary_logloss: 0.104858
[1100]	valid_0's binary_logloss: 0.104864
[1150]	valid_0's binary_logloss: 0.104734
[1200]	valid_0's binary_logloss: 0.10476
[1250]	valid_0's binary_logloss: 0.104678
[1300]	valid_0's binary_logloss: 0.104733
[1350]	valid_0's binary_logloss: 0.104776
[1400]	valid_0's binary_logloss: 0.104788
[1450]	valid_0's binary_logloss: 0.104955
[1500]	valid_0's binary_logloss: 0.104975
[1550]	valid_0's binary_logloss: 0.104921
[1600]	valid_0's binary_logloss: 0.10494
[1650]	valid_0's binary_logloss: 0.10503
[1700]	valid_0's binary_logloss: 0.105079
[1750]	valid_0's binary_logloss: 0.105147
[1800]	valid_0's binary_logloss: 0.10518
[1850]	valid_0's binary_logloss: 0.105231
[1

In [44]:
pred_lgbm = pred_test/K
pred_lgbm = np.argmax(pred_lgbm, axis=1)

In [45]:
sub['category'] = pred_lgbm

In [46]:
sub.head(10)

Unnamed: 0,id,category
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_LGBM.csv', index=False)

<a id="2g"></a>
## G - Neural network

In [47]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils

Using TensorFlow backend.


In [66]:
from sklearn import cross_validation
X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(training_features,labels_array, 
                                                                     test_size=0.4, random_state=0)
Y_train = np.array([y_train,(1-y_train)]).T
Y_valid = np.array([y_valid,(1-y_valid)]).T

In [53]:
# neural network building...
model = Sequential()
model.add(Dense(16, input_shape=(21,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

In [54]:
# choose a optimizer for our neural network
rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

In [55]:
from keras import callbacks
early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                              patience=2,
                              verbose=0, mode='auto')

In [56]:
%%time
model.fit(X_train, Y_train,
          batch_size=128, epochs=70, verbose=2,
          validation_data=(X_valid, Y_valid), callbacks = [early_stopping])
score = model.evaluate(X_valid, Y_valid, verbose=0)
score

Train on 369307 samples, validate on 246205 samples
Epoch 1/70
 - 9s - loss: 0.2151 - val_loss: 0.1047
Epoch 2/70
 - 13s - loss: 0.1259 - val_loss: 0.1004
Epoch 3/70
 - 13s - loss: 0.1197 - val_loss: 0.0991
Epoch 4/70
 - 13s - loss: 0.1186 - val_loss: 0.1031
Epoch 5/70
 - 13s - loss: 0.1174 - val_loss: 0.0977
Epoch 6/70
 - 13s - loss: 0.1176 - val_loss: 0.0973
Epoch 7/70
 - 13s - loss: 0.1173 - val_loss: 0.1012
Epoch 8/70
 - 13s - loss: 0.1171 - val_loss: 0.1000
CPU times: user 2min 21s, sys: 45.7 s, total: 3min 7s
Wall time: 1min 50s


In [57]:
prediction_NN = model.predict_proba(testing_features, batch_size=128, verbose=1)
prediction_NN



array([[  7.17933178e-02,   9.28206623e-01],
       [  1.00000000e+00,   4.67915567e-11],
       [  1.00000000e+00,   8.42130448e-21],
       ..., 
       [  3.71415354e-02,   9.62858498e-01],
       [  1.69833448e-22,   1.00000000e+00],
       [  6.92459941e-01,   3.07540089e-01]], dtype=float32)

In [58]:
prediction_NN = 1 - prediction_NN
tmp = prediction_NN[:,1].round()
preds = tmp.astype(int)

In [59]:
sub['category'] = preds

In [60]:
sub.head(10)

Unnamed: 0,id,category
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_NN.csv', index=False)

<a id="2h"></a>
## H - Convolutional neural network

In [67]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_valid = X_valid.reshape(X_valid.shape[0], X_valid.shape[1], 1)

In [68]:
from keras.layers import Convolution1D, GlobalMaxPooling1D, Flatten
model = Sequential()
model.add(Convolution1D(filters = 32,
              kernel_size = 3,
              input_shape = X_train.shape[1:3],
                       activation = 'relu'))
model.add(GlobalMaxPooling1D())
#model.add(Convolution1D(filters = 64,
#              kernel_size = 3,
#              activation = 'relu'))
#model.add(GlobalMaxPooling1D())
#model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(1024, input_shape = (21,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

In [69]:
rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms, metrics = ['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 19, 32)            128       
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 32)                0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 1024)              33792     
_________________________________________________________________
activation_11 (Activation)   (None, 1024)              0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 2)                 2050      
__________

In [71]:
%%time
batch_size = 32
epochs = 20

# Run the train
history = model.fit(X_train, Y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(X_valid, Y_valid))
score = model.evaluate(X_valid, Y_valid, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 369307 samples, validate on 246205 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
('Test loss:', 0.33423814170807131)
('Test accuracy:', 0.87572145163897919)
CPU times: user 47min 4s, sys: 5min 52s, total: 52min 57s
Wall time: 25min 39s


In [72]:
X_test = testing_features.reshape(testing_features.shape[0], testing_features.shape[1], 1)
prediction_CNN = model.predict_classes(X_test, batch_size=32, verbose=1)



In [73]:
sub['category'] = 1 - prediction_CNN

In [74]:
sub.head(10)

Unnamed: 0,id,category
0,0,1
1,1,1
2,2,1
3,3,1
4,4,1
5,5,0
6,6,0
7,7,1
8,8,0
9,9,1


In [None]:
#sub.to_csv('sub/sub_CNN.csv', index=False)