In [1]:
import pandas as pd
import numpy as np
from numba import njit

import matplotlib.pyplot as plt
from matplotlib import cm
from numpy import linalg as LA
from tqdm import tqdm
from numba import njit

from sklearn import svm

from kernel_functions import gram_phi, count_kuplet_k, count_kuplet_3
from preprocessing import preprocessing

In [2]:
# load all data as the numpy array type
#X = pd.read_csv('data/Xtr1_mat50.csv', sep=' ', header=None).values
X_raw0 = pd.read_csv('data/Xtr0.csv', sep= ' ', header = None).values.reshape((-1))
X_raw1 = pd.read_csv('data/Xtr1.csv', sep=' ', header=None).values.reshape((-1))
X_raw2 = pd.read_csv('data/Xtr2.csv', sep=' ', header=None).values.reshape((-1))

# transform to an array of string
X_valid0 = pd.read_csv('data/Xte0.csv', sep=' ', header=None).values.reshape((-1))
X_valid1 = pd.read_csv('data/Xte1.csv', sep=' ', header=None).values.reshape((-1))
X_valid2 = pd.read_csv('data/Xte1.csv', sep=' ', header=None).values.reshape((-1))


Y0 = pd.read_csv('data/Ytr0.csv', sep=',', header=0)['Bound'].values
Y1 = pd.read_csv('data/Ytr1.csv', sep=',', header=0)['Bound'].values
Y2 = pd.read_csv('data/Ytr2.csv', sep=',', header=0)['Bound'].values

#print('numerical features shape', X.shape)
#print('numerical features first row', X[0])
print('sequences shape: ', X_raw0.shape)
print('sequence first row: ', X_raw0[0])
print('labels shape', Y0.shape)

sequences shape:  (2000,)
sequence first row:  TCCTCAACTTTTATTGGGCCGCTGTGGCACCAGAATCTACGAATGGCGCCCTCTAGAGTTGTGTAAAGAAGTGGCGTCACCTCATTATAAATAAAAGGTTG
labels shape (2000,)


In [3]:
from kernel import *

def solve_svm_kernel(X_train, X_test, Y_train, Y_test, kernel='k_gram_gaussian', k=3, lamb=0.1, gamma=0.1, kktreg=1e-9):
    """
    kernel in ['k_gram', 'k_gram_gaussian', 'k_substring']
    """
    assert kernel in ['k_gram', 'k_gram_gaussian', 'k_substring']
    
    if kernel in ['k_gram', 'k_gram_gaussian']:
        if k == 3:
            X_train_process = np.array([count_kuplet_3(x) for x in X_train])
            X_test_process = np.array([count_kuplet_3(x) for x in X_test])
        else:
            X_train_process = np.array([count_kuplet_k(x,k=k) for x in X_train])
            X_test_process = np.array([count_kuplet_k(x,k=k) for x in X_test])
        
        # Adding 1 for the sake of the bias
        X_train_process = np.concatenate((X_train_process, np.ones((X_train_process.shape[0], 1))), axis=1)
        X_test_process = np.concatenate((X_test_process, np.ones((X_test_process.shape[0], 1))), axis=1)
        
    if kernel=="k_gram":
        # Computing the Gram-Matrix
        K = X_train_process.dot(X_train_process.T)
        w = solve_svm(K, Y_train, lamb=lamb, kktreg=kktreg)
        K_test = np.dot(X_test_process, np.transpose(X_train_process))
        print(K_test.shape)
    
    if kernel=="k_gram_gaussian":
        # Computing the Gram-Matrix
        K = np.array([LA.norm(X_train_process - y, axis=1) for y in X_train_process])
        K = np.exp(-K/gamma)
        # K = X_train_process.dot(X_train_process.T)
        w = solve_svm(K, Y_train, lamb=lamb)
        K_test = np.array([LA.norm(X_train_process - y, axis=1) for y in X_test_process])
        print(K_test.shape)
        K_test = np.exp(-K_test/gamma)
    
    if kernel=='k_substring':
        # Computing the Gram-Matrix
        # X_train_process = X_train
        # X_test_process = X_test
        N_train = len(X_train)
        N_test = len(X_test)
        K = np.zeros((N_train, N_train))
        for i in range(len(X_train)):
            for j in tqdm(range(i, len(X_train)), desc="Computing Traning Kernel 2"):
                K[i][j] = K[j][i] = kernel_string(X_train[i], X_train[j], k)
        w = solve_svm(K, Y_train, lamb=lamb, kktreg=kktreg)
        K_test = np.zeros((N_test, N_train))
        for i in tqdm(range(len(X_test)), desc="Compution testing Kernel"):
            for j in range(len(X_train)):
                K_test[i][j] = K_test[j][i] = kernel_string(X_test[i], X_train[j], k)
                
    # if kernel in ['k_gram', 'k_gram_gaussian']:
    n = K.shape[0]
    Y_predicted = np.dot(K_test, w[:n]) > 0.
    result = ((Y_test+1.)/ 2. == np.transpose(Y_predicted))
    Y_predicted_train = np.dot(K, w[:n]) > 0.
    result_train = ((Y_train+1)/ 2 == np.transpose(Y_predicted_train))
    if np.alltrue(Y_predicted):
        print("Toute les valeurs sont TRUE")
    if np.alltrue(Y_predicted==False):
        print("Toute les valeurs sont FALSE")
    return np.mean(result), np.mean(result_train)
             


In [4]:
X_train, Y_train, X_test, Y_test = preprocessing(X_raw1, Y1)
print('train shape', X_train.shape)
print('test shape', X_test.shape)

train shape (1600,)
test shape (400,)


In [5]:
X_train[:][0]

'AATGGGAGGCCCTGGAAGGCCGGATGGTCATGAAGGGCCACAGCTCCGCCCAGATAAAGCCGTCCGGGGGTGCGGTCGGCGAGCAGCACCCCCGGGAGGTG'

In [6]:
X1 = X_train[:][0]
X2 = X_train[:][1]
lamb = 0.5
k=3
# X1 = 'CGCTA'
# X2 = 'CTAG'
k = min(len(X1), len(X2))

B = np.zeros((k, len(X1), len(X2)))

base_azote = ['A', 'C', 'T', 'G']
d = {'A':[0.], 'C':[0.], 'T':[0.], 'G':[0.]}

k_grams_count1 = pd.DataFrame(data=d)
k_grams_count2 = pd.DataFrame(data=d)

# for i in range(len(X1)):
#     k_grams_count1[X1[i]] += 1.
#     k_grams_count2 = pd.DataFrame(data=d)
#     for j in range(len(X2)):
#         k_grams_count2[X2[j]] += lamb**(len(X2)-j)
#         B[1, i, j] = np.dot(k_grams_count1, np.transpose(k_grams_count2))[0][0]

def compute_B(X1, X2, k):
    
    B = np.zeros((k+1, len(X1), len(X2)))
    B[0, :, :]=1.
    if(X1[0] == X2[0]):
        B[1, 0, 0] = lamb**2
    # while(m<k):
    for j in range(1, len(X2)):
        B[1, 0, j] += lamb*B[1, 0, j-1]
        if X1[0] == X2[j]:
            # if(m==1):
            B[1, 0, j] += lamb**2 * 1.
    for i in range(1, len(X1)):
        B[1, i, 0] += lamb*B[1, i-1, 0]
        if X1[i] == X2[0]:
            # if(m==1):
            B[1, 0, j] += lamb**2 * 1. 
        
    for m in range(1, k+1):
        # while(j<len(X2)):
        for j in range(0, len(X2)-1):
            for i in range(0, len(X1)-1):
                # if(m <= i+2 and m <= j+2):
                test = (X1[i+1] == X2[j+1])
                B[m, i+1, j+1] = lamb * (B[m, i, j+1] + B[m, i+1, j]) - \
                    lamb**2 * B[m, i+1, j] + test * lamb**2 * B[m-1, i, j]
                
                # tmp = [X1[i+1]==x for x in X2[1:(j+1)]]
                # lamb_cumul = lamb**(np.arange(j+1, 1, -1))
                # B[m, i+1, j] += 
                
                # for s in range(1, j+1):
                #    if X2[s]==X1[i+1]:
                #        B[m, i+1, j] += B[m-1, i, s-1] * lamb**(j - s + 2)
                # i += 1
            # j+=1
        # m+=1
    
    return B

# B = compute_B(X1, X2, k)

def compute_K(X1, X2, k):
    
    B = compute_B(X1, X2, k-1)

    # m = 1
    K = np.zeros((k+1, len(X1), len(X2)))
    K[0, :, :] = 1.


    if(X1[0] == X2[0]):
        K[1, :, 0] = K[1, 0, :] =lamb**2
        
    C = np.zeros((k, len(X1), len(X2)))
    for m in range(k):
        for i in range(len(X1)-1):
            C[m, i, 0] = 0.
            for j in range(len(X2)-1):   
                C[m, i, j+1] = C[m, i, j] + B[m, i, j]*(X1[i+1]==X2[j+1])
        
    # while(m<=k):
    for m in range(1, k+1):
        # j = 0
        # while(j<len(X2)):
        for j in range(m-1, len(X2)):
            # i=0
            # if(j>0 and m==1): # else for m>1 K[m, 0, j] = 0.
            #     K[m, 0, j] = K[m, 0, j-1]
            for i in range(max(m-2, 0), len(X1)-1):
            # while(i<len(X1)-1):
                K[m, i+1, j] = K[m, i, j]
                # s = np.arange(1, 10)
                # tmp = [X1[i+1]==x for x in X2[1:(j+1)]]
                # su = np.dot(tmp, B[m-1, i, :j])
                # K[m, i+1, j] += lamb**2 * np.dot(tmp, B[m-1, i, :j])
                K[m, i+1, j] += lamb**2 * C[m-1, i, j]
                # for s in range(1, len(X2[:(j+1)])):
                #     if X2[s] == X1[i+1]:
                #         K[m, i+1, j] += lamb**2 * B[m-1, i, s-1]
                # i+=1
            # j+=1
        # m+=1
    return K

def kernel_string(X1, X2, k=4):
    K = compute_K(X1, X2, k)
    return K[k, len(X1)-1, len(X2)-1]

# print(kernel_string(X1, X2))

# K = compute_K(X1, X2, k)



In [37]:
X1[:2]

'GT'

In [7]:
X1 = 'CCGAG'
X2 = 'CTAG'
k=4
X1_t = X1# [:100]
X2_t = X2# [:100]
# B = compute_B(X1_t, X2_t, k)
K, C = compute_K(X1_t, X2_t, k)
print(K)
# print(B)

ValueError: too many values to unpack (expected 2)

In [39]:
X1 = 'CTAG'
X2 = 'CTAG'

C = np.zeros((k, len(X1), len(X2)))

for m in range(1, k):
    for i in range(len(X1)-1):
        C[m, i, 0] = 0.
        for j in range(1, len(X2)-1):   
            C[m, i, j+1] = C[m, i, j] + B[m-1, i, j]*(X1[i+1]==X2[j])

print(C)
        

[[[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]]


0.00390625

In [40]:
print(compute_K(X1_t, X2_t, k))

[[[ 1.          1.          1.          1.        ]
  [ 1.          1.          1.          1.        ]
  [ 1.          1.          1.          1.        ]
  [ 1.          1.          1.          1.        ]
  [ 1.          1.          1.          1.        ]]

 [[ 0.25        0.25        0.25        0.25      ]
  [ 0.25        0.25        0.25        0.25      ]
  [ 0.25        0.25        0.25        0.5       ]
  [ 0.25        0.25        0.5         0.75      ]
  [ 0.25        0.25        0.5         1.        ]]

 [[ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.01367188]
  [ 0.          0.          0.015625    0.02929688]
  [ 0.          0.          0.015625    0.09960938]]

 [[ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.          0.          0.          0.        ]
  [ 0.

In [8]:
from itertools import product
from string import ascii_lowercase

dic_k = [''.join(i) for i in product(base_azote, repeat = 1)]
nb_feat = len(dic_k)


In [9]:
k_grams_count1 = np.zeros(nb_feat)
k_grams_count2 = np.zeros(nb_feat)

for i,e in enumerate(dic_k):
    k_grams_count1[i] = X1.count(e)
    k_grams_count2[i] = X2.count(e)


In [10]:
# With Cross validation

X_train0, Y_train0, X_test0, Y_test0 = preprocessing(X_raw0, Y0)
X_train1, Y_train1, X_test1, Y_test1 = preprocessing(X_raw1, Y1)
X_train2, Y_train2, X_test2, Y_test2 = preprocessing(X_raw2, Y2)
print('train shape', X_train0.shape)
print('test shape', X_test0.shape)

print('train shape', X_train1.shape)
print('test shape', X_test1.shape)

print('train shape', X_train2.shape)
print('test shape', X_test2.shape)


train shape (1600,)
test shape (400,)
train shape (1600,)
test shape (400,)
train shape (1600,)
test shape (400,)


In [11]:
X_train0[:][0]

'TCTAAGCTGCTTGGACACACATTTTATATGAAAATGATGAATCACATCAAATAATAGTACTGGAGGATTCTATATTGTTCAGTATGTGGATGTTATGCTGT'

In [36]:
acc_test0, acc_train0 = solve_svm_kernel(X_train0, 
                                       X_test0, 
                                       Y_train0, 
                                       Y_test0, 
                                       kernel='k_gram', 
                                       k=6, lamb=0.1, gamma=0.05)

print('accuracy for train : {}'.format(acc_train0))
print('accuracy for test : {}'.format(acc_test0))

     pcost       dcost       gap    pres   dres
 0:  2.7119e+00  3.8727e+00  3e+03  1e+00  1e+04
 1:  3.4647e+00 -9.3057e+01  1e+02  4e-02  4e+02
 2:  1.9899e+00 -1.4551e+01  2e+01  7e-03  6e+01
 3:  1.5511e+00 -4.8440e-01  2e+00  5e-04  4e+00
 4:  9.0268e-01  6.3988e-01  3e-01  1e-09  2e-08
 5:  7.2478e-01  6.8182e-01  4e-02  8e-10  1e-08
 6:  6.9991e-01  6.9463e-01  5e-03  6e-10  3e-09
 7:  6.9696e-01  6.9660e-01  4e-04  6e-10  5e-10
 8:  6.9676e-01  6.9675e-01  1e-05  6e-10  1e-10
 9:  6.9676e-01  6.9676e-01  3e-07  6e-10  2e-11
Optimal solution found.
(400, 1600)
accuracy for train : 0.916875
accuracy for test : 0.7325


In [16]:
acc_test1, acc_train1 = solve_svm_kernel(X_train1, 
                                       X_test1, 
                                       Y_train1, 
                                       Y_test1, 
                                       kernel='k_gram', 
                                       k=6, lamb=0.01, gamma=1.)

print('accuracy for train : {}'.format(acc_train1))
print('accuracy for test : {}'.format(acc_test1))

     pcost       dcost       gap    pres   dres
 0:  1.7457e-01  1.1772e+00  3e+03  1e+00  5e+04
 1:  1.1633e+00 -3.6739e+01  4e+01  2e-02  5e+02
 2:  1.0722e+00 -3.6673e+00  5e+00  2e-03  6e+01
 3:  9.1122e-01 -9.5270e-01  2e+00  5e-04  2e+01
 4:  5.1043e-01 -7.6092e-02  6e-01  1e-04  4e+00
 5:  1.5341e-01  9.6628e-02  6e-02  5e-06  2e-01
 6:  1.1857e-01  1.0687e-01  1e-02  9e-07  3e-02
 7:  1.1170e-01  1.0929e-01  2e-03  1e-07  5e-03
 8:  1.1014e-01  1.0986e-01  3e-04  1e-08  4e-04
 9:  1.0995e-01  1.0994e-01  1e-05  5e-10  1e-05
10:  1.0995e-01  1.0995e-01  3e-07  5e-10  2e-07
11:  1.0995e-01  1.0995e-01  1e-08  5e-10  3e-09
Optimal solution found.
(400, 1600)
accuracy for train : 0.99875
accuracy for test : 0.8575


In [29]:
acc_test2, acc_train2 = solve_svm_kernel(X_train2, 
                                       X_test2, 
                                       Y_train2, 
                                       Y_test2, 
                                       kernel='k_gram', 
                                       k=6, lamb=0.1, gamma=1.)

print('accuracy for train : {}'.format(acc_train2))
print('accuracy for test : {}'.format(acc_test2))

     pcost       dcost       gap    pres   dres
 0:  3.4525e+00  4.6267e+00  3e+03  1e+00  9e+03
 1:  4.1767e+00 -9.5953e+01  1e+02  4e-02  3e+02
 2:  2.2176e+00 -1.5638e+01  2e+01  7e-03  5e+01
 3:  1.6365e+00 -6.1372e-01  2e+00  6e-04  4e+00
 4:  1.0258e+00  7.2441e-01  3e-01  1e-09  2e-08
 5:  8.1832e-01  7.7156e-01  5e-02  9e-10  1e-08
 6:  7.9040e-01  7.8477e-01  6e-03  7e-10  3e-09
 7:  7.8713e-01  7.8684e-01  3e-04  7e-10  5e-10
 8:  7.8697e-01  7.8696e-01  9e-06  7e-10  9e-11
 9:  7.8696e-01  7.8696e-01  2e-07  7e-10  2e-11
Optimal solution found.
(400, 1600)
accuracy for train : 0.871875
accuracy for test : 0.6875


## Best parameters found for k_gram kernel
#### k = 3
* Set0: lambda=1e-3, gamma = 5 (gaussian), score = 0.68
* Set1: lambda=0.01, score = 
* Set2: lambda= 1e-5, score = 

#### k = 6

* Set0: lamda=0.1 , score = 0.73 (on test)
* Set1: lambda=0.01, score= 0.85 (on test)
* Set2: lambda= 1e-1, score = 0.6875 (on test)


In [25]:
def solve_svm_test(X_train, X_test, Y_train, kernel='k_gram_gaussian', k=3, lamb=0.1, gamma=0.1, kktreg=1e-9):
    """
    kernel in [k_gram, k_gram_gaussian]
    """
    Y_train_process = (Y_train-0.5) * 2
    if kernel in ['k_gram', 'k_gram_gaussian']:
        if k == 3:
            X_train_process = np.array([count_kuplet_3(x) for x in X_train])
            X_test_process = np.array([count_kuplet_3(x) for x in X_test])
        else:
            X_train_process = np.array([count_kuplet_k(x,k=k) for x in X_train])
            X_test_process = np.array([count_kuplet_k(x,k=k) for x in X_test])
        
        # Adding 1 for the sake of the bias
        X_train_process = np.concatenate((X_train_process, np.ones((X_train_process.shape[0], 1))), axis=1)
        X_test_process = np.concatenate((X_test_process, np.ones((X_test_process.shape[0], 1))), axis=1)
        
        
    if kernel=="k_gram":
        # Computing the Gram-Matrix
        K = X_train_process.dot(X_train_process.T)
        w = solve_svm(K, Y_train_process, lamb=lamb, kktreg=kktreg)
        K_test = np.dot(X_test_process, np.transpose(X_train_process))
    
    if kernel=="k_gram_gaussian":
        # Computing the Gram-Matrix
        K = np.array([LA.norm(X_train_process - y, axis=1) for y in X_train_process])
        K = np.exp(-K/gamma)
        # K = X_train_process.dot(X_train_process.T)
        w = solve_svm(K, Y_train_process, lamb=lamb)
        K_test = np.array([LA.norm(X_train_process - y, axis=1) for y in X_test_process])
        K_test = np.exp(-K_test/gamma)
    
    n = K.shape[0]
    Y_predicted = np.dot(K_test, w[:n]) > 0.
    Y_predicted = (Y_predicted + 0.)
    Y_predicted_train = np.dot(K, w[:n]) > 0.
    result_train = (Y_train == np.transpose(Y_predicted_train))
    print('accuracy on train : {}'.format(np.mean(result_train)))
    # result = ((Y_test+1)/ 2 == np.transpose(Y_predicted))
    
    return np.transpose(Y_predicted) # , np.transpose(Y_predicted_train)


In [37]:
print('train shape : {}'.format(X_raw0.shape))
print('train shape 2 : {}'.format(Y0.shape))
print('test shape : {}'.format(X_valid0.shape))

Y_predict0 = solve_svm_test(X_raw0, 
                           X_valid0, 
                           Y0, 
                           kernel='k_gram', 
                           k=6, lamb=.1, gamma=5.)

train shape : (2000,)
train shape 2 : (2000,)
test shape : (1000,)
     pcost       dcost       gap    pres   dres
 0:  4.7028e+00  6.2464e+00  4e+03  1e+00  2e+04
 1:  4.9179e+00 -1.5739e+02  2e+02  6e-02  7e+02
 2:  2.2650e+00 -2.4945e+01  3e+01  9e-03  1e+02
 3:  1.6865e+00 -1.8054e+00  3e+00  8e-04  9e+00
 4:  1.1372e+00  6.4318e-01  5e-01  1e-09  2e-08
 5:  7.7090e-01  6.9706e-01  7e-02  9e-10  2e-08
 6:  7.2972e-01  7.1811e-01  1e-02  6e-10  4e-09
 7:  7.2332e-01  7.2245e-01  9e-04  6e-10  9e-10
 8:  7.2285e-01  7.2282e-01  3e-05  6e-10  2e-10
 9:  7.2284e-01  7.2284e-01  8e-07  6e-10  3e-11
10:  7.2284e-01  7.2284e-01  3e-08  6e-10  7e-12
Optimal solution found.
accuracy on train : 0.892


In [26]:
print('train shape : {}'.format(X_raw1.shape))
print('train shape 2 : {}'.format(Y1.shape))
print('test shape : {}'.format(X_valid1.shape))

Y_predict1 = solve_svm_test(X_raw1, 
                           X_valid1, 
                           Y1, 
                           kernel='k_gram', 
                           k=6, lamb=0.01, gamma=3.)

train shape : (2000,)
train shape 2 : (2000,)
test shape : (1000,)
     pcost       dcost       gap    pres   dres
 0:  2.8983e-01  1.2948e+00  4e+03  1e+00  6e+04
 1:  1.2756e+00 -4.9209e+01  5e+01  2e-02  8e+02
 2:  1.1341e+00 -4.9220e+00  6e+00  2e-03  8e+01
 3:  9.7335e-01 -1.1897e+00  2e+00  5e-04  2e+01
 4:  6.1181e-01 -1.4378e-01  8e-01  1e-04  6e+00
 5:  2.0782e-01  1.1487e-01  9e-02  8e-06  3e-01
 6:  1.5178e-01  1.3215e-01  2e-02  1e-06  6e-02
 7:  1.4022e-01  1.3654e-01  4e-03  2e-07  8e-03
 8:  1.3784e-01  1.3747e-01  4e-04  1e-08  5e-04
 9:  1.3760e-01  1.3758e-01  2e-05  6e-10  2e-05
10:  1.3758e-01  1.3758e-01  7e-07  6e-10  4e-07
11:  1.3758e-01  1.3758e-01  2e-08  6e-10  6e-09
Optimal solution found.
accuracy on train : 0.997


In [38]:
print('train shape : {}'.format(X_raw2.shape))
print('train shape 2 : {}'.format(Y2.shape))
print('test shape : {}'.format(X_valid2.shape))

Y_predict2 = solve_svm_test(X_raw2, 
                           X_valid2, 
                           Y2, 
                           kernel='k_gram', 
                           k=6, lamb=1e-1, gamma=3.)

train shape : (2000,)
train shape 2 : (2000,)
test shape : (1000,)
     pcost       dcost       gap    pres   dres
 0:  6.1279e+00  7.7606e+00  4e+03  1e+00  1e+04
 1:  6.1775e+00 -1.6590e+02  2e+02  6e-02  5e+02
 2:  2.5770e+00 -3.1004e+01  3e+01  1e-02  9e+01
 3:  1.7643e+00 -2.5145e+00  4e+00  1e-03  9e+00
 4:  1.2983e+00  7.2710e-01  6e-01  1e-09  1e-08
 5:  8.6537e-01  7.8043e-01  8e-02  1e-09  2e-08
 6:  8.1592e-01  8.0095e-01  1e-02  8e-10  6e-09
 7:  8.0703e-01  8.0601e-01  1e-03  7e-10  1e-09
 8:  8.0645e-01  8.0642e-01  3e-05  7e-10  2e-10
 9:  8.0644e-01  8.0644e-01  7e-07  7e-10  3e-11
Optimal solution found.
accuracy on train : 0.8425


In [42]:
test0 = Y_predict0[:][0]
test1 = Y_predict1[:][0]
test2 = Y_predict2[:][0]

bound = np.concatenate((test0,test1,test2), axis=0).reshape((-1)).astype(int)
final = pd.DataFrame(np.arange(3000), columns=['Id'])
final['Bound'] = bound
final.to_csv('resultk_6.csv', index= None)

In [43]:
# Check before submit if the final has good shape
final

Unnamed: 0,Id,Bound
0,0,0
1,1,1
2,2,0
3,3,0
4,4,1
5,5,1
6,6,1
7,7,0
8,8,1
9,9,0
