# Support Vector Machine using scikit-learn

Let's load the training data and shuffle its rows to get started.

In [1]:
import numpy as np
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

In [2]:
# Read the training features and labels from disk. With delimiter=None,
# genfromtxt splits columns on any run of whitespace.
X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

# Draw a single seeded permutation of the row indices and apply it to both
# arrays, so every feature row stays paired with its label.
np.random.seed(0)
s = np.random.permutation(X.shape[0])

X, Y = X[s], Y[s]

Let's check the shape of the loaded data.

In [3]:
# Sanity check: show the dimensions of the loaded feature array.
feature_shape = X.shape
print(feature_shape)

(200000, 14)


In [4]:
# Fraction of the rows used for training; the remainder is held out for testing.
ratio = 0.8

n_samples = len(X)

# Sample the training rows without replacement.
train_indices = np.random.choice(n_samples,
                                 int(round(n_samples * ratio)),
                                 replace=False)

# The held-out rows are every index that was not drawn for training.
# np.setdiff1d yields a sorted integer array, so the test-set order does not
# depend on Python set iteration order.
test_indices = np.setdiff1d(np.arange(n_samples), train_indices)

# The two index sets must partition the rows: no overlap, nothing dropped.
assert len(train_indices) + len(test_indices) == n_samples

X_train = X[train_indices]
Y_train = Y[train_indices]

X_test = X[test_indices]
Y_test = Y[test_indices]

# Scale the data, fitting only on training data so that no statistics of the
# held-out rows leak into the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape)

(160000, 14)


In [5]:
# Estimator settings, collected in one place:
#   probability=True -> enables predict_proba(), which later cells rely on
#   verbose=True     -> lets the underlying solver print its progress output
#   cache_size=3000  -> enlarged kernel cache passed to SVC
svc_options = dict(probability=True, verbose=True, cache_size=3000)
classifier = svm.SVC(**svc_options)

# Keep fit() as the final expression so the fitted estimator's repr is shown.
classifier.fit(X_train, Y_train)

[LibSVM]

SVC(C=1.0, cache_size=3000, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=True)

In [6]:
# Per-class probability estimates for the held-out rows; column 1 is used as
# the ranking score for the ROC AUC computation.
predictions = classifier.predict_proba(X_test)
positive_class_scores = predictions[:, 1]

score = roc_auc_score(y_true=Y_test, y_score=positive_class_scores)
print(score)

0.683700689813552


# Scores

```
```

In [7]:
# Load the unlabeled rows that the submission must score.
X_submit = np.genfromtxt('data/X_test.txt', delimiter=None)

# The classifier was fitted on standardized features, so the submission rows
# must go through the same fitted scaler before prediction; feeding raw
# features would put them on a different scale than the training data.
submit_probabilities = classifier.predict_proba(scaler.transform(X_submit))[:, 1]

# Two columns: a 0-based row ID and the column-1 probability estimate.
Y_submit = np.vstack((np.arange(X_submit.shape[0]), submit_probabilities)).T

# NOTE(review): fmt is a multi-format string, so per the numpy docs the
# `delimiter` argument is ignored and each row is written as "ID, prob" with
# the probability rounded to two decimals — confirm this matches the expected
# submission format.
np.savetxt('Y_submit.txt', Y_submit, '%d, %.2f', header='ID,Prob1',comments='',delimiter=',')