In [1]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn.svm import LinearSVC

from matplotlib import pyplot as plt
%matplotlib inline

import pickle
import csv

# Validate

## Load the model

In [2]:
# Read the serialized model from disk as raw bytes; it is deserialized in the next cell.
with open('svm_clf_one', mode='rb') as model_file:
    s = model_file.read()

In [3]:
# Rebuild the model object from the bytes read above.
# SECURITY: unpickling can execute arbitrary code, so only load files from a trusted source.
model = pickle.loads(s)

In [4]:
# Sanity check: confirm what kind of object was unpickled (expected: a scikit-learn Pipeline).
type(model)

sklearn.pipeline.Pipeline

## Load the test data

In [5]:
# Load the test set into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv('data/test.csv')

In [6]:
# Peek at the first raw row to confirm the columns loaded as expected.
df.head(1)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q


## Transform the test data

In [7]:
from transformers import Transformer

In [8]:
# Build the project's Transformer and convert the raw frame into a feature matrix X
# (and labels y, which are None for this data, as printed below).
# NOTE(review): the later df.head() output shows Sex/Ticket/Cabin/Embarked as numeric
# codes instead of the raw strings seen right after loading, so transform() appears to
# modify `df` in place — confirm in the transformers module.
transformer = Transformer()
X, y = transformer.transform(df)

In [9]:
# Inspect the first five rows of the transformed feature matrix.
X[:5]

array([[ 0.75592895, -0.47091535,  0.87348191, -0.49741333],
       [-1.32287566,  0.70076689,  0.87348191, -0.51227801],
       [ 0.75592895, -0.47091535, -0.31581919, -0.46410047],
       [ 0.75592895,  0.70076689,  0.87348191, -0.48247516],
       [-1.32287566,  0.70076689,  0.87348191, -0.4174915 ]])

In [10]:
# Show that no labels were returned. print() is used deliberately: a bare `y`
# that is None would render no cell output at all.
print(y)

None


## Define the prediction function

In [11]:
# Feature names exposed by the transformer; used below as column labels for X.
# Presumably listed in the same order as the columns of X — confirm in the transformers module.
cols = transformer.attribute_names
cols

['Sex', 'Embarked', 'Pclass', 'Fare']

In [12]:
def predictions(x, model_func):
    """Predict the label for a single sample.

    Intended as a row-wise callback for ``DataFrame.apply(..., axis=1)``,
    where each call receives one row of features.

    Parameters
    ----------
    x : sequence of feature values
        One sample (e.g. one DataFrame row).
    model_func : object with a ``predict`` method
        The model; ``predict`` is called with a one-sample batch.

    Returns
    -------
    The first (and only) element of the model's prediction for that batch.
    """
    # The model is handed a batch, so wrap the lone sample in a list.
    single_row_batch = [x]
    return model_func.predict(single_row_batch)[0]

In [13]:
# Wrap the feature array in a labelled DataFrame so rows can be passed to the
# prediction helper one at a time via DataFrame.apply.
dfp = pd.DataFrame(X, columns=cols)

In [14]:
# Check that the feature values line up under the expected column names.
dfp.head(1)

Unnamed: 0,Sex,Embarked,Pclass,Fare
0,0.755929,-0.470915,0.873482,-0.497413


In [15]:
predictions = dfp.apply(predictions, model_func=model, axis=1)

## Create the predictions

In [16]:
df['Survived'] = predictions

In [17]:
# Review the frame with the new Survived column attached.
df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,892,3,"Kelly, Mr. James",1,34.5,0,0,152,7.8292,-1,1,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",0,47.0,1,0,221,7.0,-1,2,1
2,894,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,73,9.6875,-1,1,0
3,895,3,"Wirz, Mr. Albert",1,27.0,0,0,147,8.6625,-1,2,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",0,22.0,1,1,138,12.2875,-1,2,1


In [18]:
# Keep only the passenger identifier and the predicted label for the submission file.
submission_columns = ['PassengerId', 'Survived']
submission = df[submission_columns]
submission.head(1)

Unnamed: 0,PassengerId,Survived
0,892,0


In [19]:
# Write the submission file without the DataFrame index.
# csv.QUOTE_ALL asks the writer to quote every field, so the numeric
# PassengerId/Survived values are quoted in the output as well.
submission.to_csv('submission.csv', index=False, quoting=csv.QUOTE_ALL)