In [1]:
import numpy as np
import pandas as pd
from time import time
from sklearn.preprocessing import StandardScaler

np.random.seed(1337)

df = pd.read_csv('titanic.csv')

In [2]:
df_train = df.iloc[:712, :]

df_train = df_train.drop(['Name', 'Ticket', 'Cabin', 'Embarked'], axis=1)

age_mean = df_train['Age'].mean()
df_train['Age'] = df_train['Age'].fillna(age_mean)
df_train['Sex'] = df_train['Sex'].map({'female': 0, 'male': 1}).astype(int)

scaler = StandardScaler()

X_train = scaler.fit_transform(df_train.iloc[:, 2:].values)
y_train = df_train['Survived'].values

In [3]:
df_test = df.iloc[712:, :]

df_test = df_test.drop(['Name', 'Ticket', 'Cabin', 'Embarked'], axis=1)

df_test['Age'] = df_test['Age'].fillna(age_mean)
df_test['Sex'] = df_test['Sex'].map({'female': 0, 'male': 1}).astype(int)

X_test = scaler.transform(df_test.iloc[:, 2:].values)
y_test = df_test['Survived'].values

In [4]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=0)
model = model.fit(X_train, y_train)

y_prediction = model.predict(X_test)
print "accuracy", np.sum(y_prediction == y_test) / float(len(y_test))

accuracy 0.832402234637


In [5]:
def softmax(x):
    return np.exp(x) / np.exp(x).sum()

In [6]:
max_accuracy = 0
best_weights = ()

start = time()

for i in xrange(1000):
    W = np.random.rand(2, 6)
    b = np.random.rand(2,)
    y_prediction = []

    for j in xrange(X_train.shape[0]):
        result = np.dot(W, X_train[j]) + b
        result = softmax(result)
        result = np.argmax(result)
        y_prediction.append(result)

    y_prediction = np.array(y_prediction)
    accuracy = np.sum(y_prediction == y_train) / float(len(y_train))
    
    if accuracy > max_accuracy:
        print 'accuracy %s, loop %s' % (round(accuracy, 3), i)
        max_accuracy = accuracy
        best_weights = (W, b)

print '\ntime taken %s seconds' % str(time() - start)

accuracy 0.292, loop 0
accuracy 0.64, loop 1
accuracy 0.642, loop 2
accuracy 0.695, loop 6
accuracy 0.712, loop 9
accuracy 0.737, loop 45
accuracy 0.739, loop 48
accuracy 0.754, loop 56
accuracy 0.794, loop 88

time taken 8.15364694595 seconds


In [7]:
W, b = best_weights
y_prediction = []

for j in xrange(X_test.shape[0]):
    result = np.dot(W, X_test[j]) + b
    result = softmax(result)
    result = np.argmax(result)
    y_prediction.append(result)

y_prediction = np.array(y_prediction)

print np.sum(y_prediction == y_test) / float(len(y_test))

0.815642458101


In [8]:
max_accuracy = 0
best_weights = ()

start = time()

for i in xrange(1000):
    W_1 = np.random.rand(100, 6)
    b_1 = np.random.rand(100,)

    W_2 = np.random.rand(2, 100)
    b_2 = np.random.rand(2,)   
    y_prediction = []

    for j in xrange(X_train.shape[0]):
        result = np.dot(W_1, X_train[j]) + b_1
        result = np.dot(W_2, result) + b_2
        result = softmax(result)
        result = np.argmax(result)
        y_prediction.append(result)

    y_prediction = np.array(y_prediction)
    accuracy = np.sum(y_prediction == y_train) / float(len(y_train))
    
    if accuracy > max_accuracy:
        print 'accuracy %s, loop %s' % (round(accuracy, 3), i)
        max_accuracy = accuracy
        best_weights = (W_1, b_1, W_2, b_2)

print '\ntime taken %s seconds' % str(time() - start)

accuracy 0.566, loop 0
accuracy 0.604, loop 1
accuracy 0.712, loop 2
accuracy 0.739, loop 6
accuracy 0.75, loop 30
accuracy 0.756, loop 61
accuracy 0.785, loop 91
accuracy 0.789, loop 185
accuracy 0.796, loop 709

time taken 8.62996792793 seconds


In [9]:
W_1, b_1, W_2, b_2 = best_weights
y_prediction = []

for j in xrange(X_test.shape[0]):
    result = np.dot(W_1, X_test[j]) + b_1
    result = np.dot(W_2, result) + b_2
    result = softmax(result)
    result = np.argmax(result)
    y_prediction.append(result)

y_prediction = np.array(y_prediction)

print np.sum(y_prediction == y_test) / float(len(y_test))

0.804469273743


In [10]:
max_accuracy = 0
best_weights = ()

start = time()

for i in xrange(1000):
    W_1 = np.random.rand(100, 6)
    b_1 = np.random.rand(100,)

    W_2 = np.random.rand(100, 100)
    b_2 = np.random.rand(100,)
    
    W_3 = np.random.rand(2, 100)
    b_3 = np.random.rand(2,)
    y_prediction = []

    for j in xrange(X_train.shape[0]):
        result = np.dot(W_1, X_train[j]) + b_1
        result = np.dot(W_2, result) + b_2
        result = np.dot(W_3, result) + b_3
        result = softmax(result)
        result = np.argmax(result)
        y_prediction.append(result)

    y_prediction = np.array(y_prediction)
    accuracy = np.sum(y_prediction == y_train) / float(len(y_train))
    
    if accuracy > max_accuracy:
        print 'accuracy %s, loop %s' % (round(accuracy, 3), i)
        max_accuracy = accuracy
        best_weights = (W_1, b_1, W_2, b_2, W_3, b_3)

print '\ntime taken %s seconds' % str(time() - start)

accuracy 0.579, loop 0
accuracy 0.608, loop 1
accuracy 0.617, loop 2
accuracy 0.625, loop 17
accuracy 0.635, loop 18
accuracy 0.636, loop 73
accuracy 0.643, loop 79
accuracy 0.656, loop 419

time taken 12.0794928074 seconds


In [11]:
W_1, b_1, W_2, b_2, W_3, b_3 = best_weights
y_prediction = []

for j in xrange(X_test.shape[0]):
    result = np.dot(W_1, X_test[j]) + b_1
    result = np.dot(W_2, result) + b_2
    result = np.dot(W_3, result) + b_3    
    result = softmax(result)
    result = np.argmax(result)
    y_prediction.append(result)

y_prediction = np.array(y_prediction)

print np.sum(y_prediction == y_test) / float(len(y_test))

0.631284916201
