In [1]:
import numpy as np
import tensorflow as tf

In [2]:
npz = np.load('Titanic_data_train.npz')

train_inputs = npz['inputs'].astype(np.float) #all inputs have to be float -> learn correctly
train_targets = npz['targets'].astype(np.int)

npz = np.load('Titanic_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

npz = np.load('Titanic_data_test.npz')
test_inputs = npz['inputs'].astype(np.float)

## Model

In [3]:
input_size = train_inputs.shape[1]
output_size = 2
hidden_layer_size = 100

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'), #1st hidden layer dot product of input and weight + bias
    tf.keras.layers.Dense(hidden_layer_size, activation = 'sigmoid'),
    tf.keras.layers.Dense(hidden_layer_size, activation = 'sigmoid'),
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(output_size, activation = 'softmax')
])

In [4]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

In [5]:
max_epochs = 100
batch_size = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience=2) 
#stop training when the validation loss starts increasing
#patience: how many consecutive increase we can tolerate

model.fit(train_inputs,
         train_targets,
         #batch_size = batch_size,
         epochs = max_epochs,
          callbacks = [early_stopping],
         validation_data = (validation_inputs, validation_targets),
          verbose=2)
#set up early stop --> prevent overfitting

Train on 615 samples, validate on 69 samples
Epoch 1/100
615/615 - 2s - loss: 0.7072 - accuracy: 0.5138 - val_loss: 0.6980 - val_accuracy: 0.4058
Epoch 2/100
615/615 - 0s - loss: 0.6681 - accuracy: 0.5967 - val_loss: 0.6680 - val_accuracy: 0.7971
Epoch 3/100
615/615 - 0s - loss: 0.6436 - accuracy: 0.6114 - val_loss: 0.6384 - val_accuracy: 0.7536
Epoch 4/100
615/615 - 0s - loss: 0.5709 - accuracy: 0.7463 - val_loss: 0.5810 - val_accuracy: 0.7536
Epoch 5/100
615/615 - 0s - loss: 0.4991 - accuracy: 0.7593 - val_loss: 0.5485 - val_accuracy: 0.7681
Epoch 6/100
615/615 - 0s - loss: 0.4884 - accuracy: 0.7675 - val_loss: 0.5546 - val_accuracy: 0.7536
Epoch 7/100
615/615 - 0s - loss: 0.4690 - accuracy: 0.7789 - val_loss: 0.6136 - val_accuracy: 0.7536


<tensorflow.python.keras.callbacks.History at 0x1ec76c3cd08>

In [6]:
model.predict(test_inputs)

array([[0.70590806, 0.29409197],
       [0.37726346, 0.6227366 ],
       [0.51825595, 0.48174408],
       [0.7653759 , 0.23462407],
       [0.3352907 , 0.66470927],
       [0.5403109 , 0.4596891 ],
       [0.16343747, 0.8365625 ],
       [0.4441902 , 0.5558098 ],
       [0.11572217, 0.8842778 ],
       [0.7291812 , 0.27081886],
       [0.7859538 , 0.2140461 ],
       [0.6102706 , 0.38972935],
       [0.03473802, 0.96526194],
       [0.5177654 , 0.48223466],
       [0.05184511, 0.9481549 ],
       [0.05776725, 0.9422328 ],
       [0.44778273, 0.5522173 ],
       [0.5925747 , 0.40742528],
       [0.36369023, 0.6363098 ],
       [0.33683348, 0.6631665 ],
       [0.36800727, 0.63199264],
       [0.4169295 , 0.58307046],
       [0.06938883, 0.93061113],
       [0.13908343, 0.8609166 ],
       [0.01467113, 0.98532885],
       [0.8154898 , 0.1845102 ],
       [0.03964439, 0.9603556 ],
       [0.6191931 , 0.38080692],
       [0.5859301 , 0.41406986],
       [0.7198052 , 0.28019485],
       [0.

In [7]:
import pandas as pd
ans = pd.DataFrame(columns = ["PassengerId", "Survived"])

In [8]:
predicted_values = model.predict(test_inputs)
data = pd.read_csv('test.csv')
ans['PassengerId'] = data['PassengerId'].copy()
for i in range(0, 418):
    if predicted_values[i][0] > predicted_values[i][1]:
        ans['Survived'][i] = 0
    else:
        ans['Survived'][i] = 1
ans['Survived'].sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


232

In [9]:
ans.to_csv("ans.csv", index=False)