# Simple neural network for classification

Reference notebook: [Very simple neural network for classification (Kaggle)](https://www.kaggle.com/code/carlmcbrideellis/very-simple-neural-network-for-classification/notebook)

In [32]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense 

In [33]:
# Load the training and test tables from the CSV files in the working directory
train_data, test_data = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))

In [34]:
# Input columns fed to the model. "Sex" is text-valued and gets one-hot
# encoded in a later cell; the other three are used as-is.
features = [
    "Pclass",
    "Sex",
    "SibSp",
    "Parch",
]

In [35]:
# Categorical features are expanded with pd.get_dummies:
# "Convert categorical variable into dummy/indicator variables."
# dtype=int keeps the indicator columns numeric. pandas >= 2.0 defaults to
# bool, and a frame mixing bool and integer columns converts to an object
# array, which Keras may fail to turn into a tensor.
X_train = pd.get_dummies(train_data[features], dtype=int)
y_train = train_data["Survived"]
# Encode the test set on its own, then force it onto the training columns
# (same set, same order). A category absent from the test file becomes an
# all-zero column and a category never seen in training is dropped, so the
# network always receives exactly the inputs it was trained on.
X_test = pd.get_dummies(test_data[features], dtype=int).reindex(
    columns=X_train.columns, fill_value=0
)

In [8]:
# preview the first five rows of the encoded training features
X_train.head(n=5)

Unnamed: 0,Pclass,SibSp,Parch,Sex_female,Sex_male
0,3,1,0,0,1
1,1,1,0,1,0
2,3,0,0,1,0
3,1,1,0,1,0
4,3,0,0,0,1


In [9]:
# preview the first five training labels
y_train.head(n=5)

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [10]:
# preview the first five rows of the encoded test features
X_test.head(n=5)

Unnamed: 0,Pclass,SibSp,Parch,Sex_female,Sex_male
0,3,0,0,0,1
1,3,1,0,1,0
2,2,0,0,0,1
3,3,0,0,0,1
4,3,1,1,1,0


In [11]:
# Keras hyper-parameters
input_dim = X_train.shape[1]  # one input neuron per feature column
n_neurons = 50                # width of the first hidden layer
epochs = 100                  # number of training epochs passed to model.fit

In [13]:
# Feed-forward binary classifier, built by handing Sequential the whole
# layer stack at once:
#   * hidden layer: n_neurons ReLU units; input_dim tells Keras how many
#     features each sample carries
#   * output layer: a single sigmoid unit producing a value between 0 and 1
model = Sequential([
    Dense(n_neurons, input_dim=input_dim, activation='relu'),
    Dense(1, activation='sigmoid'),
])
# binary cross-entropy loss, Adam optimizer, accuracy reported as a metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [15]:
# Fit the network on the training set for `epochs` epochs; verbose=0 turns
# off the per-epoch progress output. The call is the cell's last expression,
# so the returned History object is what the notebook displays.
# NOTE(review): no random seed is set anywhere in this notebook, so results
# will vary from run to run.
model.fit(x=X_train, y=y_train, epochs=epochs, verbose=0)

<keras.callbacks.History at 0x24896260d60>

In [19]:
# Score the test set and turn the sigmoid outputs into 'Survived' labels:
# strictly greater than 0.5 -> 1, otherwise 0.
# astype(int) maps the Boolean mask to an array of 0/1 integers.
predictions = (model.predict(X_test) > 0.5).astype(int)



In [20]:
# first five predictions; each row is a one-element array
predictions[0:5]

array([[0],
       [1],
       [0],
       [0],
       [1]])

In [21]:
# first twenty predictions as a flat 1-D array
predictions.ravel()[:20]

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1])

In [22]:
# Build the submission table: one row per test passenger, pairing its id with
# the predicted label. predictions has one column per row, so it is flattened
# to 1-D before being used as a DataFrame column.
output = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': predictions.ravel(),
})
# index=False keeps the row index out of the CSV file
output.to_csv('submission.csv', index=False)