Import the required modules and load the dataset from the CSV file. The first rows are displayed, followed by the names of all columns that contain missing values.

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the semicolon-separated CSV file into a DataFrame.
path = "../Data/adult-2.csv"
data = pd.read_csv(path, sep=';')

# Preview the first rows of the table.
preview = data.head()
print(preview)

# Report every column that holds at least one missing (NaN) value.
has_missing = data.isna().any()
print("Empty columns: ", data.columns[has_missing])

In [None]:
# Pairwise correlations between the numeric columns of the dataset.
correlations = data.corr(numeric_only=True)

# Per column: sum of the absolute correlations with every numeric column
# (the column's correlation with itself is part of that sum).
correlations_abs_sum = correlations.abs().sum()

separator = '-' * 30

print('All correlations')
print(separator)
print(correlations_abs_sum)

# The five columns with the smallest summed absolute correlation.
print('Weakest correlations')
print(separator)
print(correlations_abs_sum.nsmallest(5))

prepare data

In [None]:
# Remove the columns with the lowest summed correlation values.
data = data.drop(columns=['fnlwgt', 'capital.loss', 'capital.gain'])

# 'income' is the prediction target: one-hot encode it into its own table
# (one float column per distinct income value) and remove it from the features.
col = pd.get_dummies(data['income'], dtype=float)
data = data.drop(columns=['income'])

# Replace every value in these columns by its integer category code 0...n-1.
# The column name must be spelled 'occupation'; a misspelled name makes the
# column selection below raise KeyError.
# NOTE(review): assumes the CSV uses the standard Adult census column names
# - confirm against the file header.
# NOTE(review): 'hours.per.week' looks numeric; category codes keep only the
# rank order of its values - confirm this is intended.
conv_num = [
    'workclass',
    'education',
    'marital.status',
    'occupation',
    'relationship',
    'hours.per.week',
    'native.country',
]
data[conv_num] = data[conv_num].apply(lambda s: s.astype('category').cat.codes)

# One-hot encode the remaining categorical columns.
conv_ohe = ['race', 'sex']
data = pd.get_dummies(data, columns=conv_ohe, dtype=float)

# Show the final table shape and the number of feature columns, which is the
# input width the network has to accept.
print(data.shape)
print(data.shape[1])

build ANN

In [None]:
# Split the feature table and the one-hot target table into training (80 %)
# and test (20 %) parts; the fixed random_state makes the split reproducible.
train_data, test_data, train_col, test_col = train_test_split(
    data, col, test_size=0.2, random_state=42
)

# Build the ANN: three hidden ReLU layers followed by a 2-unit softmax output.
# The input width is declared with an explicit Input object because passing
# `input_dim` to a layer is deprecated in current Keras.
# NOTE(review): the 2-unit output assumes 'income' has exactly two distinct
# values, so that the one-hot target table has two columns - confirm.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(data.shape[1],)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

# Configure the ANN: Adam optimizer, categorical cross-entropy to match the
# one-hot encoded targets, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train

In [None]:
# Train the network on the training split for 10 epochs.
model.fit(x=train_data, y=train_col, epochs=10)

test

In [None]:
# Evaluate the trained network on the held-out test split; the result holds
# the loss followed by the accuracy metric configured at compile time.
evaluation = model.evaluate(x=test_data, y=test_col)
test_loss, test_acc = evaluation
print('Test accuracy:', test_acc)