## Dependencies

In [None]:
import numpy as np
import pandas as pd

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils import shuffle

In [None]:
# Read the survey, discard the timestamp column and flatten every remaining
# answer into one long 1-D sequence (row-major order of the CSV).
survey = pd.read_csv("/kaggle/input/can-humans-really-be-random/survey.csv")
sequence = pd.Series(survey.drop("Timestamp", axis=1).values.reshape(-1))

# Build a sliding-window table: column "k" holds the sequence shifted down by
# k positions, so every row contains 21 consecutive values of the sequence.
raw = pd.concat({str(lag): sequence.shift(lag) for lag in range(21)}, axis=1)

# The first 20 rows are incomplete (NaN from shifting) and are dropped.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the replacement.
raw = raw.dropna().reset_index(drop=True).astype(int)

# Scale all values with ONE shared min/max by fitting on the flattened table,
# then restore the 2-D layout. `scaler` is kept so the scaling can be inverted.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(raw.values.reshape(-1, 1)).reshape(raw.shape)

# The rebuilt frame has integer column labels 0..20; the last one is dropped,
# leaving 20 columns (0..19).
raw = pd.DataFrame(scaled).drop(20, axis=1)
raw

In [None]:
# Each row of `raw` is a window of 20 scaled values: columns 0-18 are the
# features and column 19 is the value the models try to predict.
X = raw.iloc[:, :19].to_numpy()
y = raw.iloc[:, 19].to_numpy().reshape(-1, 1)

# A fixed seed makes the train/validation partition identical on every
# Restart & Run All, so the reported scores are comparable between runs.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=42)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.style import use

# Select matplotlib's "dark_background" style sheet for the figures drawn below.
use("dark_background")

# Question Answer Distribution
The histograms below show the distribution of the min-max-scaled answers, one panel for each of the 20 columns of `raw`.

In [None]:
# 5 x 4 grid of panels, one histogram per column of `raw`.
fig, ax = plt.subplots(5, 4)
fig.set_size_inches((24, 18))
fig.subplots_adjust(hspace=0.6, wspace=0.6)
fig.patch.set_facecolor('#333')

bins = 10
# Six xkcd colour names repeated so there is at least one entry per panel.
cols = ["aquamarine", "azure", "gold", "coral", "wheat", "purple"] * 4

# `ax.flat` walks the grid row by row, the same order as nested row/column loops.
plots = 0
for panel in ax.flat:
    column = raw.columns[plots]
    panel.hist(raw[column], bins=bins, color="xkcd:" + cols[plots])

    panel.title.set_text(f"Question: {column}")
    panel.grid()
    panel.set_axisbelow(False)
    panel.patch.set_facecolor('#333')

    plots += 1

plt.show()

# Modelling

In [None]:
import os

# TF_CPP_MIN_LOG_LEVEL is set to '3' only for the duration of the TensorFlow
# import and then set to '0' — presumably to hide TensorFlow's start-up log
# output while keeping later messages visible; TODO confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL']  =  '3'
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL']  =  '0'

# MinMaxScaler and shuffle are already imported in the dependencies section
# at the top of the notebook; they are repeated here.
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation
# NOTE(review): the wildcard import puts every public name of keras.metrics
# into the notebook namespace; explicit names would be easier to trace.
from tensorflow.keras.metrics import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

## DNN Regression

In [None]:
# Fully connected regressor: 19 scaled inputs -> one scaled output value.
model = Sequential()
model.add(Dense(16, activation="elu", input_shape=(19,)))
model.add(Dense(32, activation="softplus"))
model.add(Dropout(0.2))
model.add(Dense(1))
# NOTE(review): a ReLU on the output clamps predictions at 0 and passes no
# gradient while the pre-activation is negative — confirm this is intended.
model.add(Activation("relu"))

model.compile(
    loss="mean_squared_error",
    optimizer="sgd",
    # Accuracy (exact-match rate) is meaningless for a continuous target, so
    # the mean absolute error is tracked instead. The explicit name fixes the
    # history keys to "mae" / "val_mae".
    metrics=[keras.metrics.MeanAbsoluteError(name="mae")]
)

# 10% of the training rows are held out by Keras for per-epoch validation.
history = model.fit(
    X_train, y_train,
    epochs=100,
    verbose=0,
    validation_split=0.1
).history

# Learning curves: loss on the left, mean absolute error on the right.
fig, ax = plt.subplots(1, 2)
fig.set_size_inches((24, 8))

fig.subplots_adjust(hspace=0.5, wspace=0.5)
fig.patch.set_facecolor('#333')

ax[0].plot(history["loss"], label = "loss", color='blue')
ax[0].plot(history["val_loss"], label = "val_loss", color='aquamarine')
ax[0].set_title("DNN regression: loss (MSE)")
ax[0].set_xlabel("epoch")
ax[0].patch.set_facecolor('#333')
ax[0].legend()

ax[1].plot(history["mae"], label = "mae", color="gold")
ax[1].plot(history["val_mae"], label = "val_mae", color="khaki")
ax[1].set_title("DNN regression: mean absolute error")
ax[1].set_xlabel("epoch")
ax[1].patch.set_facecolor('#333')
ax[1].legend()

plt.show()

# Score on the held-out validation split, in the scaled units of `y_valid`.
# scikit-learn's documented argument order is (y_true, y_pred).
preds = model.predict(X_valid)
print(f"MAE: {mean_absolute_error(y_valid, preds)}")

## DNN Classification

In [None]:
# The targets in y_train / y_valid are min-max scaled, but
# sparse_categorical_crossentropy needs integer class ids 0..n_classes-1.
# Undo the scaling with the fitted `scaler` and shift so the smallest answer
# becomes class 0.
answer_min = scaler.data_min_[0]
n_classes = int(round(scaler.data_max_[0] - answer_min)) + 1
y_train_cls = np.rint(scaler.inverse_transform(y_train) - answer_min).astype(int).ravel()

# Fully connected classifier: 19 scaled inputs -> one probability per answer.
model = Sequential()
model.add(Dense(16, activation="relu", input_shape=(19,)))
model.add(Dense(32, activation="softplus"))
model.add(Dropout(0.2))
# One output unit per distinct answer value seen by the scaler.
model.add(Dense(n_classes))
model.add(Activation("softmax"))

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"]
)

# 10% of the training rows are held out by Keras for per-epoch validation.
history = model.fit(
    X_train, y_train_cls,
    epochs=100,
    verbose=0,
    validation_split=0.1
).history

# Learning curves: loss on the left, accuracy on the right.
fig, ax = plt.subplots(1, 2)
fig.set_size_inches((24, 8))

fig.subplots_adjust(hspace=0.5, wspace=0.5)
fig.patch.set_facecolor('#333')

ax[0].plot(history["loss"], label = "loss", color='blue')
ax[0].plot(history["val_loss"], label = "val_loss", color='aquamarine')
ax[0].set_title("DNN classification: loss")
ax[0].set_xlabel("epoch")
ax[0].patch.set_facecolor('#333')
# A legend per axes: plt.legend() alone would only label the last panel.
ax[0].legend()

ax[1].plot(history["accuracy"], label = "accuracy", color="gold")
ax[1].plot(history["val_accuracy"], label = "val_accuracy", color="khaki")
ax[1].set_title("DNN classification: accuracy")
ax[1].set_xlabel("epoch")
ax[1].patch.set_facecolor('#333')
ax[1].legend()

plt.show()

# Most probable class per validation row.
preds = model.predict(X_valid)
preds = np.argmax(preds, axis=1)

# Map the predicted classes back to the scaled units of `y_valid`, so both
# sides of the MAE are in the same units as the regression cell's score.
preds_scaled = scaler.transform((preds + answer_min).reshape(-1, 1))

print(f"MAE: {mean_absolute_error(y_valid, preds_scaled)}")

# What does this mean?
Well, with one of the methods, the DNN regressor, the loss is quite low. Could this mean that humans are not really capable of being random? Decide for yourself...