In [1]:
import tensorflow as tf
from tensorflow import keras

import pandas as pd
import numpy as np

In [2]:
train_df = pd.read_csv('./data/train.csv')
# we want to use binary crossentropy, which means we need one hot encoding.
# lucky for us, we can just use pd.get_dummies
# because we also have a marker, and not just a color, we gotta do the same thing on the marker

one_hot_color = pd.get_dummies(train_df.color).values
one_hot_marker = pd.get_dummies(train_df.marker).values

#now we need to concatonate these 2 values
# we can use np.concatenate

labels = np.concatenate((one_hot_color, one_hot_marker), axis=1)

In [3]:
model = keras.Sequential([
	keras.layers.Dense(64, input_shape=(2,), activation='relu'),
	keras.layers.Dense(64, activation='relu'),
	keras.layers.Dense(64, activation='relu'),
	keras.layers.Dense(9, activation='sigmoid')])

2021-12-26 17:10:14.799608: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-26 17:10:14.803177: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


In [4]:
model.compile(optimizer="adam", 
                loss = keras.losses.BinaryCrossentropy(from_logits=True), 
                metrics=["accuracy"])

In [5]:
attributes = np.column_stack((train_df.x.values, train_df.y.values))

#we couldn't shuffle it earlier, because we need to use the labels, but now that we have them, 
    # we can shuffle both.
# it's super important that they get shuffled in the same way though, otherwise the labels
    # will be off.
    #We can do that with a random state and a set seed. 
x = np.column_stack((train_df.x.values, train_df.y.values))

np.random.RandomState(seed=42).shuffle(x)
np.random.RandomState(seed=42).shuffle(labels)

model.fit(x, labels, batch_size=4, epochs=10)

Train on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc993466b50>

In [6]:
test_df = pd.read_csv("./data/test.csv")
# test_df["color"] = test_df.color.apply(lambda x: color_map[x])
test_x = np.column_stack((test_df.x.values, test_df.y.values))

test_color = pd.get_dummies(test_df.color).values
test_market = pd.get_dummies(test_df.marker).values

test_label = np.concatenate((test_color, test_market), axis=1)

In [7]:
model.evaluate(test_x, test_label)



[0.63415940284729, 0.9372222]