# Import

In [None]:

!python3 -m pip install tensorflow==2.13.0rc1
!python3 -m pip install finta
!python3 -m pip install -q --upgrade keras-nlp
!python3 -m pip install numpy
!python3 -m pip install pandas
!python3 -m pip install matplotlib
!python3 -m pip install plotly==5.3.1
!python3 -m pip install opencv-python
!python3 -m pip install -U kaleido
!python3 -m pip install autokeras
!python3 -m pip install keras-tuner


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import CustomObjectScope
from tensorflow.keras.models import load_model
import keras_tuner
import pandas as pd
import time
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from PIL import Image
import io
from finta import TA
import os
import autokeras as ak
# `google.colab` only exists inside a Colab runtime; everywhere else the import is skipped.
try:
    from google.colab import runtime
except ImportError:
    # Not on Colab: `runtime` stays undefined. Only ImportError is tolerated here so that
    # any other failure (e.g. KeyboardInterrupt) is not silently swallowed.
    pass

# Seed both random number generators (TensorFlow and NumPy) with the same fixed value.
tf.random.set_seed(0)
np.random.seed(0)

# Report the TensorFlow, AutoKeras and KerasTuner versions, one per line.
print(tf.version.VERSION, ak.__version__, keras_tuner.__version__, sep="\n")

# Gather Training Data
classes (one sub-directory per class under `image_data/train/` and `image_data/test/`):
0 - loss
1 - win

In [None]:
# Batch size shared by the training and the test dataset.
batch_size = 32

# Training images, read as grayscale with image_size=(200, 50).
# NOTE(review): no `validation_split` is passed, so `subset` presumably has no effect here — confirm.
train_data = ak.image_dataset_from_directory(
    "image_data/train/",
    batch_size=batch_size,
    image_size=(200, 50),
    color_mode="grayscale",
    subset="training",
)

# Optional: shrink the training set to 100 batches while experimenting.
# train_data = train_data.take(100)

# Dropping images to balance the classes would distort the realistic win/loss ratio,
# so the imbalance is compensated with class weights instead.
# Count the files in each class directory.
loss_count = len(os.listdir("image_data/train/loss"))
win_count = len(os.listdir("image_data/train/win"))
total_count = loss_count + win_count

# Each class is weighted with the *other* class's share of the data, so the rarer class
# receives the larger weight (weighting each class with its own share would amplify the imbalance).
# Assumes label 0 is "loss" and label 1 is "win", as in the test cell — TODO confirm against
# the label encoding AutoKeras applies.
class_weight = {
    0: win_count / total_count,
    1: loss_count / total_count,
}
print(class_weight)

# Test images, loaded with the same size, batch size and color mode as the training images.
test_data = ak.image_dataset_from_directory(
    "image_data/test/",
    batch_size=batch_size,
    image_size=(200, 50),
    color_mode="grayscale",
    subset="testing",
)


# Classification

In [None]:
# Custom metric: among the samples predicted as the non-zero ("win") class, the share that is correct.

def win_rate(y_true, y_pred):
    """Return the fraction of non-zero predictions that match the true label.

    `y_pred` is thresholded at 0.5 (values below become 0, all others 1). The result is
    the number of samples predicted as 1 whose label equals the prediction, divided by the
    number of samples predicted as 1; it is 0 when nothing is predicted as 1.
    """
    labels = tf.cast(y_true, tf.float32)
    # hard 0/1 decisions from the raw scores
    decisions = tf.cast(tf.where(y_pred < 0.5, 0, 1), tf.float32)

    predicted_win = tf.math.not_equal(decisions, 0)
    correct_win = tf.math.logical_and(predicted_win, tf.math.equal(labels, decisions))

    n_predicted_wins = tf.math.reduce_sum(tf.where(predicted_win, 1.0, 0.0))
    n_correct_wins = tf.math.reduce_sum(tf.where(correct_win, 1.0, 0.0))

    # divide_no_nan yields 0 instead of NaN when there are no predicted wins
    return tf.math.divide_no_nan(n_correct_wins, n_predicted_wins)

# Set up the AutoKeras image-classifier search: up to 5 trials, tracked with accuracy and
# the custom win-rate metric, and ranked by the highest validation win rate.
clf = ak.ImageClassifier(
    project_name="model",
    directory="drive/MyDrive/",
    max_trials=5,
    objective=keras_tuner.Objective("val_win_rate", direction="max"),
    metrics=["accuracy", win_rate],
)

# Run the search. Training stops early once the validation win rate has not improved
# for 10 epochs, and the best weights seen are restored.
history = clf.fit(
    train_data,
    validation_data=test_data,
    class_weight=class_weight,
    epochs=1000,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_win_rate",
            mode="max",
            patience=10,
            restore_best_weights=True,
        )
    ],
)

def _plot_history_metric(history, metric, path):
    """Plot one metric of a training history per epoch, save it as PNG, then display it.

    Args:
        history: object whose `.history` dict maps metric names to per-epoch values.
        metric: key in `history.history` (e.g. 'loss', 'accuracy', 'win_rate'); the
            matching 'val_<metric>' curve is drawn as well when it is present.
        path: file the PNG is written to.
    """
    label = metric.replace('_', ' ')
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    val_key = 'val_' + metric
    if val_key in history.history:
        ax.plot(history.history[val_key], label='validation')
    ax.set_title('model ' + label)
    ax.set_ylabel(label)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    # Save BEFORE showing: with the notebook's inline backend the figure is closed once it
    # has been shown, so a savefig issued afterwards writes a blank image.
    fig.savefig(path, format='png')
    plt.show()


# loss, accuracy and win rate, one figure each
_plot_history_metric(history, 'loss', 'drive/MyDrive/loss.png')
_plot_history_metric(history, 'accuracy', 'drive/MyDrive/accuracy.png')
_plot_history_metric(history, 'win_rate', 'drive/MyDrive/win_rate.png')

# Export the best model of the search; the custom metric is registered for the duration
# of the export so that it can be resolved by name.
with CustomObjectScope({'win_rate': win_rate}):
    model = clf.export_model()

# Prefer the TensorFlow SavedModel format and fall back to HDF5 if that fails.
# The failure is reported rather than hidden, because the fallback writes to a different
# path ("model_autokeras.h5") than the one the test cell loads ("model_autokeras").
try:
    model.save("drive/MyDrive/model_autokeras", save_format="tf")
except Exception as error:
    print("SavedModel export failed, saving as HDF5 instead:", repr(error))
    model.save("drive/MyDrive/model_autokeras.h5")



# Test the model

In [None]:
def _load_test_images(directory, label):
    """Load every .png in `directory` as a grayscale array and pair it with `label`.

    Returns a tuple `(images, labels)` of two equally long lists.
    """
    images = []
    for name in sorted(os.listdir(directory)):
        if not name.endswith(".png"):
            continue
        # the context manager closes the file handle once the pixels have been copied
        with Image.open(os.path.join(directory, name)) as image:
            # PIL sizes are (width, height): 50 wide x 200 high yields a (200, 50) array
            images.append(np.array(image.resize((50, 200)).convert('L')))
    return images, [label] * len(images)


def _plot_confidence_histogram(confidence, correct, path):
    """Overlay the confidence histograms of correct and incorrect predictions.

    `confidence` and `correct` are equally long arrays (`correct` is boolean).
    The figure is saved as PNG to `path` and then displayed.
    """
    fig, ax = plt.subplots()
    ax.hist(confidence[correct], bins=100, alpha=0.5, label='Correct')
    ax.hist(confidence[~correct], bins=100, alpha=0.5, label='Incorrect')
    ax.set_xlabel('confidence')
    ax.set_ylabel('count')
    ax.legend(loc='upper right')
    fig.savefig(path, format='png')
    plt.show()


# The export cell writes "<path>.h5" when the SavedModel export fails, so accept either artifact.
model_path = "drive/MyDrive/model_autokeras"
if not os.path.exists(model_path) and os.path.exists(model_path + ".h5"):
    model_path += ".h5"
model = load_model(model_path, custom_objects={'win_rate': win_rate})

# Label 0 = loss, label 1 = win. The images are kept in `test_images` (not `test_data`) so the
# validation dataset used by the training cell is not overwritten when this cell runs.
loss_images, loss_labels = _load_test_images("image_data/test/loss", 0)
win_images, win_labels = _load_test_images("image_data/test/win", 1)
test_images = np.array(loss_images + win_images)
test_results = np.array(loss_labels + win_labels)
if len(test_images) == 0:
    raise ValueError("no .png test images found in image_data/test/loss or image_data/test/win")

# predict the results
predictions = model.predict(test_images)

# convert the predictions to 0 or 1
overall_predictions = np.where(predictions < 0.5, 0, 1).reshape(-1)

# Confidence is the distance of the score from the 0.5 threshold, scaled to [0, 1].
# This equals (0.5 - p) * 2 for a predicted 0 and (p - 0.5) * 2 for a predicted 1.
confidence = np.abs(predictions[:, 0] - 0.5) * 2

correct = overall_predictions == test_results
predicted_win = overall_predictions != 0

# accuracy: share of all predictions that are correct
accuracy = np.sum(correct) / len(test_results)

# win rate: share of the predicted wins that are correct (NaN when no win was predicted).
# Stored as `test_win_rate` so the `win_rate` metric function needed by load_model stays intact.
predicted_win_count = np.sum(predicted_win)
if predicted_win_count:
    test_win_rate = np.sum(np.logical_and(correct, predicted_win)) / predicted_win_count
else:
    test_win_rate = float("nan")

print("accuracy:", accuracy)
_plot_confidence_histogram(confidence, correct, "drive/MyDrive/model_accuracy_confidence.png")

print("win rate:", test_win_rate)
# same histogram, restricted to the samples predicted as a win
_plot_confidence_histogram(
    confidence[predicted_win],
    correct[predicted_win],
    "drive/MyDrive/model_winrate_confidence.png",
)


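# Google Colab support

On Colab, run the Drive-mount and unzip cells below before the data-loading and training cells above (they provide `drive/MyDrive/` and `image_data/`); `runtime.unassign()` shuts the runtime down, so run it last.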

In [None]:
# Release the Colab runtime. The `google.colab` import at the top of the notebook is optional,
# so outside Colab `runtime` is undefined and there is nothing to release.
try:
    runtime.unassign()
except NameError:
    print("Not running on Google Colab; no runtime to unassign.")

In [None]:
# Mount Google Drive so the relative 'drive/MyDrive/...' paths used in this notebook resolve
# (assumes the working directory is /content — TODO confirm). Outside Colab the module is
# missing, so the mount is skipped instead of aborting the run.
try:
    from google.colab import drive
except ImportError:
    print("Not running on Google Colab; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
import zipfile

# Unpack the image archive stored on Drive; the empty target path extracts the members
# relative to the current working directory.
with zipfile.ZipFile('drive/MyDrive/image_data.zip', mode='r') as archive:
    archive.extractall(path="")