In [1]:
import os
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers

### Convert images from pixals into data, then use pretrained model to accurately classify the stocks
- Mathematical detection algorithms are at best 84% accurate. Goal is to 
- https://www.tensorflow.org/tutorials/images/transfer_learning
- Make a requirements text file
- We do not apply data augmentation because we already have patterns classified as up or down, and flipping them would be counter intuitive

### MobileNet V2 Trained by Google

In [2]:
# Global Variables
BATCH_SIZE = 32
IMG_SIZE = 224

In [3]:
# Paths: For local machine
train_dir = 'C:/Users/Nick/Documents/SchoolStuff/spring2024/machineLearning/final_project/tradingpatterns/stock_patterns/train'
validation_dir = 'C:/Users/Nick/Documents/SchoolStuff/spring2024/machineLearning/final_project/tradingpatterns/stock_patterns/validation'

# train_dir = 'C:/Users/Nick/Desktop/stock images/train'
# validation_dir = 'C:/Users/Nick/Desktop/stock images/validation'

In [4]:
# Create training and validation dataset
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir,
                                                            shuffle=True,
                                                            batch_size=BATCH_SIZE,
                                                            color_mode='rgb',
                                                            image_size=(IMG_SIZE, IMG_SIZE))

validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_dir,
                                                                 shuffle=True,
                                                                 batch_size=BATCH_SIZE,
                                                                 color_mode='rgb',
                                                                 image_size=(IMG_SIZE, IMG_SIZE))
class_names = train_dataset.class_names

Found 6404 files belonging to 4 classes.
Found 5984 files belonging to 4 classes.


In [5]:
# Create a test set
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_dataset = validation_dataset.take(val_batches // 5)
validation_dataset = validation_dataset.skip(val_batches // 5)

# Rescale the images from [-1 to 1] vs [0 to 255]
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

print('Number of validation batches: %d' % tf.data.experimental.cardinality(validation_dataset))
print('Number of test batches: %d' % tf.data.experimental.cardinality(test_dataset))

Number of validation batches: 150
Number of test batches: 37


In [6]:
# Create the base model from the pre-trained model MobileNet V2
IMG_SHAPE = (IMG_SIZE, IMG_SIZE) + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

In [7]:
# This feature extractor converts each 255x255 image into a 5x5x1280 block of features
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)

In [8]:
# Unfreeze this to train
base_model.trainable = False

# Adding a classification head using max
global_max_layer = tf.keras.layers.GlobalMaxPooling2D()
feature_batch_max = global_max_layer(feature_batch)
# print(feature_batch_average.shape)

# Add a dense layer to convert it to a single prediction per image , activation='softmax'
prediction_layer = tf.keras.layers.Dense(len(class_names)) 
prediction_batch = prediction_layer(feature_batch_max)
# print(prediction_batch.shape)

# Process the inputs so that they are between [-1, 1] and then
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
# Add the preprocessing layer
x = preprocess_input(inputs)
# Add the base model after, keeping training to false
x = base_model(x, training=False)
# Add max pooling layer
x = global_max_layer(x)
# Add dropout layer
x = tf.keras.layers.Dropout(0.2)(x)
# Add softmax prediction layer
outputs = prediction_layer(x)
# Create a model from inputs, outputs.
model = tf.keras.Model(inputs, outputs)

In [9]:
# model.summary()

https://www.tensorflow.org/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy

In [10]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # False when we have a softmax layer
              metrics=['accuracy'])

In [11]:
# loss0, accuracy0 = model.evaluate(validation_dataset)

In [12]:
initial_epochs = 10

history = model.fit(train_dataset,
                    epochs=initial_epochs,
                    steps_per_epoch=5,
                    validation_data=validation_dataset,
                    validation_steps=5)

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 943ms/step - accuracy: 0.2678 - loss: 3.8671 - val_accuracy: 0.2062 - val_loss: 3.1050
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 634ms/step - accuracy: 0.3194 - loss: 3.1850 - val_accuracy: 0.2375 - val_loss: 2.4077
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 618ms/step - accuracy: 0.2035 - loss: 3.7725 - val_accuracy: 0.2875 - val_loss: 2.1583
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 616ms/step - accuracy: 0.2285 - loss: 3.3610 - val_accuracy: 0.3187 - val_loss: 2.3561
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 623ms/step - accuracy: 0.1971 - loss: 3.3427 - val_accuracy: 0.2875 - val_loss: 2.1628
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 623ms/step - accuracy: 0.2677 - loss: 2.9385 - val_accuracy: 0.3562 - val_loss: 2.1417
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━

In [13]:
# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']
# 
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# 
# plt.figure(figsize=(8, 8))
# plt.subplot(2, 1, 1)
# plt.plot(acc, label='Training Accuracy')
# plt.plot(val_acc, label='Validation Accuracy')
# plt.legend(loc='lower right')
# plt.ylabel('Accuracy')
# plt.ylim([0, .5])
# plt.title('Training and Validation Accuracy')
# 
# plt.subplot(2, 1, 2)
# plt.plot(loss, label='Training Loss')
# plt.plot(val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.ylabel('Cross Entropy')
# plt.title('Training and Validation Loss')
# plt.xlabel('epoch')
# plt.ylim([0, 10])
# plt.show()

### Customization
1) Feature Extraction
2) Fine-Tuning

In [14]:
base_model.trainable = True

In [15]:
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))

# Fine-tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable = False

Number of layers in the base model:  154


In [16]:
# model.summary()

In [17]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate/10),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # False when we have a Softmax layer
              metrics=['accuracy'])

In [18]:
fine_tune_epochs = 10
total_epochs =  initial_epochs + fine_tune_epochs

history_fine = model.fit(train_dataset,
                         epochs=total_epochs,
                         initial_epoch=len(history.epoch),
                         steps_per_epoch=5,
                         validation_data=validation_dataset,
                         validation_steps=5)

Epoch 11/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 1s/step - accuracy: 0.2211 - loss: 5.9212 - val_accuracy: 0.2750 - val_loss: 2.2585
Epoch 12/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 808ms/step - accuracy: 0.3022 - loss: 5.7755 - val_accuracy: 0.2375 - val_loss: 2.4212
Epoch 13/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 833ms/step - accuracy: 0.2396 - loss: 5.1575 - val_accuracy: 0.2688 - val_loss: 2.0993
Epoch 14/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 836ms/step - accuracy: 0.2594 - loss: 4.9652 - val_accuracy: 0.2937 - val_loss: 2.0631
Epoch 15/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 844ms/step - accuracy: 0.2776 - loss: 4.6181 - val_accuracy: 0.2625 - val_loss: 2.1158
Epoch 16/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 827ms/step - accuracy: 0.3059 - loss: 4.8334 - val_accuracy: 0.2688 - val_loss: 2.2389
Epoch 17/20
[1m5/5[0m [32m━━━━━━━

In [19]:
loss, accuracy = model.evaluate(test_dataset)
print('Test accuracy :', accuracy)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 290ms/step - accuracy: 0.2358 - loss: 2.2936
Test accuracy : 0.24662162363529205


In [20]:
# acc += history_fine.history['accuracy']
# val_acc += history_fine.history['val_accuracy']
# 
# loss += history_fine.history['loss']
# val_loss += history_fine.history['val_loss']

In [21]:
# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']
# 
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# 
# plt.figure(figsize=(8, 8))
# plt.subplot(2, 1, 1)
# plt.plot(acc, label='Training Accuracy')
# plt.plot(val_acc, label='Validation Accuracy')
# plt.legend(loc='lower right')
# plt.ylabel('Accuracy')
# plt.ylim([0, 1])
# plt.title('Training and Validation Accuracy')
# 
# plt.subplot(2, 1, 2)
# plt.plot(loss, label='Training Loss')
# plt.plot(val_loss, label='Validation Loss')
# plt.legend(loc='upper right')
# plt.ylabel('Cross Entropy')
# plt.title('Training and Validation Loss')
# plt.xlabel('epoch')
# plt.ylim([0, 10])
# plt.show()

In [22]:
# Add a softmax layer to our model and apply our test data
# probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
# image_batch, label_batch = test_dataset.as_numpy_iterator().next()
# predictions = probability_model.predict(image_batch)

In [23]:
# plt.figure(figsize=(10, 10))
# for i in range(9):
#   ax = plt.subplot(3, 3, i + 1)
#   plt.imshow(image_batch[i].astype("uint8"))
#   guess = str(class_names[np.argmax(predictions[i+9])])
#   actual = str(class_names[label_batch[i+9]])
#   title = "Prediction: " + guess + "\n" + "Actual: " + actual
#   plt.title(title)
#   plt.axis("off")