#### Convolutional neural network (CNN) for learning from images - using high-level Keras preprocessing utilities and layers to read a directory of images on disk.

https://www.tensorflow.org/tutorials/load_data/images

#### deep_learn3.ipynb - this notebook was run on the 10-year image set.

In [1]:
import warnings
# Suppress every warning category for the rest of the session
# (presumably to keep cell output free of library deprecation notices).
warnings.filterwarnings('ignore')

In [1]:
import numpy as np
import pandas as pd
import os
import os.path
import PIL
from PIL import Image
import PIL.Image
import tensorflow as tf
#import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import datetime

In [2]:
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.5.0


#### Dataset - directory
candlesticks_together

##### Define path for dataset

In [3]:
import pathlib

# Folder (relative to the notebook) that holds the candlestick chart images.
image_dir = pathlib.Path(
    'candlesticks10yrs/'
)

### Create File DataFrame

In [4]:
# Collect every PNG below image_dir (recursively) as a Series of string paths.
# glob() yields entries in a filesystem-dependent order, so sort them: the
# seeded sample()/train_test_split() later in the notebook select rows by
# position, and a stable row order keeps that selection the same on every machine.
filepaths = pd.Series(sorted(image_dir.glob(r'**/*.png')), name='Filepath').astype(str)

In [5]:
# Display the collected image paths.
filepaths

0      candlesticks10yrs\2011-07-10.png
1      candlesticks10yrs\2011-07-17.png
2      candlesticks10yrs\2011-07-24.png
3      candlesticks10yrs\2011-07-31.png
4      candlesticks10yrs\2011-08-07.png
5      candlesticks10yrs\2011-08-14.png
6      candlesticks10yrs\2011-08-21.png
7      candlesticks10yrs\2011-08-28.png
8      candlesticks10yrs\2011-09-04.png
9      candlesticks10yrs\2011-09-11.png
10     candlesticks10yrs\2011-09-18.png
11     candlesticks10yrs\2011-09-25.png
12     candlesticks10yrs\2011-10-02.png
13     candlesticks10yrs\2011-10-09.png
14     candlesticks10yrs\2011-10-16.png
15     candlesticks10yrs\2011-10-23.png
16     candlesticks10yrs\2011-10-30.png
17     candlesticks10yrs\2011-11-06.png
18     candlesticks10yrs\2011-11-13.png
19     candlesticks10yrs\2011-11-20.png
20     candlesticks10yrs\2011-11-27.png
21     candlesticks10yrs\2011-12-04.png
22     candlesticks10yrs\2011-12-11.png
23     candlesticks10yrs\2011-12-18.png
24     candlesticks10yrs\2011-12-25.png


In [6]:
# Load the per-date signal table (columns: Date, Signal) from CSV.
signal_df = pd.read_csv('signal10yrs.csv')  

In [7]:
# Check column dtypes and non-null counts of the freshly loaded table.
signal_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 521 entries, 0 to 520
Data columns (total 2 columns):
Date      521 non-null object
Signal    521 non-null int64
dtypes: int64(1), object(1)
memory usage: 8.2+ KB


In [8]:
# Replace the 'Date' strings with parsed pandas datetimes (column position unchanged).
signal_df = signal_df.assign(Date=pd.to_datetime(signal_df['Date']))

In [9]:
# Use the parsed dates as the row index so the signals can be joined to the images by date.
signal_df = signal_df.set_index('Date')

In [10]:
# Preview the first rows of the date-indexed signal table.
signal_df.head()

Unnamed: 0_level_0,Signal
Date,Unnamed: 1_level_1
2011-07-10,0
2011-07-17,1
2011-07-24,1
2011-07-31,1
2011-08-07,0


In [11]:
# Confirm the index is now a DatetimeIndex and only 'Signal' remains as a column.
signal_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 521 entries, 2011-07-10 to 2021-07-04
Data columns (total 1 columns):
Signal    521 non-null int64
dtypes: int64(1)
memory usage: 8.1 KB


In [12]:
# Inspect the first path; file names have the form YYYY-MM-DD.png.
filepaths[0]

'candlesticks10yrs\\2011-07-10.png'

In [13]:
# Recover each image's date from its file name: take the final path component,
# keep the text before the first '.', and parse it as YYYY-MM-DD.
date_list = [
    datetime.datetime.strptime(
        os.path.basename(image_path).partition(".")[0],
        '%Y-%m-%d',
    ).date()
    for image_path in filepaths
]

In [14]:
# Display the dates parsed from the image file names.
date_list

[datetime.date(2011, 7, 10),
 datetime.date(2011, 7, 17),
 datetime.date(2011, 7, 24),
 datetime.date(2011, 7, 31),
 datetime.date(2011, 8, 7),
 datetime.date(2011, 8, 14),
 datetime.date(2011, 8, 21),
 datetime.date(2011, 8, 28),
 datetime.date(2011, 9, 4),
 datetime.date(2011, 9, 11),
 datetime.date(2011, 9, 18),
 datetime.date(2011, 9, 25),
 datetime.date(2011, 10, 2),
 datetime.date(2011, 10, 9),
 datetime.date(2011, 10, 16),
 datetime.date(2011, 10, 23),
 datetime.date(2011, 10, 30),
 datetime.date(2011, 11, 6),
 datetime.date(2011, 11, 13),
 datetime.date(2011, 11, 20),
 datetime.date(2011, 11, 27),
 datetime.date(2011, 12, 4),
 datetime.date(2011, 12, 11),
 datetime.date(2011, 12, 18),
 datetime.date(2011, 12, 25),
 datetime.date(2012, 1, 1),
 datetime.date(2012, 1, 8),
 datetime.date(2012, 1, 15),
 datetime.date(2012, 1, 22),
 datetime.date(2012, 1, 29),
 datetime.date(2012, 2, 5),
 datetime.date(2012, 2, 12),
 datetime.date(2012, 2, 19),
 datetime.date(2012, 2, 26),
 datetime.

In [15]:
# Pair every parsed date with the path of the image it was parsed from.
# date_list was built by iterating filepaths, so both are in the same row order.
images_df = pd.DataFrame({
    'Date': date_list,
    'Filepath': filepaths,
})

In [16]:
# Convert the datetime.date objects into pandas datetimes so the index type
# matches the signal table's DatetimeIndex.
images_df = images_df.assign(Date=pd.to_datetime(images_df['Date']))

In [17]:
# Preview the date/path table before it is indexed by date.
images_df.head()

Unnamed: 0,Date,Filepath
0,2011-07-10,candlesticks10yrs\2011-07-10.png
1,2011-07-17,candlesticks10yrs\2011-07-17.png
2,2011-07-24,candlesticks10yrs\2011-07-24.png
3,2011-07-31,candlesticks10yrs\2011-07-31.png
4,2011-08-07,candlesticks10yrs\2011-08-07.png


In [18]:
# Index the image table by date, mirroring the signal table.
images_df = images_df.set_index('Date')

In [19]:
# Preview the date-indexed image table.
images_df.head()

Unnamed: 0_level_0,Filepath
Date,Unnamed: 1_level_1
2011-07-10,candlesticks10yrs\2011-07-10.png
2011-07-17,candlesticks10yrs\2011-07-17.png
2011-07-24,candlesticks10yrs\2011-07-24.png
2011-07-31,candlesticks10yrs\2011-07-31.png
2011-08-07,candlesticks10yrs\2011-08-07.png


In [20]:
# Inspect the resulting DatetimeIndex (range and length).
images_df.index

DatetimeIndex(['2011-07-10', '2011-07-17', '2011-07-24', '2011-07-31',
               '2011-08-07', '2011-08-14', '2011-08-21', '2011-08-28',
               '2011-09-04', '2011-09-11',
               ...
               '2021-05-02', '2021-05-09', '2021-05-16', '2021-05-23',
               '2021-05-30', '2021-06-06', '2021-06-13', '2021-06-20',
               '2021-06-27', '2021-07-04'],
              dtype='datetime64[ns]', name='Date', length=521, freq=None)

In [21]:
# Attach each image's signal label by aligning the two tables on their date
# index; the inner join keeps only dates present in both.
images = pd.concat(
    (images_df, signal_df),
    axis='columns',
    join='inner',
)

In [22]:
# Preview the first ten image/label pairs.
images.head(10)

Unnamed: 0_level_0,Filepath,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-07-10,candlesticks10yrs\2011-07-10.png,0
2011-07-17,candlesticks10yrs\2011-07-17.png,1
2011-07-24,candlesticks10yrs\2011-07-24.png,1
2011-07-31,candlesticks10yrs\2011-07-31.png,1
2011-08-07,candlesticks10yrs\2011-08-07.png,0
2011-08-14,candlesticks10yrs\2011-08-14.png,1
2011-08-21,candlesticks10yrs\2011-08-21.png,1
2011-08-28,candlesticks10yrs\2011-08-28.png,1
2011-09-04,candlesticks10yrs\2011-09-04.png,1
2011-09-11,candlesticks10yrs\2011-09-11.png,1


In [23]:
# Work with a reproducible random subset of 421 of the labelled images
# (random_state=1); reset_index(drop=True) discards the Date index.
image_df = images.sample(421, random_state=1).reset_index(drop=True)
# Hold out 30% of that subset as the test set (train_size=0.7).
# NOTE(review): rows are shuffled across dates, so training and test weeks are
# interleaved in time - confirm this is intended for a time-ordered signal.
train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)

In [24]:
# Preview the training rows.
train_df.head()

Unnamed: 0,Filepath,Signal
385,candlesticks10yrs\2011-09-18.png,1
186,candlesticks10yrs\2012-11-11.png,0
27,candlesticks10yrs\2020-04-26.png,1
89,candlesticks10yrs\2014-09-07.png,0
327,candlesticks10yrs\2017-10-08.png,1


In [25]:
# Preview the held-out test rows.
test_df.head()

Unnamed: 0,Filepath,Signal
378,candlesticks10yrs\2014-05-18.png,1
165,candlesticks10yrs\2015-09-20.png,1
17,candlesticks10yrs\2017-10-15.png,1
179,candlesticks10yrs\2012-01-15.png,1
4,candlesticks10yrs\2014-11-09.png,0


### Loading Images

In [26]:
# Both generators multiply pixel values by 1/255 when loading images.
_ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator
_pixel_scale = 1./255

# Training generator additionally reserves 20% of its rows for validation.
train_generator = _ImageDataGenerator(rescale=_pixel_scale, validation_split=0.2)

# Test generator only rescales.
test_generator = _ImageDataGenerator(rescale=_pixel_scale)

In [27]:
# Display the training frame that feeds the training/validation generators.
train_df

Unnamed: 0,Filepath,Signal
385,candlesticks10yrs\2011-09-18.png,1
186,candlesticks10yrs\2012-11-11.png,0
27,candlesticks10yrs\2020-04-26.png,1
89,candlesticks10yrs\2014-09-07.png,0
327,candlesticks10yrs\2017-10-08.png,1
73,candlesticks10yrs\2012-04-15.png,0
69,candlesticks10yrs\2012-10-14.png,1
405,candlesticks10yrs\2020-05-17.png,1
91,candlesticks10yrs\2016-01-24.png,0
192,candlesticks10yrs\2013-12-08.png,1


In [28]:
# Arguments shared by all three iterators: read paths from 'Filepath', use the
# numeric 'Signal' column unchanged as the target (class_mode='raw'), and
# resize every image to 120x120 RGB in batches of 32.
_shared_flow_args = dict(
    x_col='Filepath',
    y_col='Signal',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
)

# Training and validation iterators are the two subsets of train_df defined by
# the generator's validation_split; both are shuffled with a fixed seed.
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset='training',
    **_shared_flow_args
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset='validation',
    **_shared_flow_args
)

# The test iterator is left unshuffled so predictions line up with .labels.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **_shared_flow_args
)

Found 236 validated image filenames.
Found 58 validated image filenames.
Found 127 validated image filenames.


### Training

In [29]:
# Fix TensorFlow's global seed so weight initialisation - and with it the
# training run - is repeatable after Restart & Run All (as far as the
# backend's kernels are deterministic).
tf.random.set_seed(42)

# Feature extractor: two Conv2D + MaxPool2D stages followed by global average
# pooling. The print calls show the symbolic tensor shape after every stage.
inputs = tf.keras.Input(shape=(120, 120, 3))  # 120x120 RGB, matching the image iterators
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
print('after 1st conv2d - ',x)
x = tf.keras.layers.MaxPool2D()(x)
print('after 1st maxpool2d - ',x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
print('after 2nd conv2d - ',x)
x = tf.keras.layers.MaxPool2D()(x)
print('after 2nd maxpool2d - ',x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
print('after 1st GlobalAvgPooling2D - ',x)
# Keep a handle on the pooled feature tensor for later inspection.
features_x = x

# Head: two 64-unit ReLU layers and a single linear output unit.
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Linear output trained with MSE: the 0/1 Signal column is treated as a
# regression target, which is what the RMSE / R^2 evaluation below expects.
model.compile(
    optimizer='adam',
    loss='mse'
)

# Train for up to 100 epochs; stop once val_loss has not improved for 15
# consecutive epochs and restore the weights of the best epoch.
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True
        )
    ]
)

after 1st conv2d -  KerasTensor(type_spec=TensorSpec(shape=(None, 118, 118, 16), dtype=tf.float32, name=None), name='conv2d/Relu:0', description="created by layer 'conv2d'")
after 1st maxpool2d -  KerasTensor(type_spec=TensorSpec(shape=(None, 59, 59, 16), dtype=tf.float32, name=None), name='max_pooling2d/MaxPool:0', description="created by layer 'max_pooling2d'")
after 2nd conv2d -  KerasTensor(type_spec=TensorSpec(shape=(None, 57, 57, 32), dtype=tf.float32, name=None), name='conv2d_1/Relu:0', description="created by layer 'conv2d_1'")
after 2nd maxpool2d -  KerasTensor(type_spec=TensorSpec(shape=(None, 28, 28, 32), dtype=tf.float32, name=None), name='max_pooling2d_1/MaxPool:0', description="created by layer 'max_pooling2d_1'")
after 1st GlobalAvgPooling2D -  KerasTensor(type_spec=TensorSpec(shape=(None, 32), dtype=tf.float32, name=None), name='global_average_pooling2d/Mean:0', description="created by layer 'global_average_pooling2d'")
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Ep

In [31]:
# Show the symbolic tensor captured after global average pooling (32 features per image).
print(features_x)

KerasTensor(type_spec=TensorSpec(shape=(None, 32), dtype=tf.float32, name=None), name='global_average_pooling2d/Mean:0', description="created by layer 'global_average_pooling2d'")


### Results

In [39]:
# Ground-truth labels, in the (unshuffled) order the test iterator yields them.
true_signals = test_images.labels
# Model outputs for the test images, flattened from (n, 1) to (n,).
predicted_signals = np.squeeze(model.predict(test_images))

# The model was compiled with loss='mse', so evaluate() returns the test MSE.
test_mse = model.evaluate(test_images, verbose=0)
rmse = np.sqrt(test_mse)
print("     Test RMSE: {:.5f}".format(rmse))

# Coefficient of determination of the predictions against the true labels.
r2 = r2_score(true_signals, predicted_signals)
print("Test R^2 Score: {:.5f}".format(r2))

     Test RMSE: 0.50460
Test R^2 Score: -0.03290


In [40]:
# Baseline: RMSE obtained by always predicting the mean of the test labels.
_baseline_residuals = true_signals - np.mean(true_signals)
null_rmse = np.sqrt(np.sum(_baseline_residuals**2) / len(true_signals))
print("Null/Baseline Model Test RMSE: {:.5f}".format(null_rmse))

Null/Baseline Model Test RMSE: 0.49650


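## We can see that our model is no better than the null/baseline model (which predicts the mean every time): test RMSE 0.50460 vs. baseline RMSE 0.49650, and the test R^2 is slightly negative (-0.03290).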

In [41]:
# Display the true 0/1 test labels.
true_signals

array([1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int64)

In [42]:
# Display the model's raw (continuous) predictions for the test images.
predicted_signals

array([0.73961574, 0.63909054, 0.5752292 , 0.63968307, 0.6365635 ,
       0.60602766, 0.73154074, 0.6824593 , 0.5863318 , 0.62283474,
       0.65158194, 0.62816757, 0.7016016 , 0.65575135, 0.7087291 ,
       0.6205291 , 0.67094505, 0.5907646 , 0.66167575, 0.68010587,
       0.6350618 , 0.5985036 , 0.58009833, 0.6466519 , 0.6113315 ,
       0.6320772 , 0.6529108 , 0.61997837, 0.59532934, 0.6074732 ,
       0.62559444, 0.552777  , 0.6338214 , 0.6038495 , 0.6136678 ,
       0.6027068 , 0.71152896, 0.5961202 , 0.5959399 , 0.62074745,
       0.5728868 , 0.68265384, 0.6716972 , 0.61344   , 0.6331195 ,
       0.6363059 , 0.63833183, 0.63778114, 0.67405134, 0.6304092 ,
       0.6235529 , 0.7087313 , 0.59180814, 0.57369035, 0.7866733 ,
       0.63419497, 0.63565344, 0.6717923 , 0.7303404 , 0.60981685,
       0.657378  , 0.68300647, 0.64998287, 0.63987845, 0.5813835 ,
       0.70618886, 0.6914393 , 0.65404314, 0.70061857, 0.5636789 ,
       0.627767  , 0.71198124, 0.70057464, 0.65200526, 0.57717

In [43]:
# Sanity check: number of true labels.
len(true_signals)

127

In [44]:
# Sanity check: number of predictions (should equal the number of true labels).
len(predicted_signals)

127

In [45]:
# Put the true labels and the model's predictions side by side, one row per test image.
final_predictions_df = pd.DataFrame({
    'Actual': true_signals,
    'Predicted': predicted_signals,
})

In [46]:
# Display the actual-vs-predicted comparison table.
final_predictions_df

Unnamed: 0,Actual,Predicted
0,1,0.739616
1,1,0.639091
2,1,0.575229
3,1,0.639683
4,0,0.636563
5,1,0.606028
6,1,0.731541
7,0,0.682459
8,1,0.586332
9,1,0.622835
