Here I use the premade modules to
* read in and clean the raw input data, and
* transform it into labelled images.

Then I evaluate the transformed data.

**The output should be a suitable input for a tensorflow CNN model.**

## Contents

[1. Dependencies](#sec1)

[2. Data cleaning](#sec2)

[3. Data labelling & image transformation](#sec3)

[4. Explore resulting images](#sec4)

## 1. Dependencies
<a id = 'sec1'></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

import sys
sys.path.append('../utils/')
import data_cleaning 
from labelled_image_preparation import data_to_labelled_img
from labels import trading_strategies
from transform  import gramian_angular_field, recurrence_plot, markov_transition_field
from visualize import ts_with_markers


## 2. Data cleaning
<a id = 'sec2'></a>

### Before cleaning : quick report

In [None]:
# data_cleaning.report()

### Data related parameter settings


In [None]:
# Dataset identifier; it is reused when naming the saved output archive.
name = "gemini_BTCUSD"
# Location of the raw input file handed to the cleaning step.
path = "../data/gemini_BTCUSD"
# Column to work on, and the frequency setting passed to the cleaning step.
VarName, Freq = "Close", "3H"

In [None]:
# Build the cleaned data set from the raw file with the settings chosen above.
data = data_cleaning.create_cleaned_set(
    file_with_path=path,
    varname=VarName,
    freq=Freq,
    # presumably the name of the timestamp column in the raw file -- confirm
    datename="Date",
    # presumably the last timestamp to keep -- confirm in data_cleaning
    datetime_last="2018-08-23 01:50:00",
)

In [None]:
# Preview the start of the cleaned data (assumes `data` is a pandas object).
data.head()

In [None]:
# Print the structural summary of the cleaned data (assumes `data` is a pandas DataFrame).
data.info()

In [None]:
# Show summary statistics of the cleaned data (assumes `data` is a pandas object).
data.describe()

In [None]:
# Plot the cleaned data with a background grid for a quick visual sanity check.
data.plot(grid = True)

## 3. Data labelling & image transformation
<a id = 'sec3'></a>

### Decide hyperparameters of transformation

In [None]:
# Image transformation strategy used for every image. The notebook treats
# "RP" (recurrence plot) specially further down; "MTF" presumably selects the
# Markov transition field -- confirm against data_to_labelled_img.
Image_trf = "MTF"

# Window sizes forwarded to data_to_labelled_img for labelling and for
# building the images, respectively.
Label_window_size, Image_window_size = 3, 20

In [None]:
# Produce the labelled frame plus the image and label collections from the
# cleaned data (the exact windowing is defined in labelled_image_preparation).
labelled_pd, images, image_labels, label_names = data_to_labelled_img(
    data=data,
    column_name=VarName,
    image_trf_strat=Image_trf,
    image_window_size=Image_window_size,
    label_window_size=Label_window_size,
)

In [None]:
# Show summary statistics of the labelled data (assumes `labelled_pd` is a pandas DataFrame).
labelled_pd.describe()

In [None]:
# Largest pixel value over all images; checks the upper end of the value range.
np.max(images)

In [None]:
# Smallest pixel value over all images; checks the lower end of the value range.
np.min(images)

### Show labelled data

In [None]:
# Draw the series with its "Sell" and "Buy" points highlighted. The colour and
# marker lists are presumably matched to `to_mark` by position -- confirm in
# ts_with_markers.
fig = ts_with_markers.plot_ts_markers(
    data=labelled_pd,
    main_col='Series',
    by_col="Strategy",
    to_mark=["Sell", "Buy"],
    color=['g', 'r'],
    marker_type=['v', '^'],
)
# Enlarge the figure so individual markers stay readable.
fig.set_size_inches(20, 10)
fig.show()

## If Recurrence Plot: Normalize images between 0 and 1

In [None]:
if Image_trf == "RP":
    # Collect the scaled images into a new array. Rebinding the loop variable
    # to the scaled copy (as a plain `for image in images: image = ...` loop
    # does) would leave `images` itself untouched.
    # NOTE(review): MinMaxScaler rescales each column of an image
    # independently -- confirm that per-column (rather than per-image)
    # scaling is what is wanted for recurrence plots.
    images = np.array([
        MinMaxScaler(feature_range=(0, 1), copy=True).fit_transform(image)
        for image in images
    ])

# Fix floating point inaccuracy: values marginally outside [0, 1] are pulled
# back onto the nearest boundary.
images = np.clip(images, 0., 1.)

### Print images

In [None]:
# Display every image, titled with the name of its label.
for image, label in zip(images, image_labels):
    # np.int was removed in NumPy 1.24, so use .item() to extract the position
    # of the single non-zero entry. Like the original conversion, this fails
    # unless exactly one entry is set (labels are presumably one-hot
    # vectors -- confirm).
    label_idx = np.flatnonzero(label).item()
    plt.imshow(image, cmap="Greys")
    plt.title(label_names[label_idx])
    plt.show()

## Save images and labels

In [None]:
# Store images, labels and label names in one archive. The file name encodes
# the data set and the transformation settings, so each configuration gets its
# own file.
np.savez(
    f"../data/test_{name}_{VarName}_{Freq}"
    f"_LWS{Label_window_size}_IWS{Image_window_size}_Trf{Image_trf}",
    images=images,
    image_labels=image_labels,
    label_names=label_names,
)