## Pattern Prediction in Stock Prices

### Step 1: Loading and Plotting our Data Samples

In [47]:
import pandas as pd
import os
import plotly.graph_objects as go
import numpy as np
from sklearn import preprocessing

#### Define helper functions:

In [48]:
# function to load a dict of dataframes for files in 'service/data'
def build_dataframes(filenames):
    """Load candle CSV files from the data directory into dataframes.

    Every file is read as a header-less, comma-separated CSV whose columns are
    named ``symbol, timestamp, open, high, low, close, volume``. A
    ``%_change`` column holding the percentage move from ``open`` to
    ``close`` of each row is appended.

    Args:
        filenames: Iterable of CSV file names inside the data directory.

    Returns:
        A dict mapping each file name without its final extension to the
        dataframe loaded from that file.

    Raises:
        ValueError: If ``filenames`` is None.
    """
    if filenames is None:
        raise ValueError('Argument can not be None')

    # The data directory is resolved relative to the PARENT of the current
    # working directory, not relative to the location of this file.
    base_data_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'service/data'))
    column_names = ['symbol', 'timestamp', 'open', 'high', 'low', 'close', 'volume']

    datasets = {}
    for f in filenames:
        # load data set
        df = pd.read_csv(os.path.join(base_data_path, f), sep=',', header=None)
        df.columns = column_names
        # add price change of candle
        df['%_change'] = ((df.close / df.open) - 1) * 100
        # Strip only the final extension so dotted names such as 'BRK.B.csv'
        # keep their full stem instead of being cut at the first dot.
        datasets[os.path.splitext(f)[0]] = df

    return datasets

In [49]:
# function to plot the time series using plotly
def plot_series(dataframe):
    """Plot the ``close`` column of a dataframe over time with plotly.

    The ``timestamp`` column is converted with ``pd.to_datetime(..., unit='s')``
    and used as the x axis. The chart title is the first value of the
    ``symbol`` column when that column exists and the frame has rows;
    otherwise the title is empty.

    Args:
        dataframe: Dataframe with at least the columns ``timestamp`` and
            ``close``.

    Raises:
        ValueError: If ``dataframe`` is None or lacks ``timestamp``/``close``.
    """
    if dataframe is None:
        raise ValueError('Argument can not be None')
    if 'timestamp' not in dataframe.columns or 'close' not in dataframe.columns:
        raise ValueError('Dataframe misses columns [\'timestamp\', \'close\']')

    # Read the symbol by position: a label lookup with [0] raises KeyError for
    # an empty frame or one whose index does not contain the label 0
    # (e.g. a filtered or re-indexed slice).
    if 'symbol' in dataframe.columns and len(dataframe) > 0:
        title = str(dataframe['symbol'].iloc[0])
    else:
        title = ''

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pd.to_datetime(dataframe['timestamp'], unit='s'), y=dataframe['close']))
    fig.update_layout(
        title=title,
        xaxis_rangeslider_visible=True,
        xaxis_title="Date",
        yaxis_title="Closing Price",
        font=dict(
            family="Courier New, monospace",
            size=18,
            color="#7f7f7f"
        )
    )
    fig.show()

In [50]:
# Class 0: the parabolic trend will NOT break within the next 10 time units
# of the respective price series.
#
# Class 1: the parabolic trend WILL break within the next 10 time units
# of the respective price series.

# class 0 datasets
filenames_cl0 = [
    'AAPL2.csv',
    'AMZN.csv',
    'NFLX.csv',
]

# class 1 datasets
filenames_cl1 = [
    'BTCUSDT.csv',
    'ETHUSDT.csv',
    'MATICUSDT.csv',
    'ICXUSDT.csv',
    'BNBUSDT.csv',
    'EOSBTC.csv',
    'RVNBTC.csv',
    'TRXBTC.csv',
    'XLMBTC.csv',
    'ADABTC.csv',
    'WAVESBTC.csv',
    'KOD.csv',
]

In [51]:
# Load the samples of both classes (class 1 first, then class 0).
datasets_cl1 = build_dataframes(filenames_cl1)
datasets_cl0 = build_dataframes(filenames_cl0)

# Plot every sample: all class 1 series, followed by all class 0 series.
for sample_cl1 in datasets_cl1.values():
    plot_series(sample_cl1)

for sample_cl0 in datasets_cl0.values():
    plot_series(sample_cl0)
    