In [1]:
import copy
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

In [2]:
# Raw input frame; every distinct value of its Ticker column is processed below.
sample_df = pd.read_csv('ASINDU/20151117.csv')
tickers = sample_df['Ticker'].unique()

# Columns kept from the raw file, followed by the derived columns that
# build_pixels adds to each per-ticker frame.
input_cols = ['Timestamp', 'Ticker', 'HighPrice', 'LowPrice', 'TotalQuantity']
final_cols = input_cols + ['change_high', 'change_low', 'pixel_high', 'pixel_low', 'volume_norm']

# build_pixels multiplies the percent change by `interval` to get a pixel offset.
interval = 8
# TotalQuantity at which build_pixels reaches its maximum volume_norm of 200.
max_volume = 1_000_000

In [3]:
def get_timestamps():
    """Return the minute labels '09:01:00' through '10:04:00' as a list of strings.

    The list has 64 entries (59 in the 09:xx hour plus 5 in the 10:xx hour),
    in chronological order.
    """
    nine_oclock = [f'09:{minute:02d}:00' for minute in range(1, 60)]
    ten_oclock = [f'10:{minute:02d}:00' for minute in range(5)]
    return nine_oclock + ten_oclock

tms = get_timestamps()

In [4]:
def get_first_datapoint_idx(df):
    """Locate the first row whose TotalQuantity is not NaN.

    Returns:
        A tuple ``(index_label, HighPrice, LowPrice)`` taken from that row, or
        ``None`` when every row has a NaN TotalQuantity.
    """
    populated_rows = (
        (idx, row['HighPrice'], row['LowPrice'])
        for idx, row in df.iterrows()
        if not np.isnan(row['TotalQuantity'])
    )
    # The generator is lazy, so iteration stops at the first populated row.
    return next(populated_rows, None)

In [5]:
def set_df_values(df_to_set, idx, ticker, high, low, quantity):
    """Overwrite the ticker, price and quantity cells of one row, in place.

    Args:
        df_to_set: frame to modify; it is mutated and also returned.
        idx: index label of the row to write.
        ticker, high, low, quantity: values stored in the 'Ticker',
            'HighPrice', 'LowPrice' and 'TotalQuantity' columns respectively.

    Returns:
        The same ``df_to_set`` object.
    """
    replacements = {
        'Ticker': ticker,
        'HighPrice': high,
        'LowPrice': low,
        'TotalQuantity': quantity,
    }
    for column, value in replacements.items():
        df_to_set.at[idx, column] = value
    return df_to_set

In [6]:
def gen_clean_dataset(ticker, input_cols, final_cols, tms):
    """Build a gap-free per-minute frame for one ticker.

    The rows of the module-level ``sample_df`` that belong to ``ticker`` are
    left-joined onto the timestamp grid ``tms``, then rows without data are
    filled in:

    * rows before the first datapoint take that datapoint's high/low prices;
    * later gaps carry forward the most recent high/low prices;
    * every filled row gets ``TotalQuantity`` 0.

    Args:
        ticker: ticker symbol to extract from ``sample_df``.
        input_cols: columns to keep from the merged frame, in order.
        final_cols: accepted for signature compatibility; not used here.
        tms: list of timestamp strings that defines the rows of the result.

    Returns:
        A DataFrame with columns ``input_cols`` and a fresh 0-based index.

    Raises:
        TypeError: if no row has a TotalQuantity, because
            ``get_first_datapoint_idx`` then returns ``None`` and the
            unpacking below fails.
    """
    df = pd.DataFrame(tms, columns=['Timestamp'])
    # Filter on the `ticker` argument. The previous code read the global loop
    # variable `tick`, so the function only worked when called from a loop that
    # happened to use that name.
    df = df.merge(sample_df[sample_df['Ticker'] == ticker], how='left', on='Timestamp')
    df = df[input_cols].reset_index(drop=True)
    first_idx, high, low = get_first_datapoint_idx(df)
    # Back-fill the rows before the first datapoint with its prices.
    for i in range(first_idx):
        set_df_values(df, i, ticker, high, low, 0)
    # Forward-fill later gaps, tracking the latest observed prices as we go.
    for i in range(first_idx + 1, len(df)):
        if np.isnan(df.at[i, 'TotalQuantity']):
            set_df_values(df, i, ticker, high, low, 0)
        else:
            high, low = df.at[i, 'HighPrice'], df.at[i, 'LowPrice']
    return df

In [7]:
def cap(x, floor, ceil):
    """Clamp ``x`` to the range [floor, ceil].

    Returns ``floor`` when ``x`` is below it, ``ceil`` when ``x`` is above it,
    and ``x`` itself otherwise. The lower bound is checked first.
    """
    if x < floor:
        return floor
    return ceil if x > ceil else x

In [8]:
def build_pixels(df, interval, max_volume):
    """Add percent-change, pixel-row and normalised-volume columns to ``df``.

    The reference price is the midpoint of HighPrice and LowPrice in the row
    labelled 0. Five columns are added, in this order:

    * ``change_high`` / ``change_low``: percent change of the high / low price
      relative to the reference price.
    * ``pixel_high`` / ``pixel_low``: ``32 - int(change * interval)`` clamped
      to 0..63, so a zero change maps to row 32 and rising prices map to
      smaller row numbers. ``int()`` truncates toward zero.
    * ``volume_norm``: ``200 * TotalQuantity / max_volume`` clamped to 0..200.

    ``df`` is modified in place and also returned.
    """
    day_open = (df['HighPrice'][0] + df['LowPrice'][0]) / 2

    def percent_change(price):
        return 100*(price-day_open)/day_open

    def to_pixel_row(change):
        return cap(32-int(change*interval), 0, 63)

    def scale_volume(quantity):
        return cap(200*(quantity/max_volume), 0, 200)

    price_columns = {'high': 'HighPrice', 'low': 'LowPrice'}
    # Two passes keep the original column order: both change_* columns are
    # created before either pixel_* column.
    for side, source in price_columns.items():
        df['change_' + side] = df[source].apply(percent_change)
    for side in price_columns:
        df['pixel_' + side] = df['change_' + side].apply(to_pixel_row)
    df['volume_norm'] = df['TotalQuantity'].apply(scale_volume)
    return df

In [9]:
def draw_movement_image(df, include_vol=True):
    """Rasterise per-row pixel ranges into a 64x64 array.

    The array starts filled with 255. For each row of ``df``, the cells in
    image rows ``pixel_high`` through ``pixel_low`` (inclusive) of the image
    column given by that row's index label are overwritten with a shade.

    Args:
        df: frame with ``pixel_high`` and ``pixel_low`` columns, plus
            ``volume_norm`` when ``include_vol`` is true. Its index labels are
            used as image column numbers.
        include_vol: when true the shade is ``200 - volume_norm``; when false
            it is 0.

    Returns:
        A 64x64 float numpy array.
    """
    img = np.ones([64,64])*255
    if include_vol:
        shades = [200 - volume for volume in df['volume_norm']]
    else:
        shades = [0] * len(df)
    # Read the columns directly instead of using iterrows(): iterrows() upcasts
    # an all-numeric row to float, and range() rejects float bounds. int() also
    # covers pixel columns that were stored as floats.
    for column, top, bottom, shade in zip(df.index, df['pixel_high'], df['pixel_low'], shades):
        for row in range(int(top), int(bottom) + 1):
            img[row, column] = shade
    return img

In [10]:
# Build one cleaned, pixel-encoded frame per ticker and combine them once at the
# end. DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the accumulated data on every iteration.
frames = []
for tick in tickers:
    df = gen_clean_dataset(tick, input_cols, final_cols, tms)
    df = build_pixels(df, interval, max_volume)
    frames.append(df)

# The index is deliberately not reset: each ticker keeps its own 0-based row
# labels, which draw_movement_image uses as image column numbers.
if frames:
    clean_df = pd.concat(frames)[final_cols]
else:
    # pd.concat rejects an empty list; keep the empty-but-typed frame instead.
    clean_df = pd.DataFrame(columns=final_cols)

In [11]:
# Inspect the combined frame; the rendered output below elides the middle rows.
clean_df

Unnamed: 0,Timestamp,Ticker,HighPrice,LowPrice,TotalQuantity,change_high,change_low,pixel_high,pixel_low,volume_norm
0,09:01:00,V,79.20,79.20,0.0,0.000000,0.000000,32,32,0.0000
1,09:02:00,V,79.20,79.20,0.0,0.000000,0.000000,32,32,0.0000
2,09:03:00,V,79.20,79.20,0.0,0.000000,0.000000,32,32,0.0000
3,09:04:00,V,79.20,79.20,0.0,0.000000,0.000000,32,32,0.0000
4,09:05:00,V,79.20,79.20,0.0,0.000000,0.000000,32,32,0.0000
...,...,...,...,...,...,...,...,...,...,...
59,10:00:00,PFE,33.17,33.13,77521.0,-0.240602,-0.360902,33,34,15.5042
60,10:01:00,PFE,33.18,33.11,82921.0,-0.210526,-0.421053,33,35,16.5842
61,10:02:00,PFE,33.18,33.15,36206.0,-0.210526,-0.300752,33,34,7.2412
62,10:03:00,PFE,33.19,33.15,41037.0,-0.180451,-0.300752,33,34,8.2074


In [13]:
# Write one PNG per ticker into data_bin/.
# Create the folder first so save() does not fail when it is missing.
os.makedirs("data_bin", exist_ok=True)
for tick in tickers:

    df = clean_df[clean_df['Ticker']==tick]
    # include_vol=False: bars are drawn with shade 0 rather than shaded by volume.
    img = draw_movement_image(df, False)
    
#     # compare encoded image to scatterplot of minute bars
#     fig, (ax1, ax2) = plt.subplots(1,2)
#     fig.suptitle(tick)
#     ax1.imshow(img, cmap='gray');
#     ax2.scatter(df['Timestamp'], df['change_high'])
#     ax2.scatter(df['Timestamp'], df['change_low'])

    # Save as RGB whatever mode PIL infers from the float array.
    pil_img = Image.fromarray(img)
    if pil_img.mode != 'RGB':
        pil_img = pil_img.convert('RGB')
    pil_img.save("data_bin/" + tick + '.png')
    