In [11]:
import datetime
import numpy as np
import pandas as pd

from helpers import common as cm

In [2]:
# Load the raw records from 'datasets/BittrexChart' through the project helper.
# NOTE(review): cm.load_data's return type is not visible here; the cells below
# use df_raw as a pandas DataFrame (.columns, column indexing, groupby) - confirm.
df_raw = cm.load_data('datasets/BittrexChart')

In [8]:
# Overwrite df_raw's column labels in place with these 13 names, applied
# positionally (first name -> first column, and so on).
# NOTE(review): assumes the loaded data has exactly 13 columns in this order -
# verify against what cm.load_data returns.
df_raw.columns = [
    'created_at',
    'currency',
    'exchange',
    'price',
    'price_close',
    'price_high',
    'price_low',
    'price_open',
    'quantity',
    'timestamp',
    'timestamp_close',
    'timestamp_open',
    'uuid',
]

In [14]:
def extract_time_components(x):
    """Split a POSIX timestamp into calendar components.

    The timestamp is interpreted in the machine's local timezone, because
    ``datetime.datetime.fromtimestamp`` is called without a ``tz`` argument.

    Args:
        x: Seconds since the epoch.

    Returns:
        A ``(date, hour, minute)`` tuple of ints, where ``date`` is the
        calendar day encoded as the number ``YYYYMMDD``.
    """
    moment = datetime.datetime.fromtimestamp(x)
    # Same value as int(strftime('%Y%m%d')), built arithmetically.
    yyyymmdd = moment.year * 10000 + moment.month * 100 + moment.day
    return yyyymmdd, moment.hour, moment.minute

def add_time_component_columns(df):
    """Add 'date', 'hour' and 'minute' columns derived from 'timestamp_close'.

    The columns are written onto ``df`` itself (no copy is made) and that same
    frame is returned, so callers relying on the in-place mutation keep working.

    Args:
        df: DataFrame with a 'timestamp_close' column holding values accepted
            by ``extract_time_components``.

    Returns:
        The frame that was passed in, now carrying the three extra columns.
    """
    components = [extract_time_components(ts) for ts in df['timestamp_close']]
    # Assign column by column instead of unpacking zip(*...): the unpacking
    # form raises ValueError when the frame has no rows.
    df['date'] = [date for date, _, _ in components]
    df['hour'] = [hour for _, hour, _ in components]
    df['minute'] = [minute for _, _, minute in components]
    # Return the argument, not the notebook-global df_raw.
    return df

# Adds 'date', 'hour' and 'minute' columns to df_raw in place; df_raw_with_time
# refers to that same DataFrame object, not to a copy.
df_raw_with_time = add_time_component_columns(df_raw)

In [None]:
def group_by(df, columns):
    """Split ``df`` into ``(key, sub_frame)`` pairs, one per distinct group.

    Args:
        df: DataFrame to split.
        columns: Column label or list of labels passed to ``DataFrame.groupby``.

    Returns:
        A list of ``(key, sub_frame)`` tuples. When ``columns`` is a list with
        a single label the key is the bare group value (not a 1-tuple); for
        several labels the key is a tuple of group values.
    """
    # The deprecated ``axis=0`` keyword is dropped (it was the default), and
    # the groups are read in one pass instead of one get_group() call per key.
    unwrap = isinstance(columns, list) and len(columns) == 1
    pairs = []
    for key, sub_frame in df.groupby(columns):
        # Newer pandas yields 1-tuples for a length-1 list grouper; unwrap so
        # callers keep receiving scalar keys regardless of pandas version.
        if unwrap and isinstance(key, tuple) and len(key) == 1:
            key = key[0]
        pairs.append((key, sub_frame))
    return pairs
    
def group_by_date(df):
    """Return ``(date, sub_frame)`` pairs for ``df``, ordered by date key."""
    pairs = list(group_by(df, ['date']))
    # Order on the key only; the sub-frames are never compared.
    pairs.sort(key=lambda pair: pair[0])
    return pairs

def group_by_currency(df):
    """Return ``(currency, sub_frame)`` pairs for ``df``, ordered by currency key."""
    pairs = list(group_by(df, ['currency']))
    # Order on the key only; the sub-frames are never compared.
    pairs.sort(key=lambda pair: pair[0])
    return pairs

def group_by_hour(df):
    """Return ``(hour, sub_frame)`` pairs for ``df``, ordered by hour key."""
    pairs = list(group_by(df, ['hour']))
    # Order on the key only; the sub-frames are never compared.
    pairs.sort(key=lambda pair: pair[0])
    return pairs

def group_by_minute(df):
    """Return ``(minute, sub_frame)`` pairs for ``df``, ordered by minute key."""
    pairs = list(group_by(df, ['minute']))
    # Order on the key only; the sub-frames are never compared.
    pairs.sort(key=lambda pair: pair[0])
    return pairs

def transform_data_for_hour(df):
    """Collapse the rows of one group into a single summary tuple.

    Rows are ordered by 'timestamp_close' and, where several rows share the
    same 'timestamp_close', only the last one after sorting is kept.

    Args:
        df: DataFrame with 'timestamp_close', 'quantity', 'price_open' and
            'price_close' columns and at least one row.

    Returns:
        ``(volume, price_open, price_close, price_high, price_low)`` where
        volume is the sum of 'quantity', price_open is the first row's
        'price_open', price_close is the last row's 'price_close', and
        high/low are the max/min of the 'price_close' values.

    Raises:
        IndexError: if ``df`` has no rows.
    """
    ordered = df.sort_values(by='timestamp_close', ascending=True)
    deduped = ordered.drop_duplicates(subset='timestamp_close', keep='last')

    total_quantity = sum(deduped['quantity'])
    opening = deduped.iloc[0]['price_open']
    closes = deduped['price_close']
    closing = closes.iloc[-1]
    # NOTE(review): high/low are taken from the closing prices, not from the
    # 'price_high'/'price_low' columns - confirm this is intended.
    return total_quantity, opening, closing, max(closes), min(closes)

def transform_all(df):
    """Build one hourly summary DataFrame per currency.

    ``df`` is split by currency, then by date, then by hour; each hourly group
    is reduced with ``transform_data_for_hour`` to one output row.

    Args:
        df: DataFrame with 'currency', 'date' and 'hour' columns plus the
            columns ``transform_data_for_hour`` reads.

    Returns:
        Dict mapping each currency key to a DataFrame with the columns
        date, hour, currency, volume, price_open, price_close, price_high,
        price_low (one row per date/hour group).
    """
    # Names must match the row tuples built below in both count and order.
    # The previous list had an extra 'minute' entry (9 names for 8 values),
    # which made the DataFrame constructor raise for any non-empty input.
    new_columns = [
        'date',
        'hour',
        'currency',
        'volume',
        'price_open',
        'price_close',
        'price_high',
        'price_low',
    ]
    frames_by_currency = {}
    for currency, g_by_c in group_by_currency(df):
        rows = []
        for date, g_by_d in group_by_date(g_by_c):
            for hour, g_by_h in group_by_hour(g_by_d):
                values = transform_data_for_hour(g_by_h)
                rows.append((date, hour, currency) + values)
        frames_by_currency[currency] = pd.DataFrame(data=rows, columns=new_columns)
    return frames_by_currency

In [None]:
# df_transformed_initial = transform_all(data)