In [None]:
import IPython.core.display as di
import datetime as dt
import importlib as ilib
import logging
import matplotlib
import matplotlib.finance as fin
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib.gridspec as gridspec
import numpy as np
import os
import pandas as pd
import quandl as qdl
import timeit

import omega.configuration as c
import omega.core.chain as cc
import omega.core.chart as chart
import omega.data.cot as odc
import omega.core.instrument as ci
import omega.xl.spreading as spread

%matplotlib inline
# Prefer the QUANDL_API_KEY environment variable so the credential can be supplied
# (and rotated) without editing the notebook. The literal is kept only as a
# backward-compatible fallback so existing setups keep working.
# NOTE(review): a key stored in a notebook should be treated as exposed - rotate it
# and drop the fallback once the environment variable is in place.
qdl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'SkmBQRG9gxQK4HmeSoze')

# Identify Goldman Roll

In [None]:
# Re-import the project modules so edits made on disk are picked up without
# restarting the kernel. The last reload stays a bare expression so the cell
# still echoes the reloaded module.
for module in (cc, chart, spread):
    ilib.reload(module)
ilib.reload(ci)

In [None]:
# Silence every record below WARNING on the root logger
logger = logging.getLogger()
logger.setLevel('WARNING')

In [None]:
start = timeit.default_timer()
aRY = False  # True: build Mids/Errors from the 'aRY' column instead of the Low/High range
sback = 90  # Spread Loopback
universe = spread.get_universe('Research')
print('Number of traded commodities: {} out of 27!'.format(len(universe)))
# Load Data (every dict below is keyed by the commodity ticker)
data = dict()
cots = dict()
cots2 = dict()
stats = dict()
stdsvols = dict()
candles = dict()
for idx, row in universe.iterrows():
    ct = row['CTicker']
    data[ct] = cc.FutureChain(ct, ci.FutureType.Spread).initialize_data(extra_days=False)
    nbs = len(data[ct])
    print('{}: {} spreads in the dataset!'.format(ct, nbs))
    # COT Download (resampled to business days, gaps forward-filled)
    cot = odc.cot_data(ct)
    cots[ct] = cot['HPS'].resample('B').last().ffill()
    cots2[ct] = cot['HPH'].resample('B').last().ffill()
    # Variables
    # Explicit arrays: the element-wise accumulation below must not depend on how
    # `list += ndarray` happens to be dispatched by the interpreter.
    avg_vol = np.zeros(sback)
    avg_std = np.zeros(sback)
    stdl = []
    voll = []  # NOTE(review): never filled, so stdsvols[ct]['Vol'] is always empty - confirm intent
    midl = []
    errorl = []
    candl = []
    nb = 0
    for tk, df in data[ct].items():
        if len(df) < sback:
            print('Problem with {} (length: {}), not enough data!'.format(tk, len(df)))
            continue
        # NOTE(review): the sums below require len(df) == sback exactly; a longer
        # frame raises a broadcasting error - presumably initialize_data guarantees this.
        # Volumes
        avg_vol += df.Volume.values.astype(int)  # Little hack as volume is written as double (to correct)
        # Rolling std of the close (10 rows), computed once and reused
        roll_std = df.Close.rolling(10).std().values
        avg_std += roll_std
        stdl.append(roll_std)
        # Signal: -1/+1 from the sign of the 'HPS' series, +1/-1 from the 'HPH' series
        df['SignalS'] = np.where(cots[ct][df.index] > 0, -1, 1)
        df['SignalH'] = np.where(cots2[ct][df.index] > 0, 1, -1)
        # Ranges
        if aRY:
            # Annualized roll yield
            # NOTE(review): `d` is not defined anywhere in the visible notebook, so this
            # branch raises NameError when aRY is True - the owning module must be confirmed.
            df['aRY'] = d.roll_yield(tk, df)
            # aRY based
            mid = (df['aRY'].max() + df['aRY'].min()) / 2
            error = df['aRY'].max() - mid
        else:
            # Spread range
            mid = (df['Low'].min() + df['High'].max()) / 2
            error = df['High'].max() - mid
        midl.append(mid)
        errorl.append(error)
        # Variation Candles: one OHLC candle summarising the whole life of the spread
        candle = {'Open': df['Open'].iloc[0], 'High': df['High'].max(), 'Low': df['Low'].min(), 'Close': df['Close'].iloc[-1]}
        candl.append(candle)
        nb += 1
    if nb == 0:
        print('Problem with {}: no usable spread, statistics are undefined!'.format(ct))
    # Dividing by NaN (instead of 0) keeps the cell running and marks the averages as missing
    denom = nb if nb else np.nan
    stats[ct] = {'Vol': avg_vol / denom, 'Std': avg_std / denom, 'Mids': midl, 'Errors': errorl}
    stdsvols[ct] = {'Vol': voll, 'Std': stdl}
    candles[ct] = pd.DataFrame(candl)
print('Execution time: {:.2f} seconds!'.format(timeit.default_timer() - start))

In [None]:
start = timeit.default_timer()
# Getting data
perfs = dict()
universe = spread.get_universe('Research')
for idx, row in universe.iterrows():
    ct = row['CTicker']
    strategy = row['Strategy']
    # Number of trailing rows forming the reversal window (negative values act as 0)
    reversal = max(int(row['Reversal']), 0)
    # Daily PnL summed over every spread of this commodity. It starts empty and is
    # accumulated with fill_value=0, so no external counter is needed.
    pnl = pd.Series(dtype=float)
    for tk, df in data[ct].items():
        try:
            if 'SignalS' not in df.columns:
                continue
            df['Signal'] = df['SignalS']  # (df['SignalS'] - df['SignalH']) /2
            df['CS'] = cots[ct][df.index]
            # Mean of today's and yesterday's signal: 0 on the day the signal flips
            df['Switch'] = (df['Signal'] + df['Signal'].shift(1)) / 2
            df['Diff'] = df['Close'].diff()
            df['PnL'] = df['Switch'] * df['Diff'] * ci.get(ct, 'Point') * row['Size']
            df.fillna(0, inplace=True)
            # Positional writes through .iloc instead of chained indexing, which is not
            # guaranteed to reach the underlying frame.
            pnl_col = df.columns.get_loc('PnL')
            # First row of the reversal window; computed from the length so that
            # reversal == 0 yields an empty window rather than the `-0` slice pitfall.
            split = max(len(df) - reversal, 0)
            # PnL (with reversal)
            if reversal > 0:
                df.iloc[split:, pnl_col] = -df.iloc[split:, pnl_col].values
            # Reversal Only: drop everything before the reversal window
            if 'R' == strategy:
                df.iloc[:split, pnl_col] = 0
            # Front-Running Only: drop the reversal window
            if 'F' == strategy and reversal > 0:
                df.iloc[split:, pnl_col] = 0
            # PnL
            pnl = pnl.add(df['PnL'], fill_value=0)
        except Exception as e:
            print('Problem with {}! {}'.format(tk, e))
    perfs[ct] = {'Perf': pnl.cumsum()}
print('Execution time: {:.2f} seconds!'.format(timeit.default_timer() - start))

In [None]:
# Output folder and file-name suffix used only when save_png is True
results = r'C:\Users\Laurent\Desktop\Results'
btType = '.HPS'
save_png = False
di.display_html('<h3>Spreads Tear Sheets</h3>', raw=True)
# One 2x2 figure per commodity: volume, spread range, std and cumulated performance
for k, v in stats.items():
    perf = perfs[k]['Perf']
    # Positional access: `perf[-1]` would be treated as a label lookup on a date index
    last_perf = perf.iloc[-1] if len(perf) else float('nan')
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(18, 9))
    ax1.set_title('Average Volume')
    ax1.plot(v['Vol'])
    ax2.set_title('Spread Yield' if aRY else 'Spread Variation')
    ax2.errorbar(range(len(v['Mids'])), v['Mids'], xerr=0, yerr=v['Errors'])
    ax3.set_title('Average Std')
    ax3.plot(v['Std'])
    ax4.set_title('Performance: {}'.format(last_perf))
    ax4.plot(perf)
    f.suptitle(k, fontsize=16)
    if save_png:
        f.savefig(os.path.join(results, '{}.Tear{}.png'.format(k, btType)))

In [None]:
di.display_html('<h3>Total Performance</h3>', raw=True)
# Cumulated performance curves, one per commodity present in `stats`
curves = [perfs[k]['Perf'] for k in stats]
if curves:
    # Align all curves on the union of their dates. A cumulated curve keeps its last
    # value on dates it does not cover (ffill) and is worth 0 before its first date,
    # so no PnL is dropped or back-filled from the future when the indexes differ.
    total = (
        pd.concat(curves, axis=1, keys=list(stats))
        .astype(float)
        .ffill()
        .fillna(0.0)
        .sum(axis=1)
    )
else:
    total = pd.Series(dtype=float)
if total.empty:
    print('No performance data available!')
else:
    print(total.iloc[-1])
    total.plot()
# total.to_csv('Mini.csv')

In [None]:
di.display_html('<h3>Individual Performances</h3>', raw=True)
idfs = data['LH']
for tk, df in idfs.items():
    # Spreads without a computed PnL column are only reported
    if 'PnL' not in df.columns:
        print('DataFrame for {} is incomplete!'.format(tk))
        continue
    # Plot: close, CS column and cumulated PnL stacked on a shared x axis
    f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
    f.suptitle('{}: {}'.format(tk, df['PnL'].sum()), fontsize=16)
    ax1.plot(df['Close'])
    ax2.plot(df['CS'])
    ax3.plot(df['PnL'].cumsum())
    f.subplots_adjust(hspace=0)

In [None]:
if False:
    # Disabled analysis: average behaviour of the spread over the `window` rows that
    # precede the start of the Goldman Roll.
    # NOTE(review): the original referenced an undefined module alias `i`; it is replaced
    # by `ci`, which the PnL cell already uses for the same `get(ct, 'Point')` call.
    # `gr_month_start` and `get_stem` are presumed to live in that module - confirm.
    di.display_html('<h3>Individual Goldman Roll Performances</h3>', raw=True)
    ct = 'SB'
    window = 20
    idfs = data[ct]
    avg_diff = np.zeros(window)
    pnl = pd.Series(dtype=float)
    nb = 0
    for tk, df in idfs.items():
        # Date of when the Goldman Roll Starts: fifth row on/after the month start
        after_start = df.index[df.index >= ci.gr_month_start(tk)]
        if len(after_start) < 5:
            continue
        dt_gr_start = after_start[4]
        i_start = int(np.flatnonzero(df.index == dt_gr_start)[0])
        if i_start < window:
            # Not enough history before the roll
            continue
        # Copy so that the PnL column is not written onto a view of the original frame
        df_gr = df.iloc[i_start - window:i_start].copy()
        if len(df_gr) == window and 'Diff' in df_gr.columns:
            df_gr['PnL'] = df_gr['Switch'] * df_gr['Diff'] * ci.get(ct, 'Point') * universe.loc[ci.get_stem(ct, 'Reuters')]['Size']
            pnl = pnl.add(df_gr['PnL'], fill_value=0)
            avg_diff += df_gr['Diff'].cumsum().values
            nb += 1
    if nb:
        avg_diff /= nb
    # Plot
    f, (ax1, ax2) = plt.subplots(2)
    f.suptitle(ct, fontsize=16)
    ax1.plot(avg_diff)
    ax2.plot(pnl.cumsum())
    print(pnl.sum())

In [None]:
di.display_html('<h3>Candlesticks Variations</h3>', raw=True)
# One chart per commodity, one candle per spread
for idx, row in universe.iterrows():
    ct = row['CTicker']
    df = candles[ct]
    if df.empty:
        # No spread produced a candle for this commodity, nothing to draw
        print('No candles available for {}!'.format(ct))
        continue
    fig, ax = plt.subplots()
    fig.suptitle(ct, fontsize=16)
    try:
        fin.candlestick2_ohlc(ax, df['Open'], df['High'], df['Low'], df['Close'], width=0.6)
    except Exception:
        # Only ordinary errors trigger the retry; KeyboardInterrupt/SystemExit propagate.
        # TODO: Find root of this little hack...
        df = df + 100
        fin.candlestick2_ohlc(ax, df['Open'], df['High'], df['Low'], df['Close'], width=0.6)

In [None]:
if False:
    # Disabled analysis: moving average, over `length` consecutive spreads, of the
    # rolling-std arrays.
    di.display_html('<h3>Std Analysis</h3>', raw=True)
    # stdsvols[ct] is a dict {'Vol': [...], 'Std': [...]}; the arrays live under 'Std'
    idfs = stdsvols['LB']['Std']
    length = 10
    for idx in range(len(idfs) - length):
        adf = sum(idfs[idx:idx + length]) / length
        f, ax = plt.subplots(1)
        ax.plot(adf)

In [None]:
def plot_gradient_df(df, title, cname):
    """Plot every column of ``df`` as a line coloured along a colormap.

    The figure has two stacked axes: a thin strip showing the full ``cname``
    colormap, and below it the chart where the column at position ``idx`` is
    drawn with the colour the colormap assigns to ``idx``.

    Parameters
    ----------
    df : DataFrame whose columns are plotted against its index.
    title : figure title.
    cname : colormap name understood by ``plt.get_cmap``.

    Returns
    -------
    None. The figure is left open for the active backend to display.
    """
    # Figure: the axes are created from the GridSpec only, so no placeholder
    # axes are left behind underneath them.
    f = plt.figure(figsize=(18, 8))
    f.subplots_adjust(top=0.93, bottom=0.01, left=0.2, right=0.99)
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 50], hspace=0.02)
    # Colors: column positions 0..len(columns) are mapped onto the colormap
    cm = plt.get_cmap(cname)
    cNorm = colors.Normalize(vmin=0, vmax=len(df.columns))
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
    # Gradient: two identical rows so that imshow has a 2-D image to render
    gradient = np.linspace(0, 1, 256)
    gradient = np.vstack((gradient, gradient))
    # Gradient Plot
    ax1 = f.add_subplot(gs[0])
    ax1.imshow(gradient, aspect='auto', cmap=cm)
    ax1.get_xaxis().set_visible(False)
    ax1.get_yaxis().set_visible(False)
    # Chart plot
    ax2 = f.add_subplot(gs[1])
    for idx, col in enumerate(df.columns):
        ax2.plot(df[col], color=scalarMap.to_rgba(idx))
    # Title
    f.suptitle(title, fontsize=16)

In [None]:
# Plot contracts: one candlestick chart per contract of the chain
if True:
    stem = 'LH'
    fc = cc.FutureChain(stem, ci.FutureType.Spread)
    fc.initialize_contracts(cc.Status.Expired, filter='H', initialize_data=True)
    for ct in fc.contracts:
        df = fc.data[ct]
        fig, ax = plt.subplots(figsize=(18, 9))
        fig.suptitle(ct, fontsize=16)
        try:
            fin.candlestick2_ohlc(ax, df['Open'], df['High'], df['Low'], df['Close'], width=0.6)
        except Exception:
            # Only ordinary errors trigger the retry; KeyboardInterrupt/SystemExit propagate.
            # TODO: Find root of this little hack...
            df = df + 100
            fin.candlestick2_ohlc(ax, df['Open'], df['High'], df['Low'], df['Close'], width=0.6)

In [None]:
ilib.reload(cc)
if True:
    stem = 'LB'
    field = 'Close'
    di.display_html('<h3>{} Analysis</h3>'.format(field), raw=True)
    # Chain
    fc = cc.FutureChain(stem, ci.FutureType.Spread)
    fc.initialize_contracts(cc.Status.Expired, filter='H', initialize_data=True)
    print('Number of contracts: {}'.format(len(fc.contracts)))
    print('Contracts: {}'.format(fc.contracts))
    # Data: one column per contract, indexed by row position. Wrapping the values in a
    # Series lets contracts of different lengths coexist (shorter ones are NaN-padded)
    # instead of raising on a length mismatch.
    vals = pd.DataFrame({ct: pd.Series(fc.data[ct][field].values) for ct in fc.contracts})
    #vals[ct] = cc.aggregate(fc, ct, field)
    plot_gradient_df(vals, '{} - {}'.format(stem, field), 'inferno')