In [None]:
import numpy as np 
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns', None)
#########################################################
# Load the three CSV files of the stock-exchange dataset, in this order:
#   df   <- indexData.csv       (raw per-index rows; used for the backtest and correlations)
#   dfp  <- indexProcessed.csv  (used further down for the CloseUSD plots)
#   info <- indexInfo.csv       (merged onto dfp through the 'Index' column)
df, dfp, info = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/stock-exchange-data/indexData.csv',
        '../input/stock-exchange-data/indexProcessed.csv',
        '../input/stock-exchange-data/indexInfo.csv',
    )
)

In [None]:
# Show the raw index data as the cell output for a first look at its columns.
df

In [None]:
# Re-key the raw frame by its parsed dates. Passing an index object (rather than
# a column name) to set_index leaves the 'Date' column in place as well.
date_index = pd.DatetimeIndex(df['Date'])
df = df.set_index(date_index)

# Narrow view with only the columns the backtest needs.
df1 = df[['Index', 'Open', 'Close']]
df1.tail()

In [None]:
# Keep only the rows whose 'Index' value is 'NYA'; this slice feeds the backtest below.
price = df1.loc[df1['Index'].eq('NYA')]
price

In [None]:
# 'Index' holds a single value after the NYA filter, so drop it.
# Use the `columns=` keyword: the positional axis form `drop(labels, 1)` was
# removed in pandas 2.0 and raises TypeError there.
# errors='ignore' lets the cell be re-run after the column is already gone.
price = price.drop(columns=['Index'], errors='ignore')

In [None]:
# Work on an independent copy so the columns added below land in a real frame
# rather than in a slice of df1. The previous `price = df2` referenced a name
# that is never defined in this notebook and raised NameError on a fresh kernel.
price = price.copy()

#define variables
STARTING_BALANCE = 10000  # notional balance both equity curves start from
down_days = 1             # consecutive down days required for a long signal

#calculate return and balance
price['oc'] = price.Close / price.Open            # same-day open-to-close ratio
price['cc'] = price.Close / price.Close.shift(1)  # close-to-close ratio vs. the previous row
# The first row has no previous close; pin its ratio to 1 so the cumulative
# product starts exactly at STARTING_BALANCE. Written through .iloc on the frame
# itself because the chained form `price.cc.iat[0] = 1` can end up modifying a
# temporary Series instead of the frame (always the case under Copy-on-Write).
price.iloc[0, price.columns.get_loc('cc')] = 1
price['Bench_Bal'] = STARTING_BALANCE * price.cc.cumprod()


#calculate benchmark drawdown
price['Bench_Peak'] = price.Bench_Bal.cummax()          # running high-water mark
price['Bench_DD'] = price.Bench_Bal - price.Bench_Peak  # zero or negative distance below it

# worst drawdown relative to the peak at that time, in percent
bench_dd = round(((price.Bench_DD / price.Bench_Peak).min() * 100), 2)


#calculate additional columns for strategy

#check if today is a down day
price['Down'] = price.oc < 1

#count consecutive down days
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array
# (down != down.shift()).cumsum() gives every run of equal values its own id;
# cumcount() + 1 numbers the rows inside each run, and multiplying by the
# boolean `down` zeroes the runs that are not down days.
down = price['Down']
price['Consecutive'] = down * (down.groupby((down != down.shift()).cumsum()).cumcount() + 1)

price.tail()

In [None]:
# Entry signal: a row is flagged once the current run of down days has reached
# `down_days`. (No trading fees are applied anywhere in this cell.)
price['Long'] = price.Consecutive.ge(down_days)

# A signal on one row earns the *next* row's close-to-close ratio; rows without
# a signal on the previous row stay flat (factor 1). The first row has no
# previous row, so it is treated as "no signal".
price['Sys_Ret'] = np.where(price.Long.shift(1, fill_value=False), price.cc, 1)
price['Sys_Bal'] = price.Sys_Ret.cumprod() * STARTING_BALANCE

price.tail()

In [None]:
#plot results
# Benchmark (always invested) vs. the down-day system. The curves are labelled
# and the axes titled so the figure can be read without consulting the code.
fig, ax = plt.subplots()
ax.plot(price.Bench_Bal, label='Benchmark')
ax.plot(price.Sys_Bal, label='System')
ax.set_xlabel('Date')
ax.set_ylabel('Balance')
ax.set_title('Benchmark vs. system balance')
ax.legend()

plt.show()

In [None]:
# System drawdown: how far the balance sits below its running high-water mark.
price['Sys_Peak'] = price['Sys_Bal'].cummax()
price['Sys_DD'] = price['Sys_Bal'].sub(price['Sys_Peak'])

# Deepest drawdown relative to the peak at that time, in percent (2 decimals).
sys_dd = round(price['Sys_DD'].div(price['Sys_Peak']).min() * 100, 2)

sys_dd

In [None]:
# Length of the backtest in years, derived from the data's own date span rather
# than a hard-coded 25, so the CAGR figures match whatever period `price` covers.
# Relies on `price` being indexed by dates (the DatetimeIndex set earlier on df).
span_days = (price.index.max() - price.index.min()).days
YEARS = span_days / 365.25 if span_days > 0 else float('nan')

#calculate metrics
# .iloc makes the first/last lookups explicitly positional; plain `series[-1]`
# on a date-indexed Series relies on a deprecated positional fallback.
bench_growth = price.Bench_Bal.iloc[-1] / price.Bench_Bal.iloc[0]
sys_growth = price.Sys_Bal.iloc[-1] / price.Sys_Bal.iloc[0]

bench_return = round((bench_growth - 1) * 100, 2)
bench_cagr = round((bench_growth ** (1 / YEARS) - 1) * 100, 2)
sys_return = round((sys_growth - 1) * 100, 2)
sys_cagr = round((sys_growth ** (1 / YEARS) - 1) * 100, 2)

# Share of rows with a long signal. The mean of a boolean column is the fraction
# of True values and, unlike value_counts().loc[True], does not raise when the
# strategy is never long.
sys_in_market = round(price.Long.mean() * 100)

# Rows with factor exactly 1.0 (flat days) count as neither a win nor a loss.
sys_win = price.Sys_Ret[price.Sys_Ret > 1.0].count()
sys_loss = price.Sys_Ret[price.Sys_Ret < 1.0].count()
sys_winrate = round(sys_win / (sys_win + sys_loss) * 100, 2)

print(f'Benchmark Total return: {bench_return}%')
print(f'Benchmark CAGR: {bench_cagr}')
print(f'Benchmark DD: {bench_dd}%')
print('')
print(f'System Total return: {sys_return}%')
print(f'System CAGR: {sys_cagr}')
print(f'System DD: {sys_dd}%')
print(f'Time in Market: {sys_in_market}%')
print(f'Trades Won: {sys_win}')
print(f'Trades Loss: {sys_loss}')
print(f'Winrate: {sys_winrate}%')

# **Correlation**

In [None]:
# Correlation plots
# Drop incomplete rows and renumber 0..n-1 so that "previous row" below means
# the row directly above. Rebinding avoids mutating the existing frame in place.
df = df.dropna().reset_index(drop=True)

new_features = ['p_change', 'close-1', 'close-1%', 'volume-1', 'volume-1%']

# Row-over-row features, computed column-wise instead of a per-row Python loop
# with chained `df[col][k] = ...` writes (slow, and under Copy-on-Write those
# writes never reach the frame).
# A row only receives values when the row directly above carries the same
# 'Index'; every other row, including row 0, keeps 0.0, exactly as before.
# NOTE(review): p_change (Close - Open) does not depend on the previous row, yet
# the first row of each index is left at 0 as in the original loop. Confirm
# whether that is intended.
same_index = df['Index'].eq(df['Index'].shift())
prev_close = df['Close'].shift()
prev_volume = df['Volume'].shift()
# Volume deltas additionally require a non-zero volume on both rows.
has_volume = same_index & df['Volume'].ne(0) & prev_volume.ne(0)

df['p_change'] = (df['Close'] - df['Open']).where(same_index, 0.0)
df['close-1'] = (df['Close'] - prev_close).where(same_index, 0.0)
df['close-1%'] = (df['Close'] / prev_close * 100 - 100).where(same_index, 0.0)
df['volume-1'] = (df['Volume'] - prev_volume).where(has_volume, 0.0)
df['volume-1%'] = (df['Volume'] / prev_volume * 100 - 100).where(has_volume, 0.0)
df[new_features] = df[new_features].astype('float')

# Calendar parts used by the later filters.
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month

# One column of closing prices per index listed in `info`, from 2012 onwards.
# Rows are keyed by Date so each correlation compares the same day across
# exchanges. The earlier positional stacking (reset_index per index) paired
# row i of one index with row i of another regardless of date, and aligned
# every column to the row count of the first one.
recent = df.loc[df['year'] >= 2012]
corr_map = (
    recent.pivot_table(index='Date', columns='Index', values='Close', aggfunc='last')
    .reindex(columns=info['Index'].tolist())  # same column order as before; absent indexes become all-NaN
    .rename_axis(columns=None)                # keep the heatmap axes unlabelled, as before
)

# Compute the correlation matrix once and reuse it for the mask and the plot.
corr = corr_map.corr()
matrix = np.triu(corr)  # non-zero entries (upper triangle incl. diagonal) are hidden by the mask
plt.figure(figsize = (12, 10))
sns.heatmap(corr, annot = True, cmap = 'Blues', fmt=".2f", mask = matrix, vmin = -1, vmax = 1, linewidths = 0.1, linecolor = 'white', cbar = False, annot_kws = {'fontsize': 11})
plt.xticks(size = 10, fontname = 'monospace')
plt.yticks(size = 11, fontname = 'monospace')
plt.figtext(0.88, 0.65, '''Correlation 
between
exchanges''', fontsize = 40, fontname = 'monospace', ha = 'right', color = '#4897d8')
plt.show()

In [None]:
# Join the processed rows with the per-index metadata on the shared 'Index' column.
dfp_1 = dfp.merge(info, on="Index")

# Parse the dates and store the label-like columns as categoricals.
dfp_1["Date"] = pd.to_datetime(dfp_1["Date"])
dfp_1 = dfp_1.astype({
    'Index': 'category',
    'Region': 'category',
    'Exchange': 'category',
    'Currency': 'category',
})

# Pairwise scatter/histogram grid over the merged frame.
sns.pairplot(dfp_1)

In [None]:
# Distinct index codes in order of first appearance in dfp, and one sub-frame
# of dfp_1 per code (same order).
stocks = dfp['Index'].unique()
stock_dfs = [dfp_1[dfp_1['Index'] == stock] for stock in stocks]

In [None]:
from matplotlib.cm import hsv
import matplotlib.patches as mpatches

# This cell ends by rebinding `stock_dfs` from a list to a dict keyed by index
# code. Accept either shape here so that running the cell a second time does
# not iterate over dict keys (strings) and fail on `stock_df['Date']`.
frames = list(stock_dfs.values()) if isinstance(stock_dfs, dict) else list(stock_dfs)

fig, ax = plt.subplots(figsize=(20, 20))
patches = []

# One CloseUSD line per index, coloured by evenly spaced positions on the hsv
# colormap, with a matching legend patch labelled by the frame's index code.
for i, stock_df in enumerate(frames):
    color = hsv(i / len(frames))
    sns.lineplot(ax=ax, x=stock_df['Date'], y=stock_df['CloseUSD'], color=color)
    patches.append(mpatches.Patch(color=color, label=stock_df['Index'].iloc[0]))

ax.legend(handles=patches)

# Leave behind a lookup from index code to its frame (same result as before).
stock_dfs = {frame['Index'].iloc[0]: frame for frame in frames}