In [None]:
#Stocks
#------
#^NSEI
#NATIONALUM.NS
#BHEL.NS
#YESBANK.NS 
#GAIL.NS
#MRPL.NS
#ONGC.NS
#SJVN.NS 
#TATAPOWER.NS

### Import libraries

In [12]:
import pandas as pd
import numpy as np 
import yfinance as yf
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import permutations, combinations
%matplotlib inline

### Constants

In [None]:
# Ticker universe. '^NSEI' is the index series that later cells single out and
# compare every other symbol against; the rest are the stocks under study.
syms = [
    '^NSEI',
    'NATIONALUM.NS',
    'BHEL.NS',
    'YESBANK.NS',
    'GAIL.NS',
    'MRPL.NS',
    'ONGC.NS',
    'SJVN.NS',
    'TATAPOWER.NS',
]

# Date window handed to the Yahoo Finance download.
start_date, end_date = '2016-01-01', '2020-11-30'

### Fetching price data from Yahoo Finance

In [None]:
# Download the daily history of every ticker, tag each frame with its symbol,
# and stack them into one long table that is cached on disk.
# The frames are collected first and concatenated once: growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on every pass and
# starts from an empty frame, which recent pandas versions warn about.
price_frames = [
    yf.download(sym, start_date, end_date).assign(sym=sym)
    for sym in syms
]
price_df = pd.concat(price_frames)
price_df.to_csv('datasets/nse_equities_20160101_20201130.csv')

### Fetching price data from local file 

In [None]:
# Reload the cached download. 'Date' is parsed into timestamps here so the
# pivots, merges and line plots further down work on real dates rather than
# on strings.
price_df = pd.read_csv('datasets/nse_equities_20160101_20201130.csv', parse_dates=['Date'])

### Calculating arithmetic returns for symbols 

In [None]:
# Build the long table of daily arithmetic returns, one row per (Date, sym).
#
# price_df is left untouched so this cell can be re-run: the frame read from
# the CSV already has 'Date' as a column, and only a frame that still carries
# the dates in its index needs them lifted out.
prices = price_df if 'Date' in price_df.columns else price_df.reset_index()
filt_df = prices[['Date', 'sym', 'Adj Close', 'Volume']].copy()

# The previous close must come from the SAME symbol. A plain shift(1) over the
# stacked table would divide each symbol's first price by the last price of the
# symbol stacked above it, producing one bogus return per symbol.
prev_close = filt_df.groupby('sym', sort=False)['Adj Close'].shift(1)
filt_df['price_ret'] = filt_df['Adj Close'] / prev_close - 1

# The first row of every symbol has no previous close and is dropped here,
# together with any row that has a missing price or volume.
filt_df = filt_df.dropna(axis=0, how='any').reset_index(drop=True)

### Basic data profiling 

In [None]:
# Wide view of the daily returns: one row per date, one column per symbol.
# dropna() uses its defaults (axis=0, how='any'), so only dates on which every
# symbol has a return are kept.
ret_df = (
    filt_df
    .pivot(index='Date', columns='sym', values='price_ret')
    .dropna()
)

In [None]:
# Pairwise correlation of the daily returns across all symbols (index column included).
ret_df.corr()

In [None]:
# The same return-correlation matrix drawn as a heatmap.
sns.heatmap(ret_df.corr())
plt.show()

In [None]:
# Log adjusted close of each stock drawn against the log adjusted close of the
# index, one panel per stock on a 4x2 grid.
fig, axes = plt.subplots(4, 2, figsize=(15, 20))
plt.subplots_adjust(wspace=0.2, hspace=0.5)


def _log_close(symbol, label):
    """Return a ['Date', label] frame holding the log adjusted close of `symbol`."""
    closes = filt_df.loc[filt_df['sym'] == symbol, ['Date', 'Adj Close']].reset_index(drop=True)
    return pd.DataFrame({'Date': closes['Date'], label: np.log(closes['Adj Close'])})


# The index series is the same for every panel, so it is built once.
index_log = _log_close('^NSEI', 'NSE')
stock_syms = [name for name in syms if name != '^NSEI']

for panel, sym in enumerate(stock_syms):
    # Inner merge keeps only the dates present for both the stock and the index.
    overlay = pd.merge(_log_close(sym, sym), index_log, on='Date')
    # axes.flat walks the grid row by row: (0,0), (0,1), (1,0), ...
    overlay.plot(x='Date', y=[sym, 'NSE'], kind='line', ax=axes.flat[panel], grid=True, title=sym)

plt.show()

In [None]:
# Scatter of each stock's daily return against the index return of the same
# day, one panel per stock on a 4x2 grid.
fig, axes = plt.subplots(4, 2, figsize=(15, 20))
plt.subplots_adjust(wspace=0.2, hspace=0.5)

# Pivot on Date so every row is a single trading day. Pairing the two series
# by row position (after reset_index) would silently match returns from
# different days whenever a symbol is missing a date.
daily_ret = filt_df.pivot(index='Date', columns='sym', values='price_ret')
stock_syms = [name for name in syms if name != '^NSEI']

for panel, sym in enumerate(stock_syms):
    # Keep only the days on which both the stock and the index have a return.
    pair = daily_ret[[sym, '^NSEI']].dropna().rename(columns={'^NSEI': 'NSE'})
    # axes.flat walks the grid row by row: (0,0), (0,1), (1,0), ...
    pair.plot(x=sym, y='NSE', kind='scatter', ax=axes.flat[panel], grid=True)

plt.show()

In [None]:
# Scatter of each stock's daily LOG return against the index's log return of
# the same day, one panel per stock on a 4x2 grid.
fig, axes = plt.subplots(4, 2, figsize=(15, 20))
plt.subplots_adjust(wspace=0.2, hspace=0.5)

# Pivot on Date so the stock and the index are paired by trading day rather
# than by row position.
daily_ret = filt_df.pivot(index='Date', columns='sym', values='price_ret')
stock_syms = [name for name in syms if name != '^NSEI']

for panel, sym in enumerate(stock_syms):
    pair = daily_ret[[sym, '^NSEI']].dropna().rename(columns={'^NSEI': 'NSE'})
    # price_ret is an arithmetic return r = P_t / P_{t-1} - 1, so the log return
    # is log(1 + r). Taking log(r) directly is undefined for every non-positive
    # return and would drop those days from the plot.
    log_pair = np.log1p(pair)
    # axes.flat walks the grid row by row: (0,0), (0,1), (1,0), ...
    log_pair.plot(x=sym, y='NSE', kind='scatter', ax=axes.flat[panel], grid=True)

plt.show()

### Creating symbol list as per criteria

In [None]:
# Per-stock summary statistics measured against the index:
#   sharpe   - annualised mean / std of the stock's own daily returns
#              (no risk-free rate is subtracted)
#   beta     - cov(stock, index) / var(index) over the dates both share
#   mean_ret - mean daily return
#   std_ret  - standard deviation of daily returns
index_ret = filt_df.loc[filt_df['sym'] == '^NSEI'].set_index('Date')['price_ret']

stat_rows = []
for sym in syms:
    if sym == '^NSEI':
        continue

    stock_ret = filt_df.loc[filt_df['sym'] == sym].set_index('Date')['price_ret']
    mean_ret = stock_ret.mean()
    std_ret = stock_ret.std()

    # The Sharpe denominator is the stock's own volatility, not the index's.
    sharpe = (mean_ret / std_ret) * pow(252, 0.5)

    # Align on Date before computing beta so each stock return is paired with
    # the index return of the same day, and the index variance is taken over
    # the same sample as the covariance.
    stock_aligned, index_aligned = stock_ret.align(index_ret, join='inner')
    beta = stock_aligned.cov(index_aligned) / index_aligned.var()

    stat_rows.append([sym, sharpe, beta, mean_ret, std_ret])

# Building the frame in one go keeps the numeric columns as floats instead of
# the object dtype produced by adding rows one at a time.
stats_df = pd.DataFrame(stat_rows, columns=['symbol', 'sharpe', 'beta', 'mean_ret', 'std_ret'])

stats_df

In [None]:
def _leading_symbols(metric, ascending=False, count=5):
    """Return the first `count` symbols of stats_df once it is sorted on `metric`."""
    ordered = stats_df.sort_values([metric], ascending=ascending)
    return ordered['symbol'].tolist()[:count]


# Five-symbol groups picked by each ranking criterion.
sharpe_top_5 = _leading_symbols('sharpe')
beta_top_5 = _leading_symbols('beta')
beta_bottom_5 = _leading_symbols('beta', ascending=True)
mean_top_5 = _leading_symbols('mean_ret')
std_top_5 = _leading_symbols('std_ret')

In [None]:
# Display the five symbol groups side by side for a quick visual comparison.
sharpe_top_5, beta_top_5, beta_bottom_5, mean_top_5, std_top_5

In [None]:
# Grouped bars: the four summary statistics side by side for every symbol.
metric_cols = ['sharpe', 'beta', 'mean_ret', 'std_ret']
stats_df.plot.bar(x='symbol', y=metric_cols, grid=True, figsize=(10, 5))
plt.show()

### Functions to calculate optimal portfolio and corresponding stats

In [None]:
def get_weights(syms, step=0.05):
    """Enumerate every weight vector on a `step` grid that sums to exactly 1.

    One weight is produced per entry of `syms`. Each weight is a multiple of
    `step` in the half-open range [0, 1), so a 100% allocation to a single
    symbol is never generated.

    The search runs on integer multiples of `step`, so the "sums to 1" test is
    exact. Comparing a float sum with `== 1` silently discards valid mixes
    whose floating-point sum is off by one ulp. Only combinations that hit the
    target are generated, instead of building and filtering tens of millions
    of candidate tuples.

    Parameters
    ----------
    syms : sequence
        Symbols to allocate across; only its length is used.
    step : float, default 0.05
        Grid spacing of the weights. Should divide 1 evenly.

    Returns
    -------
    list of tuple of float
        Weight tuples of length ``len(syms)``, each weight rounded to 3 decimals.
        Empty when no such tuple exists (e.g. fewer than two symbols).
    """
    print("In get_weights()")
    n_assets = len(syms)
    total_units = int(round(1 / step))
    max_units = total_units - 1  # largest single weight is 1 - step

    def _allocations(slots, remaining):
        # Yield every way to split `remaining` units over `slots` weights.
        if slots == 0:
            if remaining == 0:
                yield ()
            return
        if slots == 1:
            # The last slot has no freedom: it must absorb whatever is left.
            if 0 <= remaining <= max_units:
                yield (remaining,)
            return
        for units in range(min(remaining, max_units) + 1):
            for rest in _allocations(slots - 1, remaining - units):
                yield (units,) + rest

    return [
        tuple(round(units * step, 3) for units in allocation)
        for allocation in _allocations(n_assets, total_units)
    ]

In [None]:
def get_max_dd(df):
    """Return the maximum drawdown of df['total_value'] as a positive percentage.

    The drawdown at each row is the fractional drop of 'total_value' from its
    running maximum; the deepest drop is returned multiplied by 100.

    The calculation works on local series, so the caller's frame does not gain
    helper columns as a side effect.
    """
    values = df['total_value']
    drawdown = values / values.cummax() - 1

    return abs(drawdown.min()) * 100

In [None]:
def return_pval_df(weights, syms, value, df):
    """Score every candidate weight vector on the given daily-return table.

    For each weight tuple the portfolio's daily return is the weighted sum of
    the symbols' daily returns, and the portfolio value path is `value`
    compounded by those returns. Mean, standard deviation and Sharpe ratio are
    taken from the daily returns; the maximum drawdown comes from the value
    path.

    Parameters
    ----------
    weights : iterable of sequence of float
        Candidate weight vectors; position i of each applies to syms[i].
    syms : sequence of str
        Symbols in the same order as the weights.
    value : float
        Starting portfolio value.
    df : pandas.DataFrame
        Must hold one 'ret_<symbol>' column of daily returns per entry of
        `syms`. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per weight vector with columns 'weights', 'mean', 'std',
        'sharpe' and 'max_dd', the numeric ones rounded to 4 decimals.

    Raises
    ------
    KeyError
        If a 'ret_<symbol>' column is missing from `df`.
    """
    print("In return_pval_df()")
    n_assets = len(syms)

    # Look the return columns up by name. Picking them by position would follow
    # the frame's own (alphabetical, after a pivot) column order, which need not
    # match the order of `syms` that the weights and their labels follow.
    ret_cols = ['ret_' + sym for sym in syms]
    missing = [col for col in ret_cols if col not in df.columns]
    if missing:
        raise KeyError("return columns missing from df: {}".format(missing))

    # Only dates on which every symbol has a return can be combined.
    returns = df[ret_cols].dropna(axis=0, how='any')
    returns_matrix = returns.to_numpy(dtype=float)
    annualise = pow(252, 0.5)

    rows = []
    for wt in weights:
        weight_vec = np.asarray(wt[:n_assets], dtype=float)
        port_ret = pd.Series(returns_matrix @ weight_vec, index=returns.index)

        # Compound the daily returns into a value path; drawdown is measured on
        # this path, never on the raw returns.
        value_path = pd.DataFrame({'total_value': value * (1 + port_ret).cumprod()})

        mean_ret = port_ret.mean()
        std_ret = port_ret.std()
        # Sharpe is computed from the unrounded moments; rounding first would
        # distort it because daily means are of the order of 1e-4.
        sharpe = mean_ret / std_ret * annualise if std_ret > 0 else float('nan')
        max_dd = get_max_dd(value_path)

        weight_names = ','.join([str(val1) + '-' + str(val2) for val1, val2 in zip(wt, syms)])
        rows.append([
            weight_names,
            round(mean_ret, 4),
            round(std_ret, 4),
            round(sharpe, 4),
            round(max_dd, 4),
        ])

    return pd.DataFrame(rows, columns=['weights', 'mean', 'std', 'sharpe', 'max_dd'])

In [None]:
def plot_pval(pval_df):
    """Scatter all candidate portfolios in (std, mean) space and point at the max-Sharpe one.

    `pval_df` must provide 'std', 'mean' and 'sharpe' columns. When several
    rows tie for the highest Sharpe value, the first of them is annotated.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(x='std', y='mean', data=pval_df)

    # Rows that reach the highest Sharpe value; the first one is labelled.
    best_rows = pval_df[pval_df['sharpe'] == pval_df['sharpe'].max()]
    std_max_sharpe = best_rows['std'].iloc[0]
    mean_max_sharpe = best_rows['mean'].iloc[0]

    label = 'Max Sharpe \n(Mean: {},\n Std: {})'.format(mean_max_sharpe, std_max_sharpe)
    ax.annotate(
        label,
        xy=(std_max_sharpe, mean_max_sharpe),
        xytext=(20, 100),
        textcoords='offset points',
        arrowprops=dict(arrowstyle='->', color='black'),
        ha='center',
        va='center',
    )
    plt.show()

In [None]:
def get_portfolio_data(value, syms, filt_df):
    """Evaluate every candidate weighting of `syms` and plot the outcome.

    The long return table `filt_df` is narrowed to the requested symbols and
    pivoted to one 'ret_<symbol>' column per symbol (plus 'Date'). That table
    is scored for each weight vector from get_weights(), and the result is
    plotted with plot_pval().

    Returns the per-weighting statistics frame produced by return_pval_df().
    """
    print("In get_portfolio_data()")
    weights = get_weights(syms)

    selected = filt_df[filt_df['sym'].isin(syms)].reset_index(drop=True)
    wide_returns = selected.pivot(index='Date', columns='sym', values='price_ret')
    wide_returns.columns = ['ret_' + name for name in wide_returns.columns]
    wide_returns = wide_returns.reset_index()  # 'Date' becomes the first column

    pval_df = return_pval_df(weights, syms, value, wide_returns)
    plot_pval(pval_df)

    return pval_df

In [None]:
def get_portfolio_stats(value, syms):
    """Print the weights and statistics of the highest-Sharpe portfolio for `syms`.

    Candidate portfolios are built from the notebook-level `filt_df` return
    table via get_portfolio_data(), then ranked by Sharpe ratio in descending
    order. All five printed figures come from the single top-ranked row, so
    they describe one and the same portfolio.

    Parameters
    ----------
    value : float
        Starting portfolio value passed through to get_portfolio_data().
    syms : sequence of str
        Symbols to allocate across.
    """
    final_df = get_portfolio_data(value, syms, filt_df)
    final_df = final_df.sort_values(['sharpe'], ascending=False).reset_index(drop=True)

    if final_df.empty:
        # No weight vector was evaluated, so there is no best row to report.
        print("No portfolio candidates were produced for: {}".format(syms))
        return

    report_lines = [
        ("Weights : {}", 'weights'),
        ("Mean    : {}", 'mean'),
        ("Std     : {}", 'std'),
        ("Sharpe  : {}", 'sharpe'),
        ("Max DD  : {}", 'max_dd'),
    ]
    for template, column in report_lines:
        # Row 0 is the best-Sharpe portfolio after the descending sort.
        print(template.format(final_df.loc[0, column]))

### Function calls to calculate portfolio stats for symbol groups 

In [None]:
# Weight search over the five symbols ranked highest on the 'sharpe' column of stats_df, starting value 1.
get_portfolio_stats(1, sharpe_top_5)

In [None]:
# Weight search over the five symbols ranked highest on the 'beta' column of stats_df, starting value 1.
get_portfolio_stats(1, beta_top_5)

In [None]:
# Weight search over the five symbols ranked lowest on the 'beta' column of stats_df, starting value 1.
get_portfolio_stats(1, beta_bottom_5)

In [None]:
# Weight search over the five symbols ranked highest on the 'mean_ret' column of stats_df, starting value 1.
get_portfolio_stats(1, mean_top_5)

In [None]:
# Weight search over the five symbols ranked highest on the 'std_ret' column of stats_df, starting value 1.
get_portfolio_stats(1, std_top_5)