In [10]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
from datetime import datetime
from matplotlib import pyplot as plt

# Data Loading

In [11]:
# Reading stock price data 
pp = pd.read_csv("Pairs_Price.csv")     # Maybe we should use this one?
pp = pp.dropna()
pr = pd.read_csv("Pairs_RI.csv")        # This one is used
pr = pr.dropna()

In [12]:
# Grouping data by year
pr['date'] = pd.to_datetime(pr['date'], infer_datetime_format=True, dayfirst = True)
eoy = pr.groupby(pd.DatetimeIndex(pr.date).to_period('Y')).nth(-1)
eoy = eoy.drop(eoy.columns[0], axis=1)      # Not sure why, but without there is a duplicate of dates
print(eoy.to_markdown())

| date   |   APMOLLER_B |   APMOLLER_A |   INDUSTRIVARDEN_A |   INDUSTRIVARDEN_C |   INVESTOR_B |   INVESTOR_A |   SVENSKAHANDBKN_A |   SVENSKAHANDBKN_B |   VOLVO_B |   VOLVO_A |   VW_PREF |       VW |   HYUNDAI |   HYUNDAI_PREF |   STORAENSO_R |   STORAENSO_A |
|:-------|-------------:|-------------:|-------------------:|-------------------:|-------------:|-------------:|-------------------:|-------------------:|----------:|----------:|----------:|---------:|----------:|---------------:|--------------:|--------------:|
| 2004   |      1144.92 |      1385.29 |             513.52 |              93.75 |       229.79 |       655.89 |             796.98 |             191.24 |    733.2  |    698.23 |    246.59 |  1491.93 |      2.45 |           0.17 |        679.98 |        518.22 |
| 2005   |      1441.34 |      1701.95 |             569.16 |             106.07 |       322.83 |       918.31 |             786.03 |             189.27 |    906.25 |    874.63 |    295.29 |  1791.23 |      4.45

# Running Strategy

In [13]:
# Creating securities lists
securities_A = []
securities_B = []
for col in eoy.columns[0::2]:
    securities_A.append(col)
for col in eoy.columns[1::2]:
    securities_B.append(col)
# Dataframe with all trades
yearly_returns = pd.DataFrame(index=np.arange(2005, 2015))

for k in range(len(securities_A)):
    # Setting securities to analyze
    name_security_A = securities_A[k]
    name_security_B = securities_B[k]

    # Creating Result Arrays, Yearly returns
    trades_temp_a = []
    trades_temp_b = []

    # Calculating Trades
    for i in range(1, len(eoy)):
        # Going Long
        if eoy[name_security_A].values[i-1] < eoy[name_security_B].values[i-1]:
            # Long on A
            temp = (eoy[name_security_A].values[i]) / (eoy[name_security_A].values[i - 1])
            trades_temp_a.append(temp)
            # Short on B
            temp = (eoy[name_security_B].values[i - 1]) / (eoy[name_security_B].values[i])
            trades_temp_b.append(temp)
        # Going Short
        else:
            # Long on B
            temp = (eoy[name_security_B].values[i]) / (eoy[name_security_B].values[i - 1])
            trades_temp_b.append(temp)
            # Short on A
            temp = (eoy[name_security_A].values[i - 1]) / (eoy[name_security_A].values[i])
            trades_temp_a.append(temp)

    # Saving results in the Dataframe
    yearly_returns[name_security_A] = trades_temp_a
    yearly_returns[name_security_B] = trades_temp_b

print(yearly_returns.to_markdown()) # The table is in easy format to find arithmetic and geometric restults. For geometric just multiply all, for arithmetic sum of each element-1

|      |   APMOLLER_B |   APMOLLER_A |   INDUSTRIVARDEN_A |   INDUSTRIVARDEN_C |   INVESTOR_B |   INVESTOR_A |   SVENSKAHANDBKN_A |   SVENSKAHANDBKN_B |   VOLVO_B |   VOLVO_A |   VW_PREF |       VW |   HYUNDAI |   HYUNDAI_PREF |   STORAENSO_R |   STORAENSO_A |
|-----:|-------------:|-------------:|-------------------:|-------------------:|-------------:|-------------:|-------------------:|-------------------:|----------:|----------:|----------:|---------:|----------:|---------------:|--------------:|--------------:|
| 2005 |     1.2589   |     0.813943 |           0.902242 |           1.13141  |     1.40489  |     0.714236 |           1.01393  |           0.989699 |  0.809048 |  1.25264  |  1.19749  | 0.832908 |  0.550562 |       2.41176  |      1.09003  |      0.897148 |
| 2006 |     0.922267 |     1.09413  |           0.667355 |           1.53022  |     1.44008  |     0.700188 |           0.792305 |           1.28298  |  0.652246 |  1.62492  |  2.00735  | 0.456177 |  1.30882  |      

# Analysing Pairs

In [19]:
for k in range(len(securities_A)):
    # Setting securities to analyze
    name_security_A = securities_A[k]
    name_security_B = securities_B[k]
    # Finding values for returns
    yearly_excessive_returns_pair = ((yearly_returns.loc[:, name_security_A] + yearly_returns.loc[:, name_security_B]) / 2) - 1
    yearly_excessive_returns_pair_std = np.std(yearly_excessive_returns_pair)
    # Calculating and Outputting 
    print(name_security_A.split("_")[0], "Pair")
    print("Yearly excess returns in %:\n", round(yearly_excessive_returns_pair*100, 2))                     # We assume there is no risk free rate
    print("Overall returns in %:", yearly_excessive_returns_pair.sum()*100)                                 # Overall returns is fixed arithmetic sum
    print("Overall returns STD:", yearly_excessive_returns_pair_std)
    print("Overall returns SR:", yearly_excessive_returns_pair.sum()/yearly_excessive_returns_pair_std)     # We assume there is no risk free rate
    # Statistical Significance
    t2, p2 = stats.ttest_ind(yearly_excessive_returns_pair, np.zeros(len(yearly_excessive_returns_pair)))
    print("t = " + str(t2))
    print("p = " + str(p2/2)) # one sided t test
    if p2 < 0.05:
        print("The results are statistically significant")
    else:
        print("The results are not statistically significant")

    print("\n")

APMOLLER Pair
Yearly excess returns in %:
 2005     3.64
2006     0.82
2007    -0.60
2008    23.64
2009     6.77
2010     3.07
2011     6.38
2012     1.35
2013     7.54
2014     1.07
dtype: float64
Overall returns in %: 53.674447650540856
Overall returns STD: 0.06639666670694569
Overall returns SR: 8.08390696590888
t = 2.425172089772664
p = 0.013021791701834337
The results are statistically significant


INDUSTRIVARDEN Pair
Yearly excess returns in %:
 2005     1.68
2006     9.88
2007    -0.08
2008    35.88
2009    24.79
2010    12.29
2011     3.33
2012     8.08
2013     0.01
2014     0.47
dtype: float64
Overall returns in %: 96.32106034666774
Overall returns STD: 0.11409205044570268
Overall returns SR: 8.442398920028849
t = 2.5327196760086546
p = 0.010419249084856683
The results are statistically significant


INVESTOR Pair
Yearly excess returns in %:
 2005    5.96
2006    7.01
2007    1.01
2008    8.63
2009    2.39
2010    2.50
2011    0.83
2012    6.61
2013    5.06
2014   -0.17
dtyp

# Analysing a whole portfolio

In [15]:
yearly_excessive_returns_portfolio_list = []
for k in range(len(yearly_returns)):
    # Setting securities to analyze

    # Finding values for returns
    yearly_excessive_returns_portfolio = ((yearly_returns.iloc[k, :].sum()) / 16) - 1
    yearly_excessive_returns_portfolio_list.append(yearly_excessive_returns_portfolio)
    
    # Calculating and Outputting 
    print("Year:", 2005+k)
    print("Yearly excess returns in %: ", round(yearly_excessive_returns_portfolio*100, 2))                         # We assume there is no risk free rate

print("Overall returns in %:", np.sum(yearly_excessive_returns_portfolio_list)*100)                                 # Overall returns is fixed arithmetic sum
print("Overall returns STD:", np.std(yearly_excessive_returns_portfolio_list))
print("Overall returns SR:", np.sum(yearly_excessive_returns_portfolio_list)/np.std(yearly_excessive_returns_portfolio_list))     # We assume there is no risk free rate

Year: 2005
Yearly excess returns in %:  7.94
Year: 2006
Yearly excess returns in %:  7.67
Year: 2007
Yearly excess returns in %:  2.29
Year: 2008
Yearly excess returns in %:  19.54
Year: 2009
Yearly excess returns in %:  41.53
Year: 2010
Yearly excess returns in %:  9.57
Year: 2011
Yearly excess returns in %:  3.88
Year: 2012
Yearly excess returns in %:  4.95
Year: 2013
Yearly excess returns in %:  7.67
Year: 2014
Yearly excess returns in %:  1.67
Overall returns in %: 106.7138633766124
Overall returns STD: 0.11348278531359854
Overall returns SR: 9.403528745062003


In [17]:
t2, p2 = stats.ttest_ind(yearly_excessive_returns_portfolio_list, np.zeros(len(yearly_excessive_returns_portfolio_list)))
print("t = " + str(t2))
print("p = " + str(p2/2)) # one sided t test

if p2 < 0.05:
    print("The results are statistically significant")
else:
    print("The results are not statistically significant")

t = 2.821058623518601
p = 0.005657006496804751
The results are statistically significant
