In [9]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime
from matplotlib import pyplot as plt

In [10]:
# Load the two input tables from the working directory.
# `pr` (Pairs_RI.csv) is the table the analysis works with; `pp`
# (Pairs_Price.csv) is loaded alongside it for reference.
pr = pd.read_csv("Pairs_RI.csv")
pp = pd.read_csv("Pairs_Price.csv")

In [11]:
# Year-end snapshot: keep the last row (in file order) of every calendar year.
#
# The year becomes the index (named 'date') and the raw 'date' column is
# dropped. Without the drop the table carries the date twice: once as the
# index built from the grouping key and once as the ordinary first column.
pr['date'] = pd.to_datetime(pr['date'], dayfirst=True)   # ambiguous dates are read day-first

row_year = pr['date'].dt.to_period('Y')
# Rows with a missing date belong to no year and are skipped.
is_year_end = row_year.notna() & ~row_year.duplicated(keep='last')

# Rows are selected explicitly instead of via groupby(...).nth(-1): since
# pandas 2.0 nth() acts as a filter and keeps the original row labels, so the
# result would no longer be indexed by year.
eoy = (
    pr.loc[is_year_end]
    .drop(columns='date')
    .set_index(row_year[is_year_end])
    .sort_index()
)
print(eoy.to_markdown())

| date   |   APMOLLER_B |   APMOLLER_A |   INDUSTRIVARDEN_A |   INDUSTRIVARDEN_C |   INVESTOR_B |   INVESTOR_A |   SVENSKAHANDBKN_A |   SVENSKAHANDBKN_B |   VOLVO_B |   VOLVO_A |   VW_PREF |       VW |   HYUNDAI |   HYUNDAI_PREF |   STORAENSO_R |   STORAENSO_A |
|:-------|-------------:|-------------:|-------------------:|-------------------:|-------------:|-------------:|-------------------:|-------------------:|----------:|----------:|----------:|---------:|----------:|---------------:|--------------:|--------------:|
| 2004   |      1144.92 |      1385.29 |             513.52 |              93.75 |       229.79 |       655.89 |             796.98 |             191.24 |    733.2  |    698.23 |    246.59 |  1491.93 |      2.45 |           0.17 |        679.98 |        518.22 |
| 2005   |      1441.34 |      1701.95 |             569.16 |             106.07 |       322.83 |       918.31 |             786.03 |             189.27 |    906.25 |    874.63 |    295.29 |  1791.23 |      4.45

In [12]:
# Split the columns into pair legs: eoy stores every pair as two adjacent
# columns, so even positions form leg "A" and odd positions form leg "B".
securities_A = list(eoy.columns[0::2])
securities_B = list(eoy.columns[1::2])
if len(securities_A) != len(securities_B):
    raise ValueError("eoy must hold an even number of columns (two per pair)")

# One row per trade, labelled by the year in which the position is closed.
# The labels are derived from eoy so the table follows whatever period the
# data covers instead of a fixed range of years.
closing_index = eoy.index[1:]
if hasattr(closing_index, 'year'):
    trade_labels = pd.Index(np.asarray(closing_index.year))   # plain integer years
else:
    trade_labels = closing_index.rename(None)
yearly_returns = pd.DataFrame(index=trade_labels)

for name_security_A, name_security_B in zip(securities_A, securities_B):
    level_a = eoy[name_security_A].to_numpy()
    level_b = eoy[name_security_B].to_numpy()

    # Position for year t is decided at the end of year t-1: the leg with the
    # lower value is bought, the other leg is sold short.
    a_is_lower = level_a[:-1] < level_b[:-1]

    # Long leg:  value[t] / value[t-1]
    # Short leg: value[t-1] / value[t]
    # NOTE(review): the short leg is modelled as the inverse ratio; confirm
    # this is the intended definition of the return on a short position.
    yearly_returns[name_security_A] = np.where(
        a_is_lower,
        level_a[1:] / level_a[:-1],     # long A
        level_a[:-1] / level_a[1:],     # short A
    )
    yearly_returns[name_security_B] = np.where(
        a_is_lower,
        level_b[:-1] / level_b[1:],     # short B
        level_b[1:] / level_b[:-1],     # long B
    )

print(yearly_returns.to_markdown())

|      |   APMOLLER_B |   APMOLLER_A |   INDUSTRIVARDEN_A |   INDUSTRIVARDEN_C |   INVESTOR_B |   INVESTOR_A |   SVENSKAHANDBKN_A |   SVENSKAHANDBKN_B |   VOLVO_B |   VOLVO_A |   VW_PREF |       VW |   HYUNDAI |   HYUNDAI_PREF |   STORAENSO_R |   STORAENSO_A |
|-----:|-------------:|-------------:|-------------------:|-------------------:|-------------:|-------------:|-------------------:|-------------------:|----------:|----------:|----------:|---------:|----------:|---------------:|--------------:|--------------:|
| 2005 |     1.2589   |     0.813943 |           0.902242 |           1.13141  |     1.40489  |     0.714236 |           1.01393  |           0.989699 |  0.809048 |  1.25264  |  1.19749  | 0.832908 |  0.550562 |       2.41176  |      1.09003  |      0.897148 |
| 2006 |     0.922267 |     1.09413  |           0.667355 |           1.53022  |     1.44008  |     0.700188 |           0.792305 |           1.28298  |  0.652246 |  1.62492  |  2.00735  | 0.456177 |  1.30882  |      

In [13]:
# Report, for every pair, the arithmetic mean of the yearly returns averaged
# over both legs (1$ is assumed to be invested in each stock of the pair).
for k, name_security_A in enumerate(securities_A):
    name_security_B = securities_B[k]

    # Mean yearly return ratio of each leg
    mean_leg_a = yearly_returns[name_security_A].mean()
    mean_leg_b = yearly_returns[name_security_B].mean()
    pair_average = (mean_leg_a + mean_leg_b) / 2

    # The pair label is the column name up to the first underscore.
    # TODO: the excess return is not computed yet; the printed figure is the
    # average return ratio, where 1.0 means the value did not change.
    print("Average Arithmetic Return on", name_security_A.split("_")[0], "Pair: ", pair_average)

Average Arithmetic Return on APMOLLER Pair:  1.0536744476505409
Average Arithmetic Return on INDUSTRIVARDEN Pair:  1.0963210603466678
Average Arithmetic Return on INVESTOR Pair:  1.0398261821009416
Average Arithmetic Return on SVENSKAHANDBKN Pair:  1.0573781536020093
Average Arithmetic Return on VOLVO Pair:  1.139598677165328
Average Arithmetic Return on VW Pair:  1.1826585694267542
Average Arithmetic Return on HYUNDAI Pair:  1.238178256359412
Average Arithmetic Return on STORAENSO Pair:  1.046075560361245
