In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [3]:
# Load the monthly WIG20 index price history; it is used as the market series below.
# Column names are Polish: Data = date, Otwarcie = open, Najwyzszy = high,
# Najnizszy = low, Zamkniecie = close, Wolumen = volume.
wig_20 = pd.read_csv("wig20_m.csv")

In [4]:
# Preview the first 5 rows of the market data.
wig_20.head()

Unnamed: 0,Data,Otwarcie,Najwyzszy,Najnizszy,Zamkniecie,Wolumen
0,2003-05-31,1111.37,1192.46,1109.49,1192.46,113660282.0
1,2003-06-30,1194.94,1257.84,1187.26,1252.03,72644966.0
2,2003-07-31,1242.96,1397.07,1227.9,1387.1,166610522.0
3,2003-08-31,1384.29,1641.65,1353.9,1637.53,196557768.0
4,2003-09-30,1668.99,1742.68,1477.25,1477.25,152003802.0


In [5]:
# Add a 'return' column: the fractional change of the closing price (Zamkniecie)
# from the previous row, i.e. the previous month, to the current one.
# The first row has no predecessor, so its return is NaN.
wig_20['return'] = wig_20['Zamkniecie'].pct_change()

In [6]:
# Check the new 'return' column; row 0 is NaN because it has no previous close.
wig_20.head()

Unnamed: 0,Data,Otwarcie,Najwyzszy,Najnizszy,Zamkniecie,Wolumen,return
0,2003-05-31,1111.37,1192.46,1109.49,1192.46,113660282.0,
1,2003-06-30,1194.94,1257.84,1187.26,1252.03,72644966.0,0.049956
2,2003-07-31,1242.96,1397.07,1227.9,1387.1,166610522.0,0.107881
3,2003-08-31,1384.29,1641.65,1353.9,1637.53,196557768.0,0.180542
4,2003-09-30,1668.99,1742.68,1477.25,1477.25,152003802.0,-0.097879


In [7]:
# Load the monthly price history of Bank Polska Kasa Opieki (PEKAO), the stock
# whose beta is estimated below. The columns match the WIG20 file
# (Zamkniecie = close).
peo = pd.read_csv('peo_m.csv')

In [8]:
# Preview the first 5 rows of the stock data.
peo.head()

Unnamed: 0,Data,Otwarcie,Najwyzszy,Najnizszy,Zamkniecie,Wolumen
0,2003-05-31,41.1699,44.3923,39.8689,44.3923,6237197.0
1,2003-06-30,44.3461,48.6325,44.3007,48.6325,6552683.0
2,2003-07-31,47.7639,54.6506,47.187,51.7586,9592962.0
3,2003-08-31,51.2818,61.389,48.6325,60.9122,10207710.0
4,2003-09-30,61.389,64.5198,51.7586,54.1681,11381120.0


In [9]:
# Add a 'return' column: the fractional change of the closing price (Zamkniecie)
# from the previous row, i.e. the previous month, to the current one.
# The first row has no predecessor, so its return is NaN.
peo['return'] = peo['Zamkniecie'].pct_change()

In [10]:
# Check the new 'return' column; row 0 is NaN because it has no previous close.
peo.head()

Unnamed: 0,Data,Otwarcie,Najwyzszy,Najnizszy,Zamkniecie,Wolumen,return
0,2003-05-31,41.1699,44.3923,39.8689,44.3923,6237197.0,
1,2003-06-30,44.3461,48.6325,44.3007,48.6325,6552683.0,0.095517
2,2003-07-31,47.7639,54.6506,47.187,51.7586,9592962.0,0.06428
3,2003-08-31,51.2818,61.389,48.6325,60.9122,10207710.0,0.176852
4,2003-09-30,61.389,64.5198,51.7586,54.1681,11381120.0,-0.110718


In [11]:
# Estimate beta with a linear regression of the stock's returns (y) on the
# market's returns (x). Row 0 of each 'return' column is NaN, so it is skipped.
# scikit-learn expects a 2-D feature matrix, hence the reshape to one column.
# NOTE(review): rows are paired by position, which assumes both files cover
# the same months in the same order - confirm.
x = np.array(wig_20['return'].iloc[1:]).reshape(-1, 1)
y = np.array(peo['return'].iloc[1:])

model = LinearRegression()
model.fit(x, y)

In [12]:
# Beta is the slope of the fitted regression, i.e. the model's coefficient.
# coef_ is a one-element array because the regression has a single feature,
# which is why the printed value appears in brackets.
print(f"Beta: {model.coef_}")

Beta: [1.15371447]


In [21]:
# Second method: beta = cov(stock, market) / var(market).
# Row 0 of each 'return' column is NaN, so it is skipped.
x = np.array(wig_20['return'].iloc[1:])  # market
y = np.array(peo['return'].iloc[1:])  # stock

# Put both return series side by side: X = market, Y = stock.
data = dict(X=x, Y=y)
df = pd.DataFrame(data)
df

Unnamed: 0,X,Y
0,0.049956,0.095517
1,0.107881,0.064280
2,0.180542,0.176852
3,-0.097879,-0.110718
4,0.070916,0.022223
...,...,...
235,0.061518,0.065896
236,-0.028550,-0.027332
237,-0.048367,-0.047056
238,0.093560,0.123099


In [22]:
# Covariance matrix of the market (X) and stock (Y) returns.
c = df.cov()
c

Unnamed: 0,X,Y
X,0.003999,0.004614
Y,0.004614,0.008161


In [23]:
# Covariance between the stock (row Y) and the market (column X).
c.loc['Y', 'X']

0.004614167111485362

In [24]:
# Variance of each return series (X = market, Y = stock).
v = df.var()
v

X    0.003999
Y    0.008161
dtype: float64

In [25]:
# Variance of the market returns (X).
v['X']

0.003999401255009894

In [26]:
# Divide each column of the covariance matrix by that column's variance.
# The entry in row Y, column X is cov(X, Y) / var(X), i.e. the stock's beta.
c/v

Unnamed: 0,X,Y
X,1.0,0.565416
Y,1.153714,1.0


In [28]:
# Beta = covariance of stock and market returns / variance of market returns.
beta = c.loc['Y', 'X'] / v['X']
beta

1.1537144730615303

As you can see, both methods give the same beta value.

In [29]:
# Rolling beta with a window of 36 observations (3 years of monthly data):
# rolling covariance of market (X) and stock (Y) over rolling variance of X.
# Entries before the first full window are NaN.
cov = df['X'].rolling(36).cov(df['Y'])
var = df['X'].rolling(36).var()
cov / var

0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
         ...   
235    1.320562
236    1.368290
237    1.234932
238    1.314584
239    1.327663
Length: 240, dtype: float64

In [41]:
# roll beta function

def roll_beta(window_size, stock, market):
    """Return the rolling beta of ``stock`` against ``market``.

    For every window of ``window_size`` consecutive observations, beta is the
    rolling covariance of the two series divided by the rolling variance of
    ``market``.

    Parameters
    ----------
    window_size : int
        Number of observations in each rolling window.
    stock : array-like
        Return series of the stock (held internally as column ``Y``).
    market : array-like
        Return series of the market (held internally as column ``X``).

    Returns
    -------
    numpy.ndarray
        The beta series with its first ``window_size - 1`` entries removed,
        so the first element belongs to the first full window.
    """
    returns = pd.DataFrame({'X': market, 'Y': stock})
    # One rolling view over the market column serves both statistics.
    market_window = returns['X'].rolling(window=window_size)
    betas = market_window.cov(returns['Y']) / market_window.var()
    # Drop the leading entries that come from incomplete windows.
    return np.array(betas[window_size - 1:])

In [42]:
# Rolling beta over 36-observation windows; y holds the stock returns and
# x the market returns, matching roll_beta's (window_size, stock, market) order.
out = roll_beta(36, y, x)

In [43]:
# Display the rolling beta values, one per full 36-observation window.
out

array([0.9772635 , 0.97014216, 1.00021326, 1.00341387, 0.99066662,
       1.00436244, 0.99490991, 1.02171093, 1.03098369, 1.03367474,
       1.051477  , 1.0441973 , 1.06097197, 1.06000448, 1.03501676,
       1.04392938, 1.05036511, 1.06179624, 1.04459305, 1.05446398,
       1.04846115, 1.0680065 , 1.07123908, 1.07525705, 1.06774364,
       1.08922898, 1.13356561, 1.12231121, 1.11750739, 1.11221958,
       1.11222089, 1.15435166, 1.18812062, 1.26028061, 1.23078474,
       1.43892495, 1.44945814, 1.44257389, 1.49245589, 1.48736234,
       1.48676661, 1.49808716, 1.51245944, 1.49931122, 1.49980717,
       1.5056696 , 1.46281084, 1.46229541, 1.45473297, 1.45703139,
       1.42105982, 1.42173266, 1.42370527, 1.43551197, 1.44929252,
       1.42299995, 1.45985696, 1.45545443, 1.44527129, 1.4413922 ,
       1.44229289, 1.45366658, 1.43358319, 1.40905135, 1.40282212,
       1.47843879, 1.45342916, 1.42107272, 1.42991674, 1.33643484,
       1.37197186, 1.06860085, 1.05577774, 1.06599178, 0.92549