## Calculating Covariance and Correlation

In [1]:
import numpy as np
import pandas as pd
from pandas_datareader import data as wb

In [2]:
# Download adjusted closing prices for Walmart (WMT) and Facebook (FB)
# from the 'yahoo' data source, starting at 1/1/2014.
# Each ticker becomes one column of `data`.

tickers = ['WMT', 'FB']
data = pd.DataFrame()
for t in tickers:
    data[t] = wb.DataReader(name=t, data_source='yahoo', start='1/1/2014')['Adj Close']

# Quick sanity check: index range, column names and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1377 entries, 2014-01-02 to 2019-06-21
Data columns (total 2 columns):
WMT    1377 non-null float64
FB     1377 non-null float64
dtypes: float64(2)
memory usage: 32.3 KB


In [3]:
# Log returns: ln(P_t / P_{t-1}) for every ticker column.
# shift(1) moves each price down one row, so the first row has no
# previous price and comes out as NaN.

data_returns = np.log(data.div(data.shift(1)))
data_returns.tail()

Unnamed: 0_level_0,WMT,FB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-06-17,0.000825,0.041481
2019-06-18,0.004479,-0.002861
2019-06-19,-0.000274,-0.005267
2019-06-20,0.006365,0.010875
2019-06-21,0.007315,0.008459


## Variance

In [4]:
# pandas does the heavy lifting from here on: .var() gives the variance,
# .cov() the covariance matrix and .corr() the correlation matrix.
# The factor of 250 (presumably the approximate number of trading days
# in a year) scales the per-period variance up to an annual figure.

fb_var = 250 * data_returns['FB'].var()
fb_var

0.09084333729865245

In [5]:
# Same calculation for Walmart: variance of its returns scaled by 250.
wmt_var = 250 * data_returns['WMT'].var()
wmt_var

0.03582883412547254

## Covariance and Correlation

Covariance matrix:

In [6]:
# Pairwise covariance of the return columns, scaled by the same factor
# of 250 used for the variances (the diagonal reproduces those values).
data_cov = data_returns.cov().mul(250)
data_cov

Unnamed: 0,WMT,FB
WMT,0.035829,0.008256
FB,0.008256,0.090843


Correlation matrix:

In [7]:
# No multiplication by 250 here: correlation is a unitless ratio
# (covariance divided by the product of the standard deviations), so
# scaling the returns by a constant factor leaves it unchanged.

data_cor = data_returns.corr(method='pearson')
data_cor

Unnamed: 0,WMT,FB
WMT,1.0,0.144719
FB,0.144719,1.0
