In [2]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import matplotlib.pyplot as plt

In [3]:
# Yahoo Finance symbols analysed in this notebook: three stocks plus the
# benchmark index they are compared against.
tickers = [
    'TCS.NS',   # TCS
    '^BSESN',   # BSE index (benchmark)
    'INFY.BO',  # Infosys
    'SBIN.NS',  # SBI
]

I have chosen TCS, Infosys and SBI, and compared them against the BSE 30 index (Sensex, `^BSESN`) by calculating their log returns and standard deviations.

In [5]:
# Download the adjusted closing price history of every ticker from Yahoo
# Finance and collect it in a single DataFrame, one column per symbol.
# Requires network access; the result depends on what the data source returns
# on the day the cell is run.
data = pd.DataFrame()
for t in tickers:
    # The start date was previously written as '2000-1-1-' (stray trailing
    # dash). Use a well-formed ISO date so the request does not depend on
    # lenient date parsing.
    # NOTE(review): column assignment aligns each series on the frame's
    # existing index, so the first ticker presumably decides which dates are
    # kept -- confirm this is intended rather than a union of all dates.
    data[t] = web.DataReader(t, data_source='yahoo', start='2000-01-01')['Adj Close']

# Daily Log Returns

In [10]:
# Daily log return = ln(price today / price on the previous row).
# The first row has no predecessor, so it is NaN for every ticker.
log_returns = data.div(data.shift(periods=1)).pipe(np.log)
log_returns

Unnamed: 0_level_0,TCS.NS,^BSESN,INFY.BO,SBIN.NS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-25,,,,
2004-08-26,-0.009102,0.009173,,0.010376
2004-08-27,-0.016836,-0.003597,,-0.004022
2004-08-30,0.024731,0.013479,,0.025243
2004-08-31,0.001353,0.001085,,-0.005518
...,...,...,...,...
2020-06-05,-0.020920,0.008981,-0.005738,0.076035
2020-06-08,0.011408,0.002428,0.023592,-0.005339
2020-06-09,0.000145,-0.012115,-0.003893,-0.012660
2020-06-10,0.017557,0.008515,-0.002651,0.017467


In [13]:
# Sanity check: show the Python type of the log-returns object.
type(log_returns)

pandas.core.frame.DataFrame

## Annual Mean return

In [11]:
# Annualize the average daily log return of each ticker.
# 250 is the number of trading days per year assumed throughout this notebook.
log_returns.mean().mul(250)

TCS.NS     0.221972
^BSESN     0.116344
INFY.BO    0.178060
SBIN.NS    0.209087
dtype: float64

## Annual Standard Deviation

In [12]:
# Annualize the daily standard deviation of each ticker.
# Standard deviation scales with the square root of time, hence 250 ** 0.5
# rather than 250.
log_returns.std().mul(250 ** 0.5)

TCS.NS     0.391195
^BSESN     0.226433
INFY.BO    0.287479
SBIN.NS    0.392722
dtype: float64

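## Another way to calculate the annual mean return - using [ [ ] ]
In this method we select the columns with two sets of square brackets: the outer pair is the indexing operator and the inner pair is a list of column names, so the selection is a two-dimensional DataFrame rather than a one-dimensional Series.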

In [15]:
# Select the columns explicitly with a list of names (double brackets), then
# annualize the mean daily log return with the same 250-day factor.
250 * log_returns[['TCS.NS', '^BSESN', 'INFY.BO', 'SBIN.NS']].mean()

TCS.NS     0.221972
^BSESN     0.116344
INFY.BO    0.178060
SBIN.NS    0.209087
dtype: float64

## Variance

### Individual method

In [17]:
# Daily variance of the log returns, computed one ticker at a time.
for symbol in tickers:
    daily_variance = log_returns[symbol].var()
    print(daily_variance)

0.0006121327976728316
0.0002050870793666803
0.0003305765755823283
0.000616923497685173


In [18]:
# Annual variance of the log returns, computed one ticker at a time.
# Variance scales linearly with time, so the daily value is multiplied by 250.
for symbol in tickers:
    annual_variance = log_returns[symbol].var() * 250
    print(annual_variance)

0.1530331994182079
0.051271769841670076
0.08264414389558207
0.15423087442129327


### Direct Method

In [19]:
# Annual variance for all tickers in one vectorized call: column-wise daily
# variance scaled by 250.
log_returns.var().mul(250)

TCS.NS     0.153033
^BSESN     0.051272
INFY.BO    0.082644
SBIN.NS    0.154231
dtype: float64

## Covariance Matrix

In [21]:
# Annualized covariance matrix of the daily log returns (daily covariance
# scaled by 250). The diagonal holds each ticker's annual variance.
# pandas excludes missing values pairwise, so each entry is computed over the
# dates on which both tickers of the pair have a return.
cov_matrix = log_returns.cov().mul(250)
cov_matrix

Unnamed: 0,TCS.NS,^BSESN,INFY.BO,SBIN.NS
TCS.NS,0.153033,0.040382,0.032597,0.033697
^BSESN,0.040382,0.051272,0.020703,0.059616
INFY.BO,0.032597,0.020703,0.082644,0.01389
SBIN.NS,0.033697,0.059616,0.01389,0.154231


## Correlation Matrix

In [23]:
# Pairwise Pearson correlation of the daily log returns. Correlation is
# unit-free, so no annualization factor is applied.
corr_matrix = log_returns.corr(method='pearson')
corr_matrix

Unnamed: 0,TCS.NS,^BSESN,INFY.BO,SBIN.NS
TCS.NS,1.0,0.571529,0.455603,0.275191
^BSESN,0.571529,1.0,0.417877,0.669553
INFY.BO,0.455603,0.417877,1.0,0.13521
SBIN.NS,0.275191,0.669553,0.13521,1.0


### Note: the correlation matrix does not need to be annualized, because correlation is a normalized, unit-free measure that does not depend on the time horizon. Covariance, on the other hand, must be annualized: the covariance of a variable with itself is its variance, which scales with the length of the period.