<h1>1. Import Libraries</h1>

In [85]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

#Descriptives

##Corelation##
import scipy
import scipy.stats
from scipy.stats import kurtosis
from statsmodels.tsa.stattools import grangercausalitytests
from itertools import permutations
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.stattools import adfuller
import warnings
warnings.filterwarnings("ignore")

<h1>2. Create Functions</h1>

<h2>2.1 Download & Read Stock Informations</h2>

In [26]:
def data(ticker_symbol: str, start_date: str, end_date: str, interval: str):
    """
    Download stock data and return as a pandas DataFrame. 
    The data will be stored in a dictionary with the ticker symbol as the key.

    Parameters:
    ticker_symbol: The stock symbol (e.g., 'AAPL').
    start_date: The start date in DD-MM-YYYY format (e.g., '01-01-2020').
    end_date: The end date in DD-MM-YYYY format (e.g., '31-12-2020').
    interval: The data interval. Valid intervals include '1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', 
              '1d', '5d', '1wk', '1mo', '3mo' (e.g., '1d' for daily data).
    """
    def convert_date_format(date_string):
        return datetime.strptime(date_string, '%d-%m-%Y').strftime('%Y-%m-%d')

    start_date = convert_date_format(start_date)
    end_date = convert_date_format(end_date)

    # Download stock data
    data = yf.download(ticker_symbol, start=start_date, end=end_date, interval=interval)

    # Calculate % growth of Adj Close
    data['Returns'] = data['Adj Close'].pct_change() * 100

    # File name
    file_name = f"{ticker_symbol}.csv"

    # Save data to a CSV file
    data.to_csv(file_name)

    # Read the data from the saved CSV file
    dataframe = pd.read_csv(file_name)

    # Create a global variable based on the part of the ticker_symbol before the '.'
    variable_name = ticker_symbol.split('.')[0]
    globals()[variable_name] = dataframe


<h2>2.2 Correlation</h2>

In [27]:
def cor(df, method="Spearman", p="F"):
    """
    Calculate the multivariable correlation matrix for a DataFrame,
    using the specified method ('Spearman' or 'Pearson') and appends significance stars next to the correlation coefficient.

    Parameters:
        df: The DataFrame to analyze.
        method: The correlation method to use ('Spearman' or 'Pearson').
        p: Whether to print the p-value matrix or not ("T" for True, "F" for False).

    Returns:
        None
    """
    def format_p_value(p_value):
        formatted = f"{p_value:0.3f}"
        if formatted.startswith("0."):
            return formatted[1:]
        return formatted

    if method == "Pearson":
        print("\n\n" + "=" * 21 + f"\n {method} Correlation\n" + "=" * 21)
    else:
        print("\n\n" + "=" * 27 + f"\n {method} Rank Correlation\n" + "=" * 27)

    # Create empty DataFrames to store the correlation coefficients and p-values
    corr_matrix = pd.DataFrame(index=df.columns, columns=df.columns)
    pmatrix = pd.DataFrame(index=df.columns, columns=df.columns)

    # Extract column names from DataFrame
    keys = df.columns.tolist()

    for i, key1 in enumerate(keys):
        for j, key2 in enumerate(keys):
            if i > j:
                continue  # Skip duplicate pairs

            data1 = df[key1].dropna()
            data2 = df[key2].dropna()

            # Find the common index between the two variables where neither is NaN
            common_index = data1.index.intersection(data2.index)

            # Use only the common indices for correlation calculation
            data1 = data1.loc[common_index]
            data2 = data2.loc[common_index]

            if len(common_index) < 2:
                # Skip if less than two common observations
                corr_matrix.at[key1, key2] = 'nan'
                corr_matrix.at[key2, key1] = 'nan'
                continue

            if method == 'Spearman':
                correlation, p_value = scipy.stats.spearmanr(data1, data2)
            elif method == 'Pearson':
                correlation, p_value = scipy.stats.pearsonr(data1, data2)
            else:
                raise ValueError("Method must be either 'Spearman' or 'Pearson'")

            # Populate the p-value matrix
            pmatrix.at[key1, key2] = format_p_value(p_value)
            pmatrix.at[key2, key1] = format_p_value(p_value)

            # Determine the significance stars
            stars = "     "
            if p_value < 0.001:
                stars = " *** "
            elif p_value < 0.01:
                stars = " **  "
            elif p_value < 0.05:
                stars = " *   "               
            elif p_value < 0.1:
                stars = " .   "
                

            # Round the results to three decimal places and append stars
            correlation_str = f"{format_p_value(correlation)}{stars}"

            # Populate the matrix
            corr_matrix.at[key1, key2] = correlation_str
            corr_matrix.at[key2, key1] = correlation_str

    # Convert DataFrame to string and add spacing for visual presentation
    corr_matrix_str = corr_matrix.to_string(sparsify=True, justify='center')

    # Add explanation for significance stars
    explanation = "\n\n--\nSignif. codes:  0.001 '***', 0.01 '**', 0.05 '*', 0.1 '.'"

    print("\n\n>> Correlation Matrix <<\n")
    print(corr_matrix_str + explanation)

    if p == "T":
        print("\n\n>> P-Value Matrix <<\n")
        print(pmatrix)
    elif p == "F":
        print("")
    else:
        raise ValueError("Method must be either 'F' for False or 'T' for True")

<h2>Variance</h2>

In [89]:
def var(data, ddof=0):
    n = len(data)
    mean = sum(data) / n
    return sum((x - mean) ** 2 for x in data) / (n - ddof)

<h1>3 Analysis</h1>

<h2>3.1 Descriptives</h2>

<h3>2.1.1 Stock: VOO</h2>

In [81]:
Stock = 'VOO'
SD = '01-01-2011'
FD = '01-01-2020'
P = '1mo'


data(Stock,SD, FD,P)


[*********************100%%**********************]  1 of 1 completed


In [82]:
VOO

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Returns
0,2011-01-01,115.940002,119.220001,115.480003,117.699997,92.772644,4402150,
1,2011-02-01,118.480003,123.220001,118.379997,121.779999,95.988548,2761400,3.466436
2,2011-03-01,122.279999,122.300003,114.639999,121.239998,95.562889,3866600,-0.443448
3,2011-04-01,122.120003,124.879997,118.440002,124.800003,98.809410,2302350,3.397261
4,2011-05-01,125.440002,125.440002,120.160004,123.339996,97.653473,2455100,-1.169865
...,...,...,...,...,...,...,...,...
103,2019-08-01,273.279999,276.279999,258.700012,268.600006,250.420197,76101400,-1.640514
104,2019-09-01,266.829987,277.980011,265.679993,272.600006,254.149414,45894200,1.489184
105,2019-10-01,273.440002,279.690002,261.589996,278.549988,260.932861,52876500,2.669078
106,2019-11-01,280.049988,289.779999,279.910004,288.649994,270.394012,43686600,3.625895


In [106]:
av_returns_VOO = np.mean(VOO['Returns'])
print('\nAverage of Returns: ',round(av_returns_VOO,2),'\n')

VOO['Returns']=np.array(VOO['Returns'])/100
var_returns_VOO = np.var(VOO['Returns'], ddof=1)
print('Variance of Returns: ',round(var_returns_VOO,7),'\n')

std_returns_VOO = np.std(VOO['Returns'])/100
print('Variance of Returns: ',round(std_returns_VOO,5),'\n')

risk_free_VOO = 0.19


Average of Returns:  1.09 

Variance of Returns:  0.0011865 

Variance of Returns:  0.00034 



<h3>2.1.1 Stock: BLV</h2>

In [50]:
Stock = 'BLV'
SD = '01-01-2011'
FD = '01-01-2020'
P = '1mo'


data(Stock,SD, FD,P)

[*********************100%%**********************]  1 of 1 completed


In [51]:
av_returns_BLV = np.mean(BLV['Returns'])
print('\nAverage Returns: ',round(av_returns_BLV,3),'\n')

var_returns_BLV = BLV['Returns'].var()/100
print('Variance of Returns: ',round(var_returns_BLV,3),'\n')

std_returns_BLV = BLV['Returns'].var()/100
print('Standard Deviation of Returns: ',round(std_returns_BLV,3),'\n')

risk_free_BLV = 0.19


Average Returns:  0.623 

Variance of Returns:  0.069 

Standard Deviation of Returns:  0.069 



<h2>3.2 Model</h2>

<h3>3.2.1 Sharp Ratios</h3>

In [52]:
sharp_VOO = (av_returns_VOO - risk_free_VOO)/std_returns_VOO
print('\nSharp Ratio of VOO: ',round(sharp_VOO,3),'\n')

sharp_BLV = (av_returns_BLV - risk_free_BLV)/std_returns_BLV
print('Sharp Ratio of BLV: ',round(sharp_BLV,3),'\n')


Sharp Ratio of VOO:  26.149 

Sharp Ratio of BLV:  6.303 



<h3>3.2.2 Covariances & Correlations</h3>

<h4>3.2.2.1 Covariance</h4>

In [61]:
covariance = VOO['Returns'].cov(BLV['Returns'])
print('\nCovariance: ',round(covariance,3),'\n')


Covariance:  -1.566 



<h4>3.2.2.2 Correlation</h4>

<h5>Create Dataframe</h5>

In [54]:
df = pd.DataFrame({'VOO': VOO['Returns'], 'BLV': BLV['Returns']})

<h5>Main Correlation</h5>

In [55]:
cor(df, method="Spearman", p="T")



 Spearman Rank Correlation


>> Correlation Matrix <<

        VOO          BLV     
VOO   1.000 ***   -0.067     
BLV  -0.067        1.000 *** 

--
Signif. codes:  0.001 '***', 0.01 '**', 0.05 '*', 0.1 '.'


>> P-Value Matrix <<

      VOO   BLV
VOO  .000  .491
BLV  .491  .000


<h2>Portfolio</h2>

In [60]:
portfolio = {
    'VOO': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'BLV': [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0]  # Adjusted to match the length of 'VOO'
}
portfolio = pd.DataFrame(portfolio)


portfolio['MEAN'] = av_returns_VOO * portfolio['VOO'] + av_returns_BLV * portfolio['BLV']
portfolio['VAR'] = (portfolio['VOO'] ** 2) * var_returns_VOO + (portfolio['BLV'] ** 2) * var_returns_BLV + 2*covariance*portfolio['BLV']*portfolio['VOO']


round(portfolio,3)

Unnamed: 0,VOO,BLV,MEAN,VAR
0,0.0,1.0,0.623,0.069
1,0.1,0.9,0.669,-0.225
2,0.2,0.8,0.716,-0.452
3,0.3,0.7,0.762,-0.613
4,0.4,0.6,0.808,-0.708
5,0.5,0.5,0.855,-0.736
6,0.6,0.4,0.901,-0.698
7,0.7,0.3,0.947,-0.594
8,0.8,0.2,0.994,-0.423
9,0.9,0.1,1.04,-0.186


In [41]:
av_returns_VOO

1.0864950374809077

In [42]:
av_returns_BLV

1.0864950374809077

In [107]:
import numpy as np

# List of return percentages
returns = [
    3.47, -0.44, 3.40, -1.17, -2.16, -1.58, -5.52, -7.31, 11.36, -0.30, 0.52, 5.10,
    4.28, 2.84, -0.19, -6.00, 3.59, 1.73, 2.51, 2.03, -1.45, 0.57, 0.29, 5.93,
    1.33, 3.12, 2.58, 2.32, -2.00, 5.84, -3.08, 2.87, 4.99, 3.00, 2.08, -3.00,
    4.57, 0.42, 1.18, 2.29, 1.63, -0.93, 3.97, -1.85, 2.89, 2.76, -0.86, -2.33,
    5.58, -2.07, 1.52, 1.25, -2.40, 2.65, -6.14, -2.98, 9.03, 0.43, -2.32, -4.34,
    -0.21, 6.30, 0.89, 1.75, -0.18, 4.20, 0.12, -0.42, -1.35, 3.73, 1.44, 2.42,
    3.88, -0.33, 1.51, 1.40, 0.18, 2.52, 0.29, 1.52, 2.85, 3.06, 0.80, 6.09,
    -3.73, -2.91, 0.81, 2.42, 0.29, 4.05, 3.22, 0.12, -6.42, 1.89, -9.34, 8.50,
    3.25, 1.36, 4.62, -6.35, 6.44, 1.99, -1.64, 1.49, 2.67, 3.63, 2.48
]

# Converting percentages to fractions
returns = np.array(returns) / 100

# Calculating the sample variance
sample_variance = np.var(returns, ddof=1)
sample_variance_rounded = round(sample_variance, 5)
sample_variance_rounded

0.00118

In [110]:
list=[VOO['Returns']]
print(list)

[0           NaN
1      0.034664
2     -0.004434
3      0.033973
4     -0.011699
         ...   
103   -0.016405
104    0.014892
105    0.026691
106    0.036259
107    0.024770
Name: Returns, Length: 108, dtype: float64]


In [109]:
print(returns)

[ 0.0347 -0.0044  0.034  -0.0117 -0.0216 -0.0158 -0.0552 -0.0731  0.1136
 -0.003   0.0052  0.051   0.0428  0.0284 -0.0019 -0.06    0.0359  0.0173
  0.0251  0.0203 -0.0145  0.0057  0.0029  0.0593  0.0133  0.0312  0.0258
  0.0232 -0.02    0.0584 -0.0308  0.0287  0.0499  0.03    0.0208 -0.03
  0.0457  0.0042  0.0118  0.0229  0.0163 -0.0093  0.0397 -0.0185  0.0289
  0.0276 -0.0086 -0.0233  0.0558 -0.0207  0.0152  0.0125 -0.024   0.0265
 -0.0614 -0.0298  0.0903  0.0043 -0.0232 -0.0434 -0.0021  0.063   0.0089
  0.0175 -0.0018  0.042   0.0012 -0.0042 -0.0135  0.0373  0.0144  0.0242
  0.0388 -0.0033  0.0151  0.014   0.0018  0.0252  0.0029  0.0152  0.0285
  0.0306  0.008   0.0609 -0.0373 -0.0291  0.0081  0.0242  0.0029  0.0405
  0.0322  0.0012 -0.0642  0.0189 -0.0934  0.085   0.0325  0.0136  0.0462
 -0.0635  0.0644  0.0199 -0.0164  0.0149  0.0267  0.0363  0.0248]
