# In [9]:
import pandas as pd
import yfinance as yf
import numba as nb
from math import exp
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
import statsmodels.api as sm
import scipy.stats as stats
from scipy.optimize import minimize

def get_data(list_of_stocks: list, start_date: str, end_date: str, columns: list, interval: str, resample=None) -> tuple:
    """
    Download price history for several tickers and build aligned return tables.

    :param list_of_stocks: ticker symbols passed one by one to yf.Ticker
    :param start_date: start of the history window, forwarded to Ticker.history
    :param end_date: end of the history window, forwarded to Ticker.history
    :param columns: column labels assigned to both result frames (one per ticker, same order)
    :param interval: sampling interval, forwarded to Ticker.history
    :param resample: optional pandas resampling rule; when given, the last
                     observation of every bucket is kept before returns are computed
    :return: (simple returns, log returns) - two DataFrames with one column per
             ticker; rows containing any missing value are dropped
    """
    simple_returns = []
    log_returns = []
    for stock in list_of_stocks:
        history = yf.Ticker(stock).history(start=start_date, end=end_date, interval=interval)
        if resample is not None:
            history = history.resample(resample).last()
        close = history['Close']
        simple_returns.append(close.pct_change())
        # Log return is log(P_t) - log(P_{t-1}); diff() yields exactly that sign.
        log_returns.append(np.log(close).diff())

    frames = []
    for per_stock in (simple_returns, log_returns):
        # Outer join keeps every date seen for any ticker; dropna then keeps
        # only the dates on which all tickers have a return.
        frame = pd.concat(per_stock, axis=1, join='outer')
        frame.columns = columns
        frames.append(frame.dropna(axis=0))

    df, df_lr = frames
    return df, df_lr

# Numba-wrapped variant of get_data.
# NOTE(review): get_data works on yfinance/pandas objects, which numba
# presumably cannot compile in nopython mode - confirm this wrapper still runs
# with the installed numba version and that it is actually needed.
get_data_nb = nb.jit(get_data)

# def half_kelly_criterion(data: pd.DataFrame, f:float) -> pd.Series:
#     equ = "equity_{:.2f}".format(f)
#     cap = "capital_{:.2f}".format(f)
#     data[equ] = 1
#     data[cap] = data.equ * f
#     for i, t in enumerate(data.index[1:]):
#         t_1 = data.index[i]
#         data.loc[t, cap] = data[cap].loc[t_1] * data["our_portfolio"].loc[t]
#         data.loc[t, equ] = data[cap].loc[t] - data[cap].loc[t_1] + data[equ].loc[t_1]
#         data.loc[t, cap] = data[equ].loc[t] * f
        # except Exception:
        #     print("Please run the previous cells")

#### bet sizing
def half_kelly_Thorp(rf: float, returns: pd.DataFrame, stocks: list) -> np.ndarray:
    """
    :param rf: rate subtracted from every return to obtain excess returns
    :param returns: asset returns, one column per asset
    :param stocks: asset names; accepted for interface compatibility only - a
                   common scale factor applied to both the covariance and the
                   mean cancels out, so it has no influence on the result
    :return: half of the Kelly position sizes, 0.5 * inv(cov) @ mean, computed
             on the excess returns
    :raises numpy.linalg.LinAlgError: if the covariance matrix is singular
    """
    excess_returns = returns - rf
    covariance = excess_returns.cov().to_numpy()
    mean_excess = excess_returns.mean().to_numpy()
    # Solve cov @ x = mean directly instead of forming the explicit inverse.
    full_kelly = np.linalg.solve(covariance, mean_excess)
    return full_kelly / 2
        
def losses(p : pd.DataFrame) -> pd.Series:
    """
    Extract the losing observations of a return series as positive numbers.

    :param p: portfolio time series
    :return: the strictly negative entries of p, in absolute value; everything
             else is blanked out and removed by dropna
    """
    is_loss = p < 0
    negative_part = p.where(is_loss)
    return negative_part.dropna().abs()

def gains(p: pd.DataFrame) -> pd.Series:
    """
    Extract the winning observations of a return series.

    :param p: portfolio time series
    :return: the strictly positive entries of p; everything else is blanked
             out and removed by dropna
    """
    is_gain = p > 0
    positive_part = p.where(is_gain)
    return positive_part.dropna()

#### risk metrics
def non_parametric_VAR(p:pd.Series, q:float) -> float:
    """
    Empirical Value at Risk taken from the loss observations only.

    :param p: portfolio
    :param q: quantile
    :return: the q-quantile of the absolute losses of p (as produced by losses)
    """
    loss_sample = losses(p)
    return loss_sample.quantile(q)

def non_parametric_ES(p: pd.DataFrame, q: float):
    """
    Empirical expected shortfall taken from the loss observations only.

    :param p: portfolio
    :param q: quantile
    :return: mean of the absolute losses that are at least as large as the
             non-parametric VaR, i.e. E(L | L >= VaR_q)
    """
    threshold = non_parametric_VAR(p, q)
    loss_sample = losses(p)
    tail = loss_sample.where(loss_sample >= threshold).dropna()
    return tail.mean()


def present_risk_metric(description, metric_func, **kwargs) -> None:
    """
    Evaluate a risk metric and print it in a one-line sentence.

    :param description: name of the metric, inserted into the printed sentence
    :param metric_func: callable that computes the metric
    :param kwargs: keyword arguments forwarded unchanged to metric_func
    :return: None - the sentence is printed, nothing is returned
    """
    value = round(metric_func(**kwargs), 4)
    print(f"The {description} is: {value}")


### parametric with normal distribution
def parametric_var(p: pd.DataFrame, weights: list, alpha: float) -> float:
    """
    Value at Risk under a zero-mean normal model of the portfolio return.

    :param p: portfolio returns
    :param alpha: significance level
    :param weights: portfolio weights; a list, tuple, numpy array or pandas
                    object, one weight per column of p
    :return: Parametric Var, i.e. portfolio standard deviation times the
             (1 - alpha) standard-normal quantile
    """
    # Plain sequences have no .T attribute, so coerce them to an array;
    # pandas objects are left alone to keep their label alignment.
    w = weights if isinstance(weights, (pd.Series, pd.DataFrame)) else np.asarray(weights)
    std = np.sqrt(w.T @ p.cov() @ w)
    return std * stats.norm.ppf(1 - alpha)
    
def parametric_expected_shortfall(p: pd.DataFrame, weights: list, alpha: float) -> float:
    """
    Expected shortfall under a zero-mean normal model of the portfolio return.

    :param p: portfolio returns
    :param alpha: significance level
    :param weights: portfolio weights; a list, tuple, numpy array or pandas
                    object, one weight per column of p
    :return: Parametric ES, i.e. portfolio standard deviation times the
             standard-normal density at the (1 - alpha) quantile, divided by alpha
    """
    # Plain sequences have no .T attribute, so coerce them to an array;
    # pandas objects are left alone to keep their label alignment.
    w = weights if isinstance(weights, (pd.Series, pd.DataFrame)) else np.asarray(weights)
    std = np.sqrt(w.T @ p.cov() @ w)
    quantile = stats.norm.ppf(1 - alpha)
    return std * stats.norm.pdf(quantile) / alpha
    
#### parametric with student t
def neg_log_likelihood(params: list, returns:pd.Series) -> float:
    """
    Negative log-likelihood of the observations under a Student t distribution.

    :param params: (degrees of freedom, location, scale), in that order
    :param returns: observations to evaluate
    :return: minus the sum of the Student t log-densities of returns
    """
    dof, location, spread = params
    log_density = stats.t.logpdf(returns, dof, loc=location, scale=spread)
    return -log_density.sum()

def obtain_parameters(guess: list, returns:pd.Series) -> tuple:
    """
    Fit a Student t distribution by minimising neg_log_likelihood with Nelder-Mead.

    :param guess: starting point for the optimiser, ordered as
                  (degrees of freedom, location, scale)
    :param returns: observations the distribution is fitted to
    :return: the optimiser's solution vector (its ``x`` attribute), ordered as
             Degrees of Freedom, Location, Scale
    """
    fit = minimize(neg_log_likelihood, guess, args=(returns, ), method='Nelder-Mead')
    return fit.x
#### risk metrics EVT


#### plots
def correlation_matrix(p: pd.DataFrame, stocks:list) -> sns.heatmap:
    """
    Show an annotated heatmap of the pairwise correlations of selected columns.

    :param p: table holding one column per asset
    :param stocks: columns of p to include
    :return: nothing is returned explicitly; the figure is displayed with plt.show
    """
    correlations = p[stocks].corr()
    # Diverging palette for the heatmap colours
    palette = sns.color_palette("coolwarm", as_cmap=True)
    # Boolean mask that is True on the upper triangle, diagonal included
    upper_triangle = np.triu(np.ones_like(correlations, dtype=bool))

    plt.figure(figsize=(8, 8))
    sns.heatmap(
        correlations,
        mask=upper_triangle,
        cmap=palette,
        square=True,
        annot=True,
        cbar_kws={"shrink": .4},
    )
    plt.show()
        

def MSplot(data, p=4):
    """
    Draw maximum-to-sum (MS) plots of |data| for the moments 1..p.

    For every moment i the running maximum of |data|**i is divided by its
    running sum and plotted against the observation count, one panel per
    moment on a grid with two columns.

    :param data: pandas object holding the observations
    :param p: highest moment to plot (positive integer, default 4)
    :raises ValueError: if p is smaller than 1
    """
    if p < 1:
        raise ValueError("p must be a positive integer")

    x = data.abs()
    n_rows = (p + 1) // 2
    # squeeze=False keeps axs two-dimensional even for a single row (p <= 2).
    _, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(8, 3 * n_rows), squeeze=False)
    observations = range(len(x))

    for moment in range(1, p + 1):
        powered = x ** moment
        ratio = powered.cummax() / powered.cumsum()
        row, col = divmod(moment - 1, 2)
        ax = axs[row, col]
        ax.plot(observations, ratio, color="orangered")
        ax.set_title(f"MS plot, p={moment}")
        ax.set_xlabel("n")
        if col == 0:
            # Only the left-hand column carries the y-axis label.
            ax.set_ylabel("Rn")

    if p % 2:
        # Odd p leaves the last grid cell without a plot; hide the empty axes.
        axs[-1, -1].set_visible(False)

    plt.tight_layout()
    plt.show()
    
def ACF(**kwargs):
    """
    Plot the autocorrelation function and display it.

    :param kwargs: forwarded unchanged to sm.graphics.tsa.plot_acf
    """
    sm.graphics.tsa.plot_acf(**kwargs)
    # Decorate the current figure, then render it
    plt.title('Autocorrelation Function (ACF)')
    plt.ylabel('Autocorrelation')
    plt.xlabel('Lags')
    plt.tight_layout()
    plt.show()

def PACF(**kwargs):
    """
    Plot the partial autocorrelation function and display it.

    :param kwargs: forwarded unchanged to sm.graphics.tsa.plot_pacf
    """
    sm.graphics.tsa.plot_pacf(**kwargs)
    # Decorate the current figure, then render it
    plt.title('Partial Autocorrelation Function (PACF)')
    plt.ylabel('Partial Autocorrelation')
    plt.xlabel('Lags')
    plt.tight_layout()
    plt.show()

def qqplot(**kwargs):
    """
    Draw a quantile-quantile plot and display it.

    :param kwargs: forwarded unchanged to sm.qqplot
    """
    sm.qqplot(**kwargs)
    # Decorate the current figure, then render it
    plt.ylabel("Sample Quantiles")
    plt.xlabel("Theoretical Quantiles")
    plt.title("QQ Plot")
    plt.grid(True)
    plt.show()