In [None]:
import pandas as pd
# Weekly MORTGAGE30US observations collapsed to one row per calendar month
# (the mean of that month's weekly rates).
df = (
    pd.read_csv(
        "MORTGAGE30US.csv",
        usecols=['DATE', 'MORTGAGE30US'],
        parse_dates=['DATE'],
    )
    # Truncate every timestamp to its month so weekly rows share a grouping key.
    .assign(DATE=lambda weekly: weekly['DATE'].dt.to_period('M'))
    .groupby(['DATE'], as_index=False)
    .mean()
)

# Repeat each calendar year's mean of the monthly rates on every row of that
# year, divided by 100 (presumably percent -> fraction; confirm the CSV units).
df['Avg_Year_rate'] = (
    df.groupby(df['DATE'].dt.year)['MORTGAGE30US']
    .transform('mean')
    .div(100)
)

# Copy the DATE-indexed table to the system clipboard.
df.set_index('DATE').to_clipboard()


In [4]:
from pandas import Series 
from numpy import log
from typing import List

def get_ecl_weights(q0:List[Series], q1:List[Series]) -> dict:
  """Return the ratio ``log(q1[i]) / log(q0[i])`` for the first four entries.

  Args:
    q0: Sequence whose items 0..3 are the baseline quantities.
    q1: Sequence whose items 0..3 are the corresponding updated quantities.

  Returns:
    A dict with keys "DeltaECL", "DeltaPD", "DeltaLGD" and "DeltaEAD" (in that
    order) mapping to the ratios for positions 0, 1, 2 and 3 respectively.

  Raises:
    IndexError: If either sequence has fewer than four items.

  NOTE(review): this is a ratio of logs, not ``log(q1 / q0)`` -- confirm that
  this is the intended weighting.
  """
  labels = ("DeltaECL", "DeltaPD", "DeltaLGD", "DeltaEAD")
  return {
    label: log(q1[position]) / log(q0[position])
    for position, label in enumerate(labels)
  }
  
  
  

In [None]:
from helper import timeit
from scipy.stats import f
from scipy.stats import f_oneway, levene, gaussian_kde
from numpy import random
import numpy as np

# "F test for Variance"

@timeit
def test_equality_of_variance(data1, data2):
    """Two-sided F-test for equality of the variances of two samples.

    Computes the unbiased sample variance of each input, prints the raw
    variance ratio ``var1 / var2``, the two-sided p-value and a verdict at a
    fixed significance level of 0.05. Nothing is returned by this function
    itself.

    Args:
        data1: Sized iterable of numbers (first sample).
        data2: Sized iterable of numbers (second sample).

    Notes:
        Degenerate inputs are not handled: fewer than two observations in a
        sample, or a zero variance, fail in the divisions below.
    """
    n1 = len(data1)
    n2 = len(data2)

    # Compute each mean once; recomputing sum(data)/n for every element made
    # the variance quadratic in the sample size.
    mean1 = sum(data1)/n1
    var1 = sum((x - mean1)**2 for x in data1)/(n1 - 1)
    print("var1 summed")
    mean2 = sum(data2)/n2
    var2 = sum((x - mean2)**2 for x in data2)/(n2 - 1)
    print("var2 summed")
    F = var1/var2
    print('F-ratio:', F)

    alpha = 0.05
    df1 = n1 - 1
    df2 = n2 - 1
    if F < 1:
        # Put the larger variance in the numerator; the numerator and
        # denominator degrees of freedom must swap along with the ratio.
        F = 1/F
        df1, df2 = df2, df1
    # Folding F to be >= 1 makes the test two-sided, so the upper-tail
    # probability is doubled (and capped at 1). sf() is used instead of
    # 1 - cdf() to keep precision for very small tail probabilities.
    p_value = min(1.0, 2 * f.sf(F, df1, df2))
    print('p-value:', p_value)

    if p_value > alpha:
        print('The variance in the two groups is equal (fail to reject H0)')
    else:
        print('The variance in the two groups is not equal (reject H0)')


In [None]:
@timeit
def test_equality_of_mean_and_variance(data1, data2, alpha=0.05):
    """Test two samples for equal means (one-way ANOVA) and equal variances (Levene).

    Prints one verdict line for the means and one for the variances. Nothing
    is returned by this function itself.

    Args:
        data1: First sample (array-like of numbers).
        data2: Second sample (array-like of numbers).
        alpha: Significance level; a p-value above it means H0 (equality) is
            not rejected.
    """
    # The ANOVA p-value was previously obtained from a second levene() call
    # and never used, so the means were not actually compared.
    _, pvalue_anova = f_oneway(data1, data2)
    _, pvalue_levene = levene(data1, data2)

    if pvalue_anova > alpha:
        print('The mean is likely equal (fail to reject H0)')
    else:
        print('The mean is likely not equal (reject H0)')

    if pvalue_levene > alpha:
        print('The variance is likely equal (fail to reject H0)')
    else:
        print('The variance is likely not equal (reject H0)')
   


In [None]:
# Two 25,000-point normal samples with the same spread (sd 2) but different
# centres (0 and 40), each frozen into a tuple. data1 is drawn before data2.
data1, data2 = (
    tuple(random.normal(loc=centre, scale=2, size=25000))
    for centre in (0, 40)
)

In [None]:
# Run test_equality_of_mean_and_variance on data1 and data2 (normal samples
# drawn with means 0 and 40 and a common standard deviation of 2), using the
# function's default alpha.
test_equality_of_mean_and_variance(data1, data2)

In [None]:
# Timing note kept from an earlier run:
#   Function test_equality_of_variance took 36.279090 seconds
# Print the per-point weights gaussian_kde assigns to a 10-draw normal sample
# (mean 10, sd 10).
sample = np.random.normal(loc=10, scale=10, size=10)
print(gaussian_kde(sample).weights)


In [None]:
# 100 grown by 3% per period, compounded over 10 periods; the bare expression
# is the cell's displayed output.
100*(1+0.03)**10

In [None]:
import math, numpy
from helper import timeit

def interquartileRange(array):
  """Return the interquartile range of ``array`` (75th minus 25th percentile).

  Both quartiles come from a single ``numpy.quantile`` call, so the input is
  processed once instead of twice.

  Args:
    array: Array-like of numbers; it is treated as one flat sample.

  Returns:
    The difference between the 0.75 and 0.25 quantiles.
  """
  lower_quartile, upper_quartile = numpy.quantile(array, [0.25, 0.75])
  return upper_quartile - lower_quartile

@timeit
def silvermansBandwidth(data):
  """Silverman's rule-of-thumb bandwidth: ``0.9 * min(sd, IQR / 1.34) * n ** -0.2``.

  Args:
    data: Sized array-like of numbers.

  Returns:
    The bandwidth, in the same units as ``data``.
  """
  # The IQR is divided by 1.34 so that it is on the same scale as a standard
  # deviation (for normal data IQR is about 1.34 * sd). Without the rescaling
  # the IQR is almost never the smaller term and the robust guard is inert.
  spread = min(numpy.std(data), interquartileRange(data) / 1.34)
  return 0.9 * spread * (len(data) ** -0.20)

@timeit
def epanechnikov(bandwidth):
  """Build a scaled Epanechnikov kernel ``K_h(x) = 0.75 * (1 - (x/h)**2) / h``.

  Args:
    bandwidth: Kernel bandwidth ``h``.

  Returns:
    A function ``kernel(x)`` of a scalar distance ``x`` that evaluates
    ``K_h(x)`` for ``|x / h| <= 1`` and returns 0 outside that support.
  """
  def kernel(x):
    # The quadratic term must use the scaled distance u = x / h. Using the raw
    # x only matches the kernel when h == 1 and yields negative "densities"
    # inside the support when h > 1.
    u = x / bandwidth
    if abs(u) <= 1:
      return 0.75 * (1 - u * u) / bandwidth
    return 0
  return kernel


def generate_thresholds(array, num=10):
  """Return ``num`` evenly spaced points spanning the range of ``array``.

  Args:
    array: Array-like of numbers.
    num: Number of points to generate, endpoints included. Defaults to 10,
      the value that used to be hard-coded.

  Returns:
    A numpy array running from ``min(array)`` to ``max(array)`` inclusive.
  """
  return numpy.linspace(numpy.min(array), numpy.max(array), num)

@timeit
def kde(kernel, thresholds, data):
  """Average ``kernel(t - d)`` over ``data`` for every ``t`` in ``thresholds``.

  Args:
    kernel: Callable taking one scalar distance and returning a number.
    thresholds: Iterable of evaluation points.
    data: Sized iterable of observations.

  Returns:
    A list of ``(threshold, average)`` pairs, in the order of ``thresholds``.
  """
  estimates = []
  for point in thresholds:
    # Accumulate from 0 in data order, one kernel call per observation.
    total = 0
    for observation in data:
      total = total + kernel(point - observation)
    estimates.append((point, total / len(data)))
  return estimates

In [None]:
# One million draws from a standard normal (mean 0, sd 1) used as the sample
# for the density estimates in the following cells.
data = numpy.random.normal(loc=0, scale=1, size=1_000_000)

In [None]:
# Bandwidth for the sample, as computed by silvermansBandwidth.
bandwidth = silvermansBandwidth(data)

# Evaluation grid over the sample's range, held as an ascending Python list.
thresholds = list(generate_thresholds(data))
thresholds.sort()

In [None]:
# Evaluate the hand-written estimator at every threshold with an Epanechnikov
# kernel built from `bandwidth`; the call's result is the cell output.
# kde calls the kernel once per (threshold, observation) pair in pure Python,
# so the cost grows with len(thresholds) * len(data).
kde(epanechnikov(bandwidth),thresholds, data)

In [None]:
from scipy.stats import gaussian_kde


In [None]:
# Reference estimate from scipy, paired with each threshold.
# gaussian_kde treats a scalar bw_method as kde.factor, i.e. a multiplier on
# the sample standard deviation, not as an absolute bandwidth. Dividing by the
# sample sd (ddof=1, matching the covariance gaussian_kde computes) makes the
# effective kernel width equal to `bandwidth`.
tuple(zip(
    thresholds,
    gaussian_kde(data, bw_method=bandwidth / numpy.std(data, ddof=1)).evaluate(thresholds),
))