In [39]:
import numpy as np
import matplotlib 
matplotlib.use('nbagg')
import matplotlib.pyplot as plt

In [60]:
# Defining the functions to compute various bounds and growth function approximation
def mh_approx(a,N,dvc,delta):
    return np.log((a*np.power(N,dvc))/delta)
    
def VC_bound(dvc,delta,N):
    epsilon = np.power((8/N)*(mh_approx(4,N,dvc,delta)),0.5)
    return epsilon

def Rademacher_penalty_bound(dvc,delta,N):
    epsilon = np.power((2/N)*(mh_approx(2,N/2,dvc,1)),0.5) + np.power((2/N)*np.log(1/delta),0.5) + (1/N)
    return epsilon

def parrondo_vd_bound(dvc,delta,N):
    x = mh_approx(6,N,dvc,delta)
    epsilon = (1 + np.power((N*x) + 1,0.5))/N
    return epsilon

def devroye_bound(dvc,delta,N):
    N_mod = (np.power(N,2))/2
    x =  mh_approx(4,N_mod,dvc,delta)
    epsilon = np.power((1 + (1 + (N/2 - 1)*x)),0.5)/(N-2)
    return epsilon

In [54]:
dvc = 10
delta = 0.05
N = np.arange(5,100,5)

array([  9.43348392,  12.20607265,  13.82793308,  14.97866137,
        15.87123557,  16.6005218 ,  17.21712452,  17.75125009,
        18.22238223,  18.6438243 ,  19.02506501,  19.37311052,
        19.69328135,  19.98971324,  20.26568473,  20.52383881,
        20.7663373 ,  20.99497095,  21.21123984])

In [None]:
# Defining a function to visualize the variation of generalization error with respect to sample size
def visual_bounds(dvc,delta=0.05,N):
    """
    Inputs
    dvc: Scalar, VC dimension of the hypotheses set 
    delta: scalar, 1 - confidence level (defaults to 95% confidence i.e. delta = 0.05)
    N: a 1D numpy array containing the sample sizes 
    
    output: Plots the generalization error with respect to the elements in N
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title("epsilon vs N")
    ax.scatter()
    ax.plot(data_complete[:,1],(-1/weight_3D[2,0])*(weight_3D[0,0] + (weight_3D[1,0])*data_complete[:,1]))
    plt.show()
    return

In [61]:
# Bound comparison for higher N
N = 100
bounds_com_list = [VC_bound(dvc,delta,N),Rademacher_penalty_bound(dvc,delta,N),parrondo_vd_bound(dvc,delta,N),devroye_bound(dvc,delta,N)]
bounds_com_list
# Clearly devroye is lower for higher values of N

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


[inf, 1.1471129287444777, inf, 0.67610368904755169]

In [11]:
# Bound comparison for lower N
N = 5
bounds_com_list = [VC_bound(dvc,delta,N),Rademacher_penalty_bound(dvc,delta,N),parrondo_vd_bound(dvc,delta,N),devroye_bound(dvc,delta,N)]
bounds_com_list
# Clearly Parrondo and Van den Broek bound is lower for lower values of N

[13.828161484991483,
 6.9925615113259081,
 5.1013619819899922,
 5.5931255431826692]