In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the local project modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [15]:
import matplotlib as mpl
from matplotlib import rc
# rc('text',usetex=True)
rc('font',**{'family':'serif','serif':['Times New Roman'],'size':8})
rc('mathtext',fontset='cm')
import matplotlib.pyplot as plt
% matplotlib inline
% config InlineBackend.figure_format = 'svg'
% config InlineBackend.rc = {'figure.figsize': (5,3.5)}

In [4]:
import numpy as np
from numpy import sqrt,log,array as ar
import numpy.linalg as la
import cvxpy as cvx
from helper import *
from cd.model.utility import LipschitzExpUtility as leu, RiskNeutralUtility as rnu
from cd.model.distrs import RademacherDistribution,UniformDistribution
import cd.model.synth_data as synth
import scaler
from itertools import product
from scipy.optimize import curve_fit

## Borne sur l'erreur de généralisation

In [0]:
# Problem definition for the generalisation-bound experiment.
# `leu` is the notebook's alias for LipschitzExpUtility; the meaning of its
# argument is defined in cd.model.utility (not visible here).
u = leu(1)

# p distributions go into `Xs`, one more into `R`; both are handed to
# synth.GaussianMarket in the next cell (presumably features and return).
p = 2
R = RademacherDistribution()
Xs = [RademacherDistribution() for _k in range(p)]

In [0]:
# Build the synthetic market from the distributions defined above and draw a
# 5000-draw sample; `t_true` is the reference sample passed to `out_error`
# further down.
M = synth.GaussianMarket(Xs, R)
t_true = M.sample_t(5_000)

In [0]:
# Sample sizes to sweep (10..150 inclusive) and the samples drawn from
# `t_true`. `create_sample` comes from `helper`; `m` is presumably the number
# of independent replications -- confirm against its definition.
m = 150
ns = np.arange(10, 151)
ts = create_sample(t_true, n=ns.max(), m=m)

In [0]:
# Generalisation-error sweep: one result slot per sample size in `ns`.
error, bound, errorce, boundce, compin, compout = (
    np.zeros(len(ns)) for _ in range(6)
)

for i, n in enumerate(ns):
    # Print every fifth index as a coarse progress indicator.
    if i % 5 == 0:
        print(i)

    # Keep only the first n entries along axis 1 of the drawn samples.
    t = ts[:, :n]
    qs = solve(t, u, lamb=1)

    # Values returned by the helper's in_error / out_error for these
    # solutions, evaluated on the truncated sample and on `t_true`.
    inerror = in_error(qs, u, t, lamb=1)
    outerror = out_error(qs, u, t_true, lamb=1)
    gap = inerror - outerror

    # 95th percentile of the gap, in utility units and after mapping both
    # sides through u.inverse.
    error[i] = np.percentile(gap, 95)
    errorce[i] = np.percentile(u.inverse(inerror) - u.inverse(outerror), 95)

    # Bound returned by the helper, and its rescaled counterpart.
    bound[i] = bound_genu(n, sqrt(p), lamb=1)
    boundce[i] = bound[i] * u.subinverse(errorce[i])

    # Record the in/out values of the entry whose gap is closest to the
    # 95th percentile, for the comparison figure below.
    idx = np.argmin(np.abs(error[i] - gap))
    compin[i] = inerror[idx]
    compout[i] = outerror[idx]

0


5


10


15


20


25


30


35


40


45


50


55


60


65


70


75


80


85


90


95


100


105


110


115


120


125


130


135


140


In [0]:
# Two stacked panels comparing the empirical 95th-percentile generalisation
# error (left y-axis, blue) with the bound (right y-axis, dashed black):
# a) in utility units, b) after mapping through u.inverse.
fig, axarr = plt.subplots(2, figsize=(6, 8))
fig.subplots_adjust(hspace=.3)
pltu1, pltce1 = axarr[0], axarr[1]

# Utility
# Single-point dashed plot: only there so the legend of the left axis gets an
# entry for the bound, which is actually drawn on the twin axis.
# Raw strings keep the mathtext backslashes free of invalid-escape warnings.
pltu1.plot(ns[0], error[0], 'k--', label=r'Borne théorique $(\delta=5\%)$')
pltu1.plot(ns, error, 'C0', label="95e percentile d'erreur")
pltu1.tick_params('y', colors='C0')
pltu1.axis(ymax=0.4, ymin=0)
pltu1.set_yticks([0, 0.05, 0.1, 0.15, 0.2, 0.25])
pltu1.set_xticks(np.arange(10, 160, 10))
pltu1.legend()
pltu1.set_title("a) Utilité")
pltu1.set_xlabel('$n$')
pltu1.set_ylabel('Erreur de généralisation (util)')

pltu2 = pltu1.twinx()
pltu2.plot(ns, bound, 'k--')
pltu2.axis(ymax=3.5, ymin=-1)
pltu2.set_yticks([0.5, 1, 1.5, 2, 2.5, 3, 3.5])

# CE
pltce1.plot(ns[0], errorce[0], 'k--', label=r'Borne théorique $(\delta=5\%)$')
pltce1.plot(ns, errorce, 'C0', label="95e percentile d'erreur")
pltce1.tick_params('y', colors='C0')
pltce1.axis(ymax=0.6, ymin=0)
pltce1.set_yticks(np.arange(0, 0.4, 0.05))
pltce1.set_xticks(np.arange(10, 160, 10))
pltce1.legend()
pltce1.set_title("b) Équivalent certain")
pltce1.set_xlabel('$n$')
# Label typo fixed: "généralisaiton" -> "généralisation".
pltce1.set_ylabel('Erreur de généralisation (rendement)')

pltce2 = pltce1.twinx()
pltce2.plot(ns, boundce, 'k--')
pltce2.axis(ymin=-2, ymax=4.75)
pltce2.set_yticks(np.arange(0.5, 5.5, .5));

# Export is disabled; uncomment to write the figure to disk.
# plt.savefig('fig/bound_errgen.pdf',bbox_inches='tight',pad_inches=0)


<matplotlib.figure.Figure at 0x10eeb68d0>

In [0]:
# Single-panel comparison of the recorded in-sample (C0) and out-of-sample
# (C2) values, both raw (solid) and mapped through u.inverse (dashed).
fig, pltin = plt.subplots(1)

# (values, line format, legend label), in drawing/legend order.
curves = [
    (u.inverse(compin), 'C0--', 'Équivalent certain en échantillon'),
    (compin, 'C0', 'Utilité moyenne en échantillon'),
    (u.inverse(compout), 'C2--', 'Équivalent certain hors échantillon'),
    (compout, 'C2', 'Utilité moyenne hors échantillon'),
]
for values, fmt, label in curves:
    pltin.plot(ns, values, fmt, label=label)

pltin.set_xlabel('$n$')
pltin.set_ylabel('Utilité et rendement')
pltin.set_xticks(ns[::10])
pltin.legend()

# Writes into the relative 'fig/' directory.
plt.savefig('fig/bound_gencomps.pdf', bbox_inches='tight', pad_inches=0)


<matplotlib.figure.Figure at 0x1123b0be0>

## Erreur de sous optimalité I

In [5]:
# Problem definition for the first sub-optimality experiment (same values as
# in the generalisation section).
# `leu` is the notebook's alias for LipschitzExpUtility.
u = leu(1)

# p distributions go into `Xs`, one more into `R`; both are handed to
# synth.GaussianMarket in the next cell.
p = 2
R = RademacherDistribution()
Xs = [RademacherDistribution() for _k in range(p)]

In [6]:
# Build the synthetic market and draw the 5000-draw reference sample against
# which the reference solution and out-of-sample values are computed below.
M = synth.GaussianMarket(Xs, R)
t_true = M.sample_t(5_000)

In [7]:
# Reference solution: solve on the full reference sample with lamb=0.
q_true = solve(t_true, u=leu(1), lamb=0)

# Mean utility of the first reference solution on the reference sample.
eu_true = u(t_true @ q_true[0]).mean()

# Half the squared norm of the reference solution, and the same quantity
# rescaled by u.subinverse evaluated at u.inverse(eu_true). Both are drawn as
# the "limit of the bound" in the figure below.
lim = 0.5 * la.norm(q_true) ** 2
limce = lim * u.subinverse(u.inverse(eu_true))

In [10]:
# Sample sizes to sweep (10..150 inclusive) and the samples drawn from
# `t_true` by the helper's `create_sample`.
m = 150
ns = np.arange(10, 151)
ts = create_sample(t_true, n=ns.max(), m=m)

In [11]:
# Sub-optimality sweep with a fixed regularisation (lamb=1).
# NOTE(review): `hold` is never read or written again in the visible notebook;
# it is kept only so the cell defines the same globals as before.
hold = np.zeros(len(ns))
error = np.zeros(len(ns))
errorce = np.zeros(len(ns))
bound = np.zeros(len(ns))
boundce = np.zeros(len(ns))

# These depend only on the reference solution, not on n, so they are computed
# once instead of on every iteration.
ce_true = u.inverse(eu_true)
ce_scale = u.subinverse(ce_true)

for i, n in enumerate(ns):
    # Print every fifth index as a coarse progress indicator.
    if i % 5 == 0:
        print(i)

    # Keep only the first n entries along axis 1 of the drawn samples.
    t = ts[:, :n]
    qs = solve(t, u, lamb=1)
    outerror = out_error(qs, u, t_true, lamb=1)

    # 95th percentile of the shortfall w.r.t. the reference solution, in
    # utility units and after mapping through u.inverse.
    error[i] = np.percentile(eu_true - outerror, 95)
    errorce[i] = np.percentile(ce_true - u.inverse(outerror), 95)

    # Bound from the helper (called without `lamb`, so its default applies)
    # and its rescaled counterpart.
    bound[i] = bound_sou(n, sqrt(p), q_true)
    boundce[i] = bound[i] * ce_scale

140


135


130


125


120


115


110


105


100


95


90


85


80


75


70


65


60


55


50


45


40


35


30


25


20


15


10


5


0


In [53]:
# Two stacked panels for the fixed-regularisation sub-optimality experiment.
# In each panel the empirical 95th-percentile error uses the left axis (blue)
# and the bound plus its limit use a twin right axis drawn *behind* it
# (zorder 1 vs 2; the left patch is hidden so the twin shows through).
fig, axarr = plt.subplots(2, figsize=(6, 8))
fig.subplots_adjust(hspace=.3)
pltu1, pltce1 = axarr[0], axarr[1]

# Utility
# Single-point plots: they only create legend entries on the left axis for
# the curves that are actually drawn on the twin axis.
# Raw strings keep the mathtext backslashes free of invalid-escape warnings.
pltu1.plot(ns[0], error[0], 'k--', label=r'Borne théorique $(\delta = 5\%)$')
pltu1.plot(ns[0], error[0], 'k:', alpha=0.5, label='Limite de la borne')
pltu1.set_zorder(2)
pltu1.patch.set_visible(False)
pltu1.tick_params('y', colors='C0')
pltu1.axis(ymax=0.35, ymin=0.237)
pltu1.set_yticks(np.arange(0.24, 0.31, 0.01))
pltu1.set_xticks(ns[::10])
pltu1.set_xlabel('$n$')
pltu1.set_ylabel('Erreur de sous optimalité (util)')

# Thick white underlay so the blue curve stays readable where it crosses the
# curves of the twin axis.
pltu1.plot(ns, error, 'white', linewidth=8, zorder=3)
pltu1.plot(ns, error, 'C0', label="95e percentile d'erreur", zorder=5)
pltu1.legend()

pltu2 = pltu1.twinx()
pltu2.set_zorder(1)
pltu2.patch.set_visible(True)
# Draw on the twin axis explicitly instead of relying on pyplot's implicit
# "current axes" (which happens to be the twin right after twinx()).
pltu2.plot(ns, bound, 'k--')
pltu2.plot(ns, ns*0+lim, 'k:', alpha=0.5, zorder=2)
pltu2.axis(ymin=-bound.max() + 2*lim, ymax=bound.max())
pltu2.set_yticks([25, 50, 75, 100, 125, 150]+[lim])

# Replace the label of the extra tick at `lim` with an annotated formula.
ticks = pltu2.get_yticks().tolist()
ticks[-1] = r'$\lambda\|q^\star\|^2= % 2.2f$' % lim
pltu2.set_yticklabels(ticks)

pltu2.set_title("a) Utilité")

# CE
pltce1.set_zorder(2)
pltce1.patch.set_visible(False)
pltce1.tick_params('y', colors='C0')
pltce1.axis(ymax=0.5, ymin=errorce.min())
pltce1.set_yticks(np.arange(0.34, 0.44, 0.01))

pltce1.plot(ns[0], errorce[0], 'k--', label=r'Borne théorique $(\delta=5\%)$')
pltce1.plot(ns[0], errorce[0], 'k:', alpha=0.5, label='Limite de la borne')
pltce1.plot(ns, errorce, 'white', linewidth=8)
pltce1.plot(ns, errorce, 'C0', label="95e percentile d'erreur")
pltce1.set_xticks(ns[::10])
pltce1.set_ylabel('Erreur de sous optimalité (rendement)')
pltce1.legend()

pltce2 = pltce1.twinx()
pltce2.set_zorder(1)
pltce2.patch.set_visible(True)
pltce2.plot(ns, boundce, 'k--')
pltce2.plot(ns, ns*0+limce, 'k:', alpha=0.5)
pltce2.axis(ymin=-boundce.max() + 2*limce)
pltce2.axis(ymax=boundce.max())
# Keep only the automatic ticks that lie in the range of the bound, plus one
# extra tick at the limit.
yo = pltce2.get_yticks()
pltce2.set_yticks([el for el in yo if el >= boundce.min()]+[limce])

ticks = pltce2.get_yticks().tolist()
ticks[-1] = r"$\lambda\|q^\star\|^2 / \partial u(CE^\star)= % 2.2f$" % limce
pltce2.set_yticklabels(ticks)

pltce1.set_title("b) Équivalent certain")
pltce1.set_xlabel('$n$')

# Writes into the relative 'fig/' directory.
plt.savefig('fig/bound_errso.pdf', bbox_inches='tight', pad_inches=0)


<matplotlib.figure.Figure at 0x111978e48>

## Erreur de sous optimalité II

In [5]:
# Problem definition for the second sub-optimality experiment (same values as
# in the previous sections).
# `leu` is the notebook's alias for LipschitzExpUtility.
u = leu(1)

# p distributions go into `Xs`, one more into `R`; both are handed to
# synth.GaussianMarket in the next cell.
p = 2
R = RademacherDistribution()
Xs = [RademacherDistribution() for _k in range(p)]

In [8]:
# Build the synthetic market and draw the 5000-draw reference sample used by
# the cells below.
M = synth.GaussianMarket(Xs, R)
t_true = M.sample_t(5_000)

In [9]:
# Reference solution on the full reference sample (lamb=0) ...
q_true = solve(t_true, u=leu(1), lamb=0)

# ... and the mean utility of its first entry on that same sample.
eu_true = u(t_true @ q_true[0]).mean()

In [10]:
# Reference solution on the full reference sample (lamb=0).
q_true = solve(t_true, u=leu(1), lamb=0)

# Compute eu_true *before* anything that reads it: previously `limce` was
# evaluated first and only worked because an earlier cell had already left an
# `eu_true` in the kernel namespace.
eu_true = u(t_true @ q_true[0]).mean()

# Half the squared norm of the reference solution, and the same quantity
# rescaled by u.subinverse evaluated at u.inverse(eu_true).
lim = 1/2*la.norm(q_true)**2
limce = lim * u.subinverse(u.inverse(eu_true))

In [11]:
# Sample sizes to sweep (10..150 inclusive) and the samples drawn from
# `t_true` by the helper's `create_sample`.
m = 150
ns = np.arange(10, 151)
ts = create_sample(t_true, n=ns.max(), m=m)

# Regularisation schedule: one value per sample size, equal to 1 at n=10 and
# decreasing as n**(-1/4).
ls = (10 / ns) ** 0.25

# Per-n counterparts of `lim` / `limce`: (lambda/2) * ||q_true||^2, and the
# same rescaled by u.subinverse evaluated at u.inverse(eu_true).
lims = (ls / 2) * la.norm(q_true) ** 2
limces = lims * u.subinverse(u.inverse(eu_true))

In [13]:
# Sub-optimality sweep with the n-dependent regularisation schedule `ls`.
error = np.zeros(len(ns))
bound = np.zeros(len(ns))
errorce = np.zeros(len(ns))
boundce = np.zeros(len(ns))

# These depend only on the reference solution, not on (n, l), so they are
# computed once instead of on every iteration.
ce_true = u.inverse(eu_true)
ce_scale = u.subinverse(ce_true)

for i, (n, l) in enumerate(zip(ns, ls)):
    # Print every fifth index as a coarse progress indicator.
    if i % 5 == 0:
        print(i)

    # Keep only the first n entries along axis 1 of the drawn samples.
    t = ts[:, :n]
    qs = solve(t, u, lamb=l)
    outerror = out_error(qs, u, t_true, lamb=l)

    # 95th percentile of the shortfall w.r.t. the reference solution, in
    # utility units and after mapping through u.inverse.
    error[i] = np.percentile(eu_true - outerror, 95)
    errorce[i] = np.percentile(ce_true - u.inverse(outerror), 95)

    # Bound from the helper for this (n, lambda) pair, and its rescaled
    # counterpart.
    bound[i] = bound_sou(n, sqrt(p), q_true, lamb=l)
    boundce[i] = bound[i] * ce_scale

140


135


130


125


120


115


110


105


100


95


90


85


80


75


70


65


60


55


50


45


40


35


30


25


20


15


10


5


0


In [47]:
# Two stacked panels for the decreasing-regularisation experiment: empirical
# 95th-percentile error on the left axis, bound on a twin right axis.
fig, axarr = plt.subplots(2, figsize=(6, 8))
fig.subplots_adjust(hspace=.25)
plt1, plt2 = axarr
# The schedule is ls = (10/ns)**(1/4), i.e. lambda decreases like n^{-1/4};
# the exponent in the title previously had the wrong sign.
fig.suptitle(r'Erreur de sous optimalité — Régularisation décroissante $\lambda = O(n^{-1/4})$', fontsize=10, y=0.935)

# Single-point dashed plot: only creates the legend entry for the bound,
# which is drawn on the twin axis.
plt1.plot(ns[0], error[0], 'k--', label=r'Borne théorique $(\delta=5\%)$')
plt1.plot(ns, error, label="95e percentile d'erreur")
plt1.axis(ymax=0.45)
plt1.set_yticks(np.arange(0.18, 0.33, 0.02))
plt1.tick_params('y', colors='C0')

plt12 = plt1.twinx()
plt12.plot(ns, bound, 'k--')
plt12.axis(ymin=-80)
plt12.set_yticks(np.arange(20, 140, 10))

plt1.legend()
plt1.set_xticks(ns[::10])
plt1.set_xlabel('$n$')
plt1.set_ylabel('util')
plt1.set_title('a) Utilité')

# Disabled legend entry for the limit curve (see the disabled plot below).
# plt2.plot(ns[0],error[0],'k:',alpha=0.5,label='Limite $n=\infty$ de la borne théorique')
plt2.plot(ns[0], errorce[0], 'k--', label=r'Borne théorique $(\delta=5\%)$')
plt2.plot(ns, errorce, label="95e percentile d'erreur")
plt2.axis(ymax=0.60)
plt2.set_yticks(np.arange(0.26, 0.45, 0.02))
plt2.tick_params('y', colors='C0')

plt2.set_xticks(ns[::10])
plt2.set_xlabel('$n$')
plt2.legend()
plt2.set_title('b) Équivalent certain')
plt2.set_ylabel('Rendement')

# Give the twin its own name (mirrors `plt12` above) instead of rebinding
# `plt2`, which used to lose the handle on the left axis.
plt22 = plt2.twinx()
# plt22.plot(ns,lims,'k:',alpha=0.5)
plt22.plot(ns, boundce, 'k--')
plt22.axis(ymin=-160)
plt22.set_yticks(np.arange(25, 250, 25));

# Export is disabled; uncomment to write the figure to disk.
# plt.savefig('fig/bound_errso3.pdf',bbox_inches='tight',pad_inches=0)
# 


<matplotlib.figure.Figure at 0x1139a25c0>

In [69]:
# Two stacked panels (shared x-axis) for the decreasing-regularisation
# experiment. The empirical 95th-percentile error uses the left axis (blue);
# the bound uses a twin right axis drawn behind it (zorder 1 vs 2, with the
# left patch hidden so the twin shows through).
fig, axarr = plt.subplots(2, sharex=True, figsize=(6, 8))
pltu1, pltce1 = axarr[0], axarr[1]

# Utility
# Single-point dashed plot: only creates the legend entry for the bound.
# Raw strings keep the mathtext backslashes free of invalid-escape warnings.
pltu1.plot(ns[0], error[0], 'k--', label=r'Borne théorique $(\delta = 5\%)$')
# pltu1.plot(ns[0],error[0],'k:',alpha=0.5,label='Limite de la borne')
pltu1.plot(ns, lims, 'k:', alpha=0.5)
pltu1.set_zorder(2)
pltu1.patch.set_visible(False)
pltu1.tick_params('y', colors='C0')
# Leave the upper half of the panel free (for the legend) by extending ymax
# one full data range above the largest error.
pltu1.axis(ymax=2*error.max() - error.min())
pltu1.axis(ymin=error.min())
yticks = pltu1.get_yticks()
pltu1.set_yticks([el for el in yticks if el <= error.max()])

# Thick white underlay so the blue curve stays readable over the twin axis.
pltu1.plot(ns, error, 'white', linewidth=8, zorder=3)
pltu1.plot(ns, error, 'C0', label=r'$Q(EU(q^\star) - EU(\hat q),95\%)$', zorder=5)
pltu1.legend()

pltu2 = pltu1.twinx()
pltu2.set_zorder(1)
pltu2.patch.set_visible(True)
pltu2.plot(ns, bound, 'k--')
# pltu2.plot(ns,lims,'k:',alpha=0.5,zorder=2)
# pltu2.axis(ymin=-bound.max() + 0.5*lims.min())
# pltu2.axis(ymax=bound.max())
yticks = pltu2.get_yticks()
pltu2.set_yticks([el for el in yticks if el >= bound.min()]+[0])

# Set the title on the twin axis explicitly (pyplot's implicit current axes
# right after twinx()). With usetex disabled in the config cell, the TeX
# accent escape "\'e" was rendered literally, so use the real character.
pltu2.set_title("a) Utilité")

# CE
pltce1.set_zorder(2)
pltce1.patch.set_visible(False)
pltce1.tick_params('y', colors='C0')
pltce1.axis(ymax=2*errorce.max() - errorce.min())
pltce1.axis(ymin=errorce.min())
yticks = pltce1.get_yticks()
pltce1.set_yticks([el for el in yticks if el <= errorce.max()])

pltce1.plot(ns[0], errorce[0], 'k--', label=r'Borne $\delta=5\%$')
pltce1.plot(ns[0], errorce[0], 'k:', alpha=0.5, label='Limite de la borne')
pltce1.plot(ns, errorce, 'white', linewidth=8)
pltce1.plot(ns, errorce, 'C0', label=r'$Q(CE(q^\star) - CE(\hat q),95\%)$')
pltce1.legend()

pltce2 = pltce1.twinx()
pltce2.set_zorder(1)
pltce2.patch.set_visible(True)
pltce2.plot(ns, boundce, 'k--')
pltce2.plot(ns, limces, 'k:', alpha=0.5)
pltce2.axis(ymin=-boundce.max() + 2.1*limces.min())
pltce2.axis(ymax=boundce.max())
yticks = pltce2.get_yticks()
pltce2.set_yticks([el for el in yticks if el >= boundce.min()]+[0])

# Relabel the x ticks as (n, lambda) pairs. The first tick is assumed to be
# n = 0 and is labelled with lambda = infinity.
# NOTE(review): lambda is now computed with the same formula as the schedule
# `ls = (10/ns)**(1/4)` used in the sweep; the labels previously showed
# 0.5*sqrt(10/n), which does not match that schedule -- confirm.
xticks = pltce1.get_xticks().tolist()
xlabels = [r"$(0,\infty)$"] + ["(%d, %0.3f)" % (n, (10/n)**(1/4)) for n in xticks[1:]]
pltce1.set_xticklabels(xlabels)

pltce1.set_title("b) Équivalent certain")
pltce1.set_xlabel(r"$(n,\lambda)$");
# Export is disabled; uncomment to write the figure to disk.
# plt.savefig('fig/bound_errso_lambda.pdf',bbox_inches='tight',pad_inches=0)


<matplotlib.figure.Figure at 0x1117615c0>

<matplotlib.text.Text at 0x10c57c780>