In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pylab as plt

import seaborn as sns

from skspatial.objects import Line, Plane
from skspatial.plotting import plot_3d


from skspatial.objects import Line, Cylinder, Point, Points
from skspatial.plotting import plot_3d

import phasespace

import tensorflow

import bisect
import numpy as np
import matplotlib.pylab as plt
import pandas as pd

import seaborn as sns

import numpy as np
from sklearn.mixture import GaussianMixture
from scipy.stats import multivariate_normal

import numpy as np
from scipy.interpolate import griddata
from scipy.integrate import quad, trapezoid
from scipy.interpolate import CubicSpline

import matplotlib.pylab as plt
from scipy import stats
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import matplotlib

from scipy.interpolate import LinearNDInterpolator
import scipy



import dm_generation_tools as dgt
import detector_simulation_tools as dst


import time

####################################
import warnings
# Suppress all warnings
# NOTE(review): with no category/module argument this filter hides every warning
# raised through the `warnings` module for the rest of the session, so
# deprecation and numerical warnings from the cells below are not shown.
warnings.filterwarnings("ignore")


import pickle

In [None]:
# Report the versions of the main numerical / plotting libraries in use.
for label, module in (('np', np), ('matplotlib', matplotlib), ('pd', pd), ('scipy', scipy)):
    print(f'{label}.__version__ = {module.__version__!r}')

In [None]:
# We need this file. Make sure there is a soft link to it
# (the name is a relative path, so it is resolved against the current working directory).
infilename_for_eloss = 'muons_summary_from_GEANT4_simulations.parquet'
df_eloss = pd.read_parquet(infilename_for_eloss)
# The cells below read the columns 'e_initial', 'z' and 'e' from this frame.
df_eloss

In [None]:
# Make some plots: for the first two initial energies in the table, scatter
# (initial energy - 'e') against 'z', one panel per energy.
e_initials = df_eloss['e_initial'].unique()

plt.figure(figsize=(12,4))

for idx,ei in enumerate(e_initials[0:2]):
    print(f"ei: {ei}")

    # Select the rows for this energy once and reuse them for both columns
    # (avoids shadowing the builtin `filter` with a global mask variable).
    rows_at_energy = df_eloss.loc[df_eloss['e_initial']==ei]

    z = rows_at_energy['z']
    ef = ei - rows_at_energy['e']

    print(len(z))
    print(len(ef))

    ax = plt.subplot(1,2,idx+1)
    ax.plot(z,ef,'.',markersize=0.1)
    ax.set_xlabel('z (m)')
    ax.set_ylabel('Final energy (GeV)')
    ax.set_title(f'Initial energy: {ei} GeV')

In [None]:
def find_neighbors(sorted_list, x):
    """
    Locate the two entries of an ascending sequence that bracket a value x.

    The lookup uses bisect.bisect_left, so sorted_list must already be sorted
    in ascending order; an unsorted input silently gives meaningless results.

    Returns (low, high) where:
      - low  = the largest element strictly less than x
               (None if no element is smaller than x)
      - high = the smallest element >= x
               (None if every element is smaller than x)

    When x is itself an element it is returned as `high`, and the element
    before it (or None) as `low`. An empty sequence gives (None, None).
    """
    # len() rather than truthiness so that numpy arrays are accepted too.
    if len(sorted_list) == 0:
        return None, None

    # idx is the insertion point that keeps the sequence sorted, placed
    # before any entries equal to x.
    idx = bisect.bisect_left(sorted_list, x)
    if idx == 0:
        # x is <= first element
        return None, sorted_list[0]
    if idx == len(sorted_list):
        # x is greater than all elements
        return sorted_list[-1], None
    # sorted_list[idx-1] < x <= sorted_list[idx]
    return sorted_list[idx-1], sorted_list[idx]

#####################################################################################

def energy_after_traveling_distance(e_initials, zvals, eivals, efvals, E_muon, distance, make_plots=False, ngendata=1, verbose=False):
    """
    Draw random samples of the quantity stored in `efvals` for a muon of
    initial energy `E_muon`, interpolating between the two nearest simulated
    initial energies.

    Steps:
      1. Find the simulated energies elo / ehi that bracket E_muon.
      2. For each of the two, build a Gaussian KDE of its `efvals` entries and
         evaluate it on 100 points spanning that energy's own [min, max];
         the abscissa is then rescaled to [0, 1].
      3. Interpolate linearly between the two rescaled curves at E_muon and
         turn the result into a normalised cumulative distribution.
      4. Invert a cubic spline of that CDF at uniform random numbers and map
         the result from [0, 1] back to a value range that is itself linearly
         interpolated between the two bracketing ranges.

    Parameters:
      e_initials : ascending sequence of the simulated initial energies
                   (it is bisected by find_neighbors, so it must be sorted).
      zvals      : not used inside this function.
      eivals     : initial energy of each event.
      efvals     : value to sample for each event, aligned with `eivals`.
      E_muon     : initial energy to generate samples for.
      distance   : not used inside this function.
                   NOTE(review): callers apparently pre-select events near the
                   distance of interest before calling - confirm.
      make_plots : if True, draw diagnostic matplotlib plots.
      ngendata   : number of samples to generate.
      verbose    : if True, print timing and failure diagnostics.

    Returns:
      A list of `ngendata` floats on success. If E_muon is not bracketed by
      `e_initials`, if either bracketing energy has fewer than 10 events, or
      if fewer than two interpolated points are usable, an integer numpy
      array of length `ngendata` filled with -1 is returned instead.
    """
    start = time.time()

    def _no_data():
        # Failure sentinel shared by every early exit.
        if verbose:
            print("NO DATA TO WORK WITH")
        return -1*np.ones(ngendata,dtype=int)

    elo,ehi = find_neighbors(e_initials, E_muon)
    if elo is None or ehi is None:
        # E_muon lies outside the simulated grid, so there is nothing to
        # interpolate between.
        return _no_data()

    if verbose:
        print(f"In function: time to run A: {time.time() - start:.2f} seconds")

    bracket_energies = [elo, ehi]

    # Pass 1: select the events of each bracketing energy once and record the
    # range they span. Both selections are validated before anything is drawn.
    samples = []
    elo_min = []
    ehi_max = []
    for Ei in bracket_energies:
        vals = efvals[eivals==Ei]
        if len(vals) < 10:
            return _no_data()
        samples.append(vals)
        elo_min.append(np.min(vals))
        ehi_max.append(np.max(vals))

    if verbose:
        print(f"In function: time to run B: {time.time() - start:.2f} seconds")

    # Pass 2: KDE of each selection, evaluated on its own range and stored on
    # a common [0, 1] abscissa so that the two shapes can be interpolated.
    frequencies = []
    eis = []
    efs = []
    for i,(Ei,vals) in enumerate(zip(bracket_energies, samples)):

        if make_plots:
            plt.subplot(2,2,1)
            plt.hist(vals, bins=100, range=(elo_min[i], ehi_max[i]),label=f'{Ei}', alpha=0.5)
            plt.legend()

        kde = stats.gaussian_kde(vals)
        xpts = np.linspace(elo_min[i], ehi_max[i], 100)
        ypts = kde(xpts)

        # Normalize the xpts
        e_range = ehi_max[i] - elo_min[i]
        xpts -= elo_min[i]
        xpts /= e_range

        if make_plots:
            plt.subplot(2,2,2)
            plt.plot(xpts,ypts, label=f'{Ei}')
            plt.legend()

        efs += xpts.tolist()
        eis += (Ei * np.ones_like(xpts)).tolist()
        frequencies += ypts.tolist()

    if verbose:
        print(f"In function: time to run C: {time.time() - start:.2f} seconds")

    # 2-D linear interpolation over (initial energy, rescaled value).
    data = np.array([eis, efs])
    interp = LinearNDInterpolator(data.T, frequencies )

    if verbose:
        print(f"In function: time to run D: {time.time() - start:.2f} seconds")

    xpts_temp = np.linspace(0,1,100)
    ypts_temp = interp(E_muon,xpts_temp)

    # Cut out the nans since some of the points are out of range
    valid = ~np.isnan(ypts_temp)
    xpts = xpts_temp[valid]
    ypts = ypts_temp[valid]

    if len(xpts) < 2:
        # A CDF cannot be built from fewer than two points.
        return _no_data()

    if make_plots:
        plt.plot(xpts,ypts, label=f'{E_muon}')
        plt.legend()

    # Cumulative sum of the interpolated curve, normalised to end at 1.
    dx = xpts[1] - xpts[0]
    cdf = np.cumsum(ypts)*dx
    cdf /= cdf[-1]

    if verbose:
        print(f"In function: time to run E: {time.time() - start:.2f} seconds")

    if make_plots:
        plt.figure(figsize=(12,4))
        plt.subplot(1,2,1)
        plt.plot(xpts, cdf, label='CDF')
        plt.legend()

    spl = CubicSpline(xpts, cdf)

    if verbose:
        print(f"In function: time to run F: {time.time() - start:.2f} seconds")

    gendata = []
    nfail = 0

    # Scaling: interpolate the width and lower edge of the value range between
    # the two bracketing energies, using the fractional position of E_muon.
    frac_of_diff = (E_muon - elo) / (ehi - elo)
    e_range_0 = ehi_max[0] - elo_min[0]
    e_range_1 = ehi_max[1] - elo_min[1]

    e_muon_range = e_range_0 + ((e_range_1 - e_range_0)*frac_of_diff)
    e_muon_lo   = elo_min[0] + ((elo_min[1] - elo_min[0])*frac_of_diff)

    # Inverse-CDF sampling. The loop keeps drawing until `ngendata` values
    # have been accepted; draws without exactly one root in (0, 1) are retried.
    icount = 0
    while icount < ngendata:
        u = np.random.random() # Generates a float between 0.0 and 1.0

        ynew = spl.solve(u)
        xnew = ynew[(ynew>0) & (ynew<1)]

        if len(xnew) != 1:
            if verbose:
                print(u, ynew, xnew)
            nfail += 1
            continue

        # Map the accepted root from [0, 1] back to the interpolated range.
        good_val = xnew[0]
        good_val *= e_muon_range
        good_val += e_muon_lo

        gendata.append(good_val)
        icount += 1

    if make_plots:
        plt.subplot(1,2,2)
        plt.hist(gendata, bins=200)

    if verbose:
        print(f"In function: time to run G: {time.time() - start:.2f} seconds")
        print(f'{nfail = }')

    return gendata
    

In [None]:
# Test the energy loss code
# find_neighbors bisects its input, so the grid of simulated energies must be
# ascending; Series.unique() only returns values in order of first appearance.
e_initials = np.sort(df_eloss['e_initial'].unique())
zvals = df_eloss['z'].values
eivals = df_eloss['e_initial']
efvals = df_eloss['e']

start = time.time()
distance = 1000
distance_width = 0.01 * distance
E_muon = 45000

elo,ehi = find_neighbors(e_initials, E_muon)

# Keep only events at the two bracketing energies and within +/-1% of `distance`.
selected = (eivals==elo) | (eivals==ehi)
selected = selected & (zvals>distance - distance_width) & (zvals<distance+distance_width)

delta_e = energy_after_traveling_distance(e_initials, zvals[selected], eivals[selected], efvals[selected], E_muon=E_muon, distance=distance, make_plots=False, ngendata=1)
print(f"Time to run: {time.time() - start:.2f} seconds")

de = delta_e[0]
if de == -1:
    # The function returns an array of -1 when it has too few events to work with.
    print("No usable simulated events for this energy/distance selection")
else:
    e_final = E_muon - de
    print(f"{E_muon:.2f}  {de:.2f}   {e_final:.2f}")


In [None]:
# Make some plots: for the first two initial energies in the table, scatter
# (initial energy - 'e') against 'z', one panel per energy.
e_initials = df_eloss['e_initial'].unique()

plt.figure(figsize=(12,4))

for idx,ei in enumerate(e_initials[0:2]):
    print(f"ei: {ei}")

    # Select the rows for this energy once and reuse them for both columns
    # (avoids shadowing the builtin `filter` with a global mask variable).
    rows_at_energy = df_eloss.loc[df_eloss['e_initial']==ei]

    z = rows_at_energy['z']
    ef = ei - rows_at_energy['e']

    print(len(z))
    print(len(ef))

    ax = plt.subplot(1,2,idx+1)
    ax.plot(z,ef,'.',markersize=0.1)
    ax.set_xlabel('z (m)')
    ax.set_ylabel('Final energy (GeV)')
    ax.set_title(f'Initial energy: {ei} GeV')

In [None]:
# find_neighbors bisects its input, so make sure the energies are ascending first.
find_neighbors(np.sort(e_initials), 150)


In [None]:
# Full columns of df_eloss (one entry per row), used to build the boolean
# masks in the cells below.
# NOTE(review): this rebinds `e_initials`, which earlier cells use for the array
# of unique energies; re-running those cells afterwards changes what it holds.
e_initials = df_eloss['e_initial']
z_vals = df_eloss['z']
ef_vals = df_eloss['e']

In [None]:
from scipy.stats import gaussian_kde
from sklearn.neighbors import KernelDensity


In [None]:
e_initials_unique = df_eloss['e_initial'].unique()

# One row per (distance, initial energy): the smallest and largest value of
# column 'e' among the events whose 'z' lies within +/-5% of that distance.
data = {'e_i':[], 'dist':[], 'de_min':[], 'de_max':[]}

for d in [50, 75, 100, 200, 500, 1000]:
    dwidth = 0.05 * d
    print(d,dwidth)

    # The distance window does not depend on the energy: build it once per distance.
    filter_d1 = (z_vals>=d-dwidth) & (z_vals<=d+dwidth)

    for e_i in e_initials_unique:

        filter_e1 = (e_initials==e_i)
        de1 = df_eloss.loc[filter_e1 & filter_d1, 'e']

        # Empty selections are recorded as 0 for both extremes.
        if len(de1)>0:
            de_min = de1.min()
            de_max = de1.max()
        else:
            de_min = 0
            de_max = 0

        data['e_i'].append(e_i)
        data['de_min'].append(de_min)
        data['de_max'].append(de_max)
        data['dist'].append(d)

df_min_max = pd.DataFrame.from_dict(data)

df_min_max

In [None]:
fig,axes = plt.subplots(1,2,figsize=(12,5))

# Left panel: 'de_min' vs 'e_i'; right panel: 'de_max' vs 'e_i'.
# Points are coloured by 'dist' and both axes are logarithmic.
for panel, column in zip(axes, ('de_min', 'de_max')):
    sns.scatterplot(data=df_min_max, x='e_i', y=column, hue='dist', ax=panel)
    panel.set_yscale('log')
    panel.set_xscale('log')

In [None]:
# Faster

distance = 75
# This needs to be greater than 100, since that is the lowest value we have
E_muon = 190

# find_neighbors bisects its input, so the energy grid must be ascending;
# Series.unique() only returns values in order of first appearance.
e_initials_unique = np.sort(df_eloss['e_initial'].unique())

#############################################

# Find window of E

d = distance
dwidth = 0.05 * d
print(d,dwidth)

# Find the range in which we will look
elo,ehi = find_neighbors(e_initials_unique, E_muon)
if elo is None or ehi is None:
    # Fail with a clear message instead of a TypeError on `ehi - elo`.
    raise ValueError(f"E_muon={E_muon} is not bracketed by the simulated initial energies")
delta_e = ehi - elo

print(E_muon, delta_e, elo, ehi)

#############################################

# Events at each bracketing energy whose 'z' lies within +/-5% of the distance.
filter_e1 = (e_initials==elo)
filter_e2 = (e_initials==ehi)
filter_d1 = (z_vals>=d-dwidth) & (z_vals<=d+dwidth)

de1 = df_eloss.loc[filter_e1 & filter_d1, 'e']
de2 = df_eloss.loc[filter_e2 & filter_d1, 'e']

#####################################

# Gaussian kernel density estimate of column 'e' for each bracketing energy.
kde1 = gaussian_kde(de1)
kde2 = gaussian_kde(de2)

#####################################
print("ranges")

def find_de_ranges(de1, de2):
    """
    Return the value ranges covered by two samples.

    Returns (elo_vals, ehi_vals, elo_min, ehi_max) where:
      - elo_vals = array [min(de1), min(de2)]
      - ehi_vals = array [max(de1), max(de2)]
      - elo_min  = the smaller of the two minima
      - ehi_max  = the larger of the two maxima
    """
    both = (de1, de2)
    lows = np.array([min(sample) for sample in both])
    highs = np.array([max(sample) for sample in both])
    return lows, highs, min(lows), max(highs)
################################################################

elo_vals, ehi_vals, elo_min,ehi_max = find_de_ranges(de1, de2)

# Grid on which both KDEs are evaluated for plotting.
xpts = np.linspace(elo_min,E_muon,100)

kde1_vals = kde1.pdf(xpts)
kde2_vals = kde2.pdf(xpts)

fig,axes = plt.subplots(1,3,figsize=(12,4))

# The histograms are drawn with density=True, so the vertical axis is a
# probability density rather than an energy.
density_label = 'Probability density'
hist_kwargs = dict(range=(elo_min,ehi_max), bins=200, density=True)
curves = ((de1, kde1_vals, elo), (de2, kde2_vals, ehi))

# Panels 0 and 1: one bracketing energy each, histogram with its KDE on top.
for ax, (de, kde_vals, e_init) in zip(axes[:2], curves):
    ax.hist(de, **hist_kwargs)
    ax.plot(xpts, kde_vals, lw=2)
    ax.set_xlabel(r'$\Delta$ E (GeV)')
    ax.set_ylabel(density_label)
    ax.set_title(f'Initial energy: {e_init} GeV')

# Panel 2: both energies overlaid.
for de, kde_vals, e_init in curves:
    axes[2].hist(de, label=f'E$_i$={e_init} GeV', **hist_kwargs)
    axes[2].plot(xpts, kde_vals, lw=2)
axes[2].set_xlabel(r'$\Delta$ E (GeV)')
axes[2].set_ylabel(density_label)
axes[2].legend()



In [None]:
######################################################################
# Normalize the x-scale
# NOTE: e_ranges is computed here but not used anywhere else in this cell.
e_ranges = ehi_vals - elo_vals
# Evaluate each KDE on npts points spanning its own [min, max] range, so that
# point k of both curves sits at the same fractional position of its range.
npts = 500
xpts1 = np.linspace(elo_vals[0], ehi_vals[0], npts)
xpts2 = np.linspace(elo_vals[1], ehi_vals[1], npts)

kde1_norm_vals = kde1.pdf(xpts1)
kde2_norm_vals = kde2.pdf(xpts2)

# Normalize the y-values so that each curve sums to 1 over its grid points
kde1_norm_vals /= sum(kde1_norm_vals)
kde2_norm_vals /= sum(kde2_norm_vals)

# Weight given to the lower-energy curve: 1 when E_muon == elo, 0 when E_muon == ehi.
scale_factor = (ehi - E_muon)/(ehi-elo)
print(E_muon, elo, ehi, scale_factor)
# Point-by-point weighted average of the two curves, renormalized to sum to 1.
kde_between_norm_vals =  (scale_factor*kde1_norm_vals) + ((1-scale_factor)*kde2_norm_vals)
kde_between_norm_vals /= sum(kde_between_norm_vals)



# Interpolate the lower and upper edge of the range with the same weights,
# and build the grid on which the blended curve lives.
elo_new = (scale_factor*elo_vals[0]) + ((1-scale_factor)*elo_vals[1])
ehi_new = (scale_factor*ehi_vals[0]) + ((1-scale_factor)*ehi_vals[1])
xpts_new = np.linspace(elo_new, ehi_new, npts)

# Common [0, 1] abscissa used to compare the three curve shapes.
xpts_norm = np.linspace(0,1, npts)

fig,axes = plt.subplots(3,1,figsize=(12,12))
# Top: the two input curves on their own ranges.
axes[0].plot(xpts1, kde1_norm_vals, lw=2, label=f'{elo}')
axes[0].plot(xpts2, kde2_norm_vals, lw=2,label=f'{ehi}')
axes[0].legend()

# Middle: all three curves on the common [0, 1] abscissa.
axes[1].plot(xpts_norm, kde1_norm_vals, lw=2, label=f'{elo}')
axes[1].plot(xpts_norm, kde2_norm_vals, lw=2, label=f'{ehi}')
axes[1].plot(xpts_norm, kde_between_norm_vals, lw=2, label=f'{E_muon}')
axes[1].legend()

# Bottom: the input curves plus the blended curve on its interpolated range.
axes[2].plot(xpts1, kde1_norm_vals, lw=2, label=f'{elo}')
axes[2].plot(xpts2, kde2_norm_vals, lw=2,label=f'{ehi}')
axes[2].plot(xpts_new, kde_between_norm_vals, lw=2, label=f'{E_muon}')
axes[2].legend()


# Sanity check: each of these should print a value very close to 1.
print(sum(kde1_norm_vals))
print(sum(kde2_norm_vals))
print(sum(kde_between_norm_vals))

In [None]:
def generate_cdf(xpts, ypts):
    """
    Build a cumulative distribution from a sampled curve.

    The running sum of `ypts` is multiplied by the spacing between the first
    two entries of `xpts`, then divided by its last entry so that the
    returned array ends at 1.
    """
    step = xpts[1] - xpts[0]

    running_total = np.cumsum(ypts) * step
    running_total /= running_total[-1]

    return running_total

cdf1 = generate_cdf(xpts1, kde1_norm_vals)
cdf2 = generate_cdf(xpts2, kde2_norm_vals)
cdf_between = generate_cdf(xpts_new, kde_between_norm_vals)

print("Making the spline........")

# Cubic spline through the blended CDF; solving spl(x) == u for a uniform
# random u gives a sample x (inverse-CDF sampling). Built once.
spl = CubicSpline(xpts_new, cdf_between)

enew_min = min(xpts_new)
enew_max = max(xpts_new)

print("Generating random points..........")

new_npts = 5000
ynew_pts = []
new_vals_rand = np.random.random(new_npts)

for u in new_vals_rand:
    roots = spl.solve(u)
    # Keep only roots strictly inside the range covered by the CDF.
    roots = roots[(roots>enew_min) & (roots<enew_max)]
    # Draws without exactly one such root are dropped, so ynew_pts can end up
    # shorter than new_npts.
    if len(roots)==1:
        ynew_pts.append(roots[0])
    


In [None]:
fig,axes = plt.subplots(3,1,figsize=(12,12))

# Top: the CDFs of the two bracketing energies and of the blended curve.
axes[0].plot(xpts1, cdf1, lw=2, label=f'{elo}')
axes[0].plot(xpts2, cdf2, lw=2,label=f'{ehi}')
axes[0].plot(xpts_new, cdf_between, lw=2, label=f'{E_muon}')
axes[0].legend()

# Middle: generated samples compared with the higher bracketing energy.
axes[1].hist(de2,     range=(elo_min,ehi_max), bins=200, density=True, alpha=0.4, label=f'{ehi}')
axes[1].hist(ynew_pts,range=(elo_min,ehi_max), bins=200, density=True, alpha=0.75, label=f'{E_muon}')
axes[1].legend()

# Bottom: generated samples compared with the lower bracketing energy.
axes[2].hist(de1,     range=(elo_min,ehi_max), bins=200, density=True, alpha=0.4, label=f'{elo}')
axes[2].hist(ynew_pts,range=(elo_min,ehi_max), bins=200, density=True, alpha=0.4, label=f'{E_muon}')
# Trailing semicolon suppresses the Legend repr; a ';' on a line of its own is a SyntaxError.
axes[2].legend();


In [None]:
# Solve spl(x) == 0.5, where spl is the spline built from cdf_between.
# NOTE(review): the SciPy docs state that extrapolate='periodic' behaves like
# extrapolate=False for root finding - confirm that this is the intent.
spl.solve(0.5, extrapolate='periodic')

In [None]:
######################################################################

ax = plt.figure().add_subplot(projection='3d')

# One filled curve per bracketing energy: the KDE values are drawn as height
# above the line at y = that energy.
for y, z in ((elo, kde1_vals), (ehi, kde2_vals)):
    ax.fill_between(xpts, y, z,
                    xpts, y, 0,
                    facecolors='r', alpha=.7)


####################################################

# Frequencies


#e_range = ehi_max[i] - elo_min[i]
#xpts -= elo_min[i]
#xpts /= e_range


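Reference for the kernel density example in the next cell (scikit-learn, "Simple 1D Kernel Density Estimation"): https://scikit-learn.org/stable/auto_examples/neighbors/plot_kde_1d.html#sphx-glr-auto-examples-neighbors-plot-kde-1d-py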

In [None]:
# Example

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

from sklearn.neighbors import KernelDensity

# ----------------------------------------------------------------------
# Plot the progression of histograms to kernels
# NOTE(review): this example rebinds generic names (fig, ax, kde, X, N, bins)
# that other cells of the notebook also use.
np.random.seed(1)
N = 20
# Two-component sample: about 30% drawn from N(0, 1) and 70% from N(5, 1),
# reshaped into a column because KernelDensity.fit expects 2-D input.
X = np.concatenate(
    (np.random.normal(0, 1, int(0.3 * N)), np.random.normal(5, 1, int(0.7 * N)))
)[:, np.newaxis]
# Evaluation grid for the densities and bin edges for the histograms.
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
bins = np.linspace(-5, 10, 10)

fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)
fig.subplots_adjust(hspace=0.05, wspace=0.05)

# histogram 1
ax[0, 0].hist(X[:, 0], bins=bins, fc="#AAAAFF", density=True)
ax[0, 0].text(-3.5, 0.31, "Histogram")

# histogram 2 (same data, bin edges shifted by 0.75)
ax[0, 1].hist(X[:, 0], bins=bins + 0.75, fc="#AAAAFF", density=True)
ax[0, 1].text(-3.5, 0.31, "Histogram, bins shifted")

# tophat KDE (score_samples returns log-densities, hence np.exp below)
kde = KernelDensity(kernel="tophat", bandwidth=0.75).fit(X)
log_dens = kde.score_samples(X_plot)
ax[1, 0].fill(X_plot[:, 0], np.exp(log_dens), fc="#AAAAFF")
ax[1, 0].text(-3.5, 0.31, "Tophat Kernel Density")

# Gaussian KDE
kde = KernelDensity(kernel="gaussian", bandwidth=0.75).fit(X)
log_dens = kde.score_samples(X_plot)
ax[1, 1].fill(X_plot[:, 0], np.exp(log_dens), fc="#AAAAFF")
ax[1, 1].text(-3.5, 0.31, "Gaussian Kernel Density")

# Mark the individual samples just below the x-axis of every panel.
for axi in ax.ravel():
    axi.plot(X[:, 0], np.full(X.shape[0], -0.01), "+k")
    axi.set_xlim(-4, 9)
    axi.set_ylim(-0.02, 0.34)

for axi in ax[:, 0]:
    axi.set_ylabel("Normalized Density")

for axi in ax[1, :]:
    axi.set_xlabel("x")

# ----------------------------------------------------------------------
# Plot all available kernels
X_plot = np.linspace(-6, 6, 1000)[:, None]
# A single sample at 0, so each fitted density below is the kernel shape itself.
X_src = np.zeros((1, 1))

fig, ax = plt.subplots(2, 3, sharex=True, sharey=True)
fig.subplots_adjust(left=0.05, right=0.95, hspace=0.05, wspace=0.05)


def format_func(x, loc):
    """
    Tick-label formatter expressing positions in units of "h".

    0, 1 and -1 get the special labels "0", "h" and "-h"; any other value is
    rendered as its integer part followed by "h". `loc` is not used.
    """
    for value, label in ((0, "0"), (1, "h"), (-1, "-h")):
        if x == value:
            return label
    return "%ih" % x


# One panel per kernel: fit to the single sample in X_src and plot the
# resulting density, with the x ticks labelled by format_func.
for i, kernel in enumerate(
    ["gaussian", "tophat", "epanechnikov", "exponential", "linear", "cosine"]
):
    axi = ax.ravel()[i]
    log_dens = KernelDensity(kernel=kernel).fit(X_src).score_samples(X_plot)
    axi.fill(X_plot[:, 0], np.exp(log_dens), "-k", fc="#AAAAFF")
    axi.text(-2.6, 0.95, kernel)

    axi.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
    axi.xaxis.set_major_locator(plt.MultipleLocator(1))
    axi.yaxis.set_major_locator(plt.NullLocator())

    axi.set_ylim(0, 1.05)
    axi.set_xlim(-2.9, 2.9)

ax[0, 1].set_title("Available Kernels")

# ----------------------------------------------------------------------
# Plot a 1D density example
N = 100
np.random.seed(1)
# Same two-component mixture as in the first section, now with N = 100 samples.
X = np.concatenate(
    (np.random.normal(0, 1, int(0.3 * N)), np.random.normal(5, 1, int(0.7 * N)))
)[:, np.newaxis]

X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

# Density of the distribution the samples were drawn from: 0.3*N(0,1) + 0.7*N(5,1).
true_dens = 0.3 * norm(0, 1).pdf(X_plot[:, 0]) + 0.7 * norm(5, 1).pdf(X_plot[:, 0])

fig, ax = plt.subplots()
ax.fill(X_plot[:, 0], true_dens, fc="black", alpha=0.2, label="input distribution")
colors = ["navy", "cornflowerblue", "darkorange"]
kernels = ["gaussian", "tophat", "epanechnikov"]
lw = 2

# Overlay the KDE obtained with each kernel, all using bandwidth 0.5.
for color, kernel in zip(colors, kernels):
    kde = KernelDensity(kernel=kernel, bandwidth=0.5).fit(X)
    log_dens = kde.score_samples(X_plot)
    ax.plot(
        X_plot[:, 0],
        np.exp(log_dens),
        color=color,
        lw=lw,
        linestyle="-",
        label="kernel = '{0}'".format(kernel),
    )

ax.text(6, 0.38, "N={0} points".format(N))

ax.legend(loc="upper left")
# Mark the samples below the axis, with random vertical jitter so they do not overlap.
ax.plot(X[:, 0], -0.005 - 0.01 * np.random.random(X.shape[0]), "+k")

ax.set_xlim(-4, 9)
ax.set_ylim(-0.02, 0.4)
plt.show()

In [None]:
#X
# Display the evaluation grid left over from the last section of the example cell.
X_plot