In [1]:
import pandas as pd
import numpy as np
import bisect
import scipy.stats as stats
from matplotlib import pyplot as plt


In [2]:
# Bin count handed to np.histogram2d when WeigthHeightData receives no explicit edges.
N_BINS = 30

# Width of the cell integrated around a query point in get_prob_gaussian,
# one entry per dimension in (Peso, Altura) order.
BIN_SIZE = np.array((1.0, 0.5))

# Figure size passed to plt.subplots by the plotting helper.
FIGSIZE = (10, 5)

In [3]:
def get_bin(a, x):
    """Return the 0-based index of the bin of ``a`` that contains ``x``.

    ``a`` is a sorted sequence of bin edges, so it describes ``len(a) - 1``
    bins; bin ``i`` spans ``a[i] <= x < a[i + 1]``. The last bin also includes
    its right edge, which matches how ``np.histogram2d`` closes its final bin.

    Values outside the edges are clamped to the nearest bin (first or last)
    so the result can always be used to index a histogram built from ``a``.

    Parameters
    ----------
    a : sorted sequence of numbers
        Bin edges in ascending order.
    x : number
        Value to locate.

    Returns
    -------
    int
        Bin index in ``[0, len(a) - 2]`` (``0`` if ``a`` has fewer than two
        edges and therefore no real bin).
    """
    # bisect_right gives the insertion point, i.e. the number of edges <= x.
    # The bin containing x starts at the previous edge, hence the -1.
    ind = bisect.bisect_right(a, x) - 1
    last_bin = len(a) - 2
    # Clamp: x below a[0] would give -1 (which silently wraps in numpy
    # indexing) and x >= a[-1] would point past the last bin.
    return max(min(ind, last_bin), 0)

In [4]:
class WeigthHeightData:
    """Weight/height samples with a 2-D histogram and a fitted joint Gaussian.

    Built from a DataFrame with columns ``'Peso'``, ``'Altura'`` and
    ``'Genero'``. Exposes two ways of scoring a ``(Peso, Altura)`` point:
    the fitted Gaussian (``get_prob_gaussian``) and the sample histogram
    (``get_prob_hist``).
    """

    def __init__(self, name, df, dataedges=None):
        """Compute the histogram and Gaussian fit for ``df``.

        Parameters
        ----------
        name : object
            Label for this data set; stored lower-cased as ``self.name``.
        df : pandas.DataFrame
            Must contain the columns ``'Peso'``, ``'Altura'`` and ``'Genero'``.
        dataedges : optional
            Passed as ``bins`` to ``np.histogram2d``. When ``None`` the
            module-level ``N_BINS`` is used instead.
        """
        # Keep only the columns this class works with.
        self.df = pd.DataFrame(df[['Peso', 'Altura', 'Genero']])
        self.name = str(name).lower()

        # Histogram from samples: axis 0 of ``hist`` is Peso, axis 1 is Altura.
        bins = dataedges if dataedges is not None else N_BINS
        self.hist, self.pedges, self.aedges = np.histogram2d(
            self.df['Peso'], self.df['Altura'], bins=bins)

        # Joint Gaussian fitted with the sample mean and covariance,
        # in (Peso, Altura) order.
        measurements = self.df[['Peso', 'Altura']]
        self.cov = measurements.cov()
        self.mean = measurements.mean()

        self.dist = stats.multivariate_normal(mean=self.mean, cov=self.cov)

    def get_prob_gaussian(self, x, bin_size=None):
        """Probability mass of the fitted Gaussian in a cell centred at ``x``.

        Parameters
        ----------
        x : array-like, shape (..., 2)
            Cell centre(s) as ``(Peso, Altura)``.
        bin_size : array-like of 2 numbers, optional
            Cell width per dimension. Defaults to the module-level ``BIN_SIZE``.

        Returns
        -------
        float or numpy.ndarray
            Probability that a sample falls in
            ``[x - bin_size / 2, x + bin_size / 2]``.
        """
        if bin_size is None:
            bin_size = BIN_SIZE

        centre = np.asarray(x, dtype=float)
        half = np.asarray(bin_size, dtype=float) / 2
        upper = centre + half
        lower = centre - half

        # In two dimensions the mass of a rectangle is NOT cdf(upper) - cdf(lower);
        # it needs inclusion-exclusion over the four corners:
        #   F(u0, u1) - F(l0, u1) - F(u0, l1) + F(l0, l1)
        low_peso_corner = np.stack([lower[..., 0], upper[..., 1]], axis=-1)
        low_altura_corner = np.stack([upper[..., 0], lower[..., 1]], axis=-1)

        cdf = self.dist.cdf
        prob = (cdf(upper) - cdf(low_peso_corner)
                - cdf(low_altura_corner) + cdf(lower))
        # The multivariate cdf is evaluated numerically, so a tiny cell can
        # come out marginally negative; a probability cannot.
        return np.maximum(prob, 0.0)

    def get_prob_hist(self, x):
        """Return the histogram entry for the bin that ``x`` is mapped to.

        ``x`` is ``(Peso, Altura)``; each coordinate is mapped to a bin with
        ``get_bin`` against the matching edge array. The value returned is the
        raw entry of ``self.hist`` (a sample count as built in ``__init__``);
        divide by ``self.hist.sum()`` for an empirical probability.
        """
        peso_bin = get_bin(self.pedges, x[0])
        altura_bin = get_bin(self.aedges, x[1])
        return self.hist[peso_bin][altura_bin]
    

In [5]:
def plot_hist_and_gaussian(data):
    """Plot the sample histogram next to the fitted Gaussian's contour map.

    Parameters
    ----------
    data : object
        Must provide ``hist``, ``pedges``, ``aedges``, ``dist`` and ``name``
        (as set by ``WeigthHeightData.__init__``).

    Draws two side-by-side panels sharing the y axis, with Peso on x and
    Altura on y, prints a caption with ``data.name`` and shows the figure.
    Returns ``None``.
    """
    # Create the two panels.
    _, axes = plt.subplots(
        1, 2, figsize=FIGSIZE, gridspec_kw={'width_ratios': [1, 1]}, sharey=True)

    # Histogram panel. np.histogram2d stores Peso along array axis 0, while
    # imshow draws axis 0 vertically, so transpose to get Peso on x and
    # Altura on y as the extent (and the contour panel) expect.
    axes[0].imshow(data.hist.T, origin='lower', extent=(
        data.pedges[0], data.pedges[-1], data.aedges[0], data.aedges[-1]))

    # Gaussian pdf panel, evaluated on a grid with unit step in each
    # dimension spanning the histogram's range.
    pesos, alturas = np.mgrid[data.pedges[0]:data.pedges[-1]:1,
                              data.aedges[0]:data.aedges[-1]:1]
    pa = np.dstack((pesos, alturas))
    axes[1].contourf(pesos, alturas, data.dist.pdf(pa))

    # Final plot settings.
    for i, title in enumerate(['Histograma a partir de muestras',
                               'Curvas de nivel de pdf gaussiana']):
        axes[i].set_title(title)
        axes[i].axis('scaled')

    # Show results.
    print(f'\nGráficos correspondientes a {data.name}')
    plt.show()