In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import seaborn as sns
%matplotlib inline

In [2]:
# Discretise two normal distributions (the "negative" and "positive" classes)
# into narrow buckets along the feature axis, so later cells can turn the
# per-bucket probability mass into expected counts.
negdist = scipy.stats.norm(0.4, 0.1)
posdist = scipy.stats.norm(0.7, 0.1)

# Right-hand bucket edges 0.00, 0.01, ..., 1.10. linspace with an explicit
# count is used because the length of a float-step arange depends on
# floating-point rounding of (stop - start) / step.
buckets = np.linspace(0.0, 1.1, 111)
half_width = (buckets[1] - buckets[0]) / 2


def _bucket_probabilities(dist, edges):
    """Return (cdf at each edge, probability mass of each bucket) for `dist`.

    The buckets are (-inf, edges[0]], then (edges[i-1], edges[i]] for every
    consecutive pair of edges, then (edges[-1], +inf). The mass list therefore
    has len(edges) + 1 entries.
    """
    cdfs = dist.cdf(edges)
    # sf() gives the upper-tail mass directly; 1 - cdf loses precision when
    # the cdf is very close to 1.
    masses = np.concatenate(([cdfs[0]], np.diff(cdfs), [dist.sf(edges[-1])]))
    return list(cdfs), list(masses)


negcdfs, negprobs = _bucket_probabilities(negdist, buckets)
poscdfs, posprobs = _bucket_probabilities(posdist, buckets)

# One x position per bucket: half a bucket width to the left of each right
# edge, plus a nominal position half a width past the last edge for the
# open-ended upper tail bucket.
midpoints = np.append(buckets - half_width, buckets[-1] + half_width)

In [3]:
def generateData(ratio, total, neg_probs=None, pos_probs=None):
    """Compute expected per-bucket class counts and the positive proportion.

    Parameters
    ----------
    ratio : float
        Fraction of the `total` points that are positives.
    total : int
        Total number of points (positives plus negatives).
    neg_probs, pos_probs : sequence of float, optional
        Per-bucket probability mass for the negative / positive class.
        Default to the notebook-level `negprobs` / `posprobs`.

    Returns
    -------
    (E_neg, E_pos, E_por_pos) : tuple of lists
        Expected negatives per bucket, expected positives per bucket, and
        positives / (negatives + positives) per bucket. The proportion is
        NaN for a bucket whose expected total is zero.

    Raises
    ------
    ValueError
        If the two probability sequences differ in length.
    """
    if neg_probs is None:
        neg_probs = negprobs
    if pos_probs is None:
        pos_probs = posprobs
    if len(neg_probs) != len(pos_probs):
        raise ValueError("neg_probs and pos_probs must have the same length")

    # Round to nearest rather than truncate: a product such as 0.29 * 100
    # evaluates to 28.999999999999996 and int() alone would drop a point.
    p = int(round(ratio * total))
    n = total - p

    E_neg = [prob * n for prob in neg_probs]
    E_pos = [prob * p for prob in pos_probs]
    E_por_pos = []
    for neg, pos in zip(E_neg, E_pos):
        denom = neg + pos
        # An empty bucket has no defined proportion; report NaN explicitly.
        E_por_pos.append(pos / denom if denom != 0 else float('nan'))
    return E_neg, E_pos, E_por_pos

In [4]:
# Render one figure per positive-class fraction and save each as a numbered
# PNG, showing how the per-bucket proportion of positives shifts as the
# overall fraction of positives grows.
font = {'weight': 'bold', 'size': 22}
matplotlib.rc('font', **font)

# savefig does not create missing directories, so make sure the target exists
# before the first frame is written.
OUTPUT_DIR = 'example_figs'
os.makedirs(OUTPUT_DIR, exist_ok=True)

TOTAL_POINTS = 10000

# Proportion curve at the starting ratio; drawn on every frame as a reference.
_, _, E_por_const = generateData(ratio=0.2, total=TOTAL_POINTS)

for idx, ratio in enumerate(np.arange(0.2, 0.91, 0.01)):
    E_neg, E_pos, E_por_pos = generateData(ratio=ratio, total=TOTAL_POINTS)

    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax2 = ax1.twinx()  # second y-axis for the proportion curves
    ax1.set_title("Fraction of Positives: " + "{:.2f}".format(ratio))

    ax1.plot(midpoints, E_pos, '-', color='darkred', linewidth=4, label='expected num of positives')
    ax1.plot(midpoints, E_neg, '-', color='darkgreen', linewidth=4, label='expected num of negatives')
    ax2.plot(midpoints, E_por_pos, '-', color='black', linewidth=4, label='proportion of positives')
    ax2.plot(midpoints, E_por_const, '.', color='black', linewidth=4, label='original proportion')

    ax1.set_xlabel('input features')
    ax1.set_ylabel('expectation', color='darkred')
    ax2.set_ylabel('proportion', color='black')
    # Fixed limits keep every frame on the same scale.
    ax1.set_xlim(-0.1, 1.19)
    ax1.set_ylim(-18, 378)
    ax2.set_ylim(-0.05, 1.05)

    # The legend sits outside the axes, so it is passed to savefig as an
    # extra artist to be included in the tight bounding box.
    lgd = fig.legend(bbox_to_anchor=(1, 0.75), loc='center left', borderaxespad=0., prop={'size': 12})
    fig.savefig(os.path.join(OUTPUT_DIR, '{:03d}.png'.format(idx)),
                bbox_extra_artists=(lgd,), bbox_inches='tight')
    # Close this specific figure so figures do not accumulate across frames.
    plt.close(fig)