In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Switch on seaborn's default plot theme for every matplotlib figure drawn below.
sns.set()

# Example from Bayes rule with Python

In [None]:
# Size of the example: N patients spread over a `rows` x `cols`
# (symptom x disease) table of counts.
N = 200
rows = 4
cols = 10

# Fixed joint count table: entry [r, c] is the number of patients that show
# the symptom of row r together with the disease of column c.
# (A random table with the same total could be produced instead by drawing N
# (row, col) index pairs with np.random.randint and counting them in an
# integer array of shape (rows, cols).)
symptoms_deceases = np.array([
    [0, 0, 1, 0, 3, 5, 10, 7, 7, 4],
    [0, 1, 1, 10, 16, 11, 12, 7, 8, 5],
    [3, 5, 8, 9, 14, 10, 3, 3, 0, 0],
    [8, 9, 9, 5, 4, 1, 1, 0, 0, 0],
])

# Tie the declared constants to the hard-coded data so they cannot drift apart.
assert symptoms_deceases.shape == (rows, cols), "count table must be rows x cols"
assert symptoms_deceases.sum() == N, "counts must add up to the N patients"

# Same table with the row order reversed (last row first).
flipped = np.flip(symptoms_deceases, axis=0)

# Integer DataFrame copy of the table, in the original row order.
df = pd.DataFrame(symptoms_deceases).astype(int)

# Marginal counts of the flipped table: column sums are patients per disease,
# row sums are patients per symptom.
nr_per_disease = flipped.sum(axis=0)
nr_per_symptom = flipped.sum(axis=1)

print(flipped)
print(nr_per_disease)
print(nr_per_symptom)

n_symptoms, n_diseases = flipped.shape

# One figure with three stacked panels. Creating all Axes up front avoids the
# stray full-figure Axes that pyplot calls issued before plt.subplot(311)
# would leave behind the panels on newer Matplotlib releases.
fig, (ax_joint, ax_disease, ax_symptom) = plt.subplots(3, 1, figsize=(18, 12))

# Panel 1: one dot per patient, jittered inside its (symptom, disease) cell.
# Each cell index is repeated as many times as the cell's count, so all dots
# are drawn with a single scatter call.
cell_rows, cell_cols = np.indices(flipped.shape)
dots_per_cell = flipped.ravel().astype(int)
dot_rows = np.repeat(cell_rows.ravel(), dots_per_cell)
dot_cols = np.repeat(cell_cols.ravel(), dots_per_cell)

# Locally seeded generator: the jitter is cosmetic, but a fixed seed makes the
# saved image identical on every run without touching NumPy's global state.
jitter_rng = np.random.default_rng(0)
dot_y = 0.1 + dot_rows + 0.8 * jitter_rng.random(dot_rows.size)
dot_x = 0.1 + dot_cols + 0.8 * jitter_rng.random(dot_cols.size)

ax_joint.scatter(dot_x, dot_y, color='orange')
ax_joint.grid(True)
ax_joint.set_xticks(range(0, n_diseases + 1))
ax_joint.set_yticks(range(1, n_symptoms + 1))
ax_joint.set_xlabel('disease')
ax_joint.set_ylabel('symptom')

# Panel 2: marginal count per disease, bars labelled 1..n_diseases.
disease_pos = range(n_diseases)
ax_disease.bar(disease_pos, nr_per_disease, color='orange')
ax_disease.set_xticks(disease_pos)
ax_disease.set_xticklabels(range(1, n_diseases + 1))
ax_disease.set_xlabel('disease')
ax_disease.set_ylabel('number')

# Panel 3: marginal count per symptom, horizontal bars labelled 1..n_symptoms.
symptom_pos = range(n_symptoms)
ax_symptom.barh(symptom_pos, nr_per_symptom, color='orange')
ax_symptom.set_yticks(symptom_pos)
ax_symptom.set_yticklabels(range(1, n_symptoms + 1))
ax_symptom.set_ylabel('symptom')
ax_symptom.set_xlabel('number')

fig.savefig('joint_margin_plot.jpg', format='jpg')


# Append the margins: an 'All' column holding each row's total, then an 'All'
# row holding each column's total (its last entry is the grand total).
df['All'] = df.sum(axis='columns')
df.loc['All'] = df.sum(axis='index')

# Label the diseases 1..10 and the symptoms 4..1 (top to bottom); both
# margins keep the label 'All'.
df.columns = list(range(1, 11)) + ['All']
df.index = list(range(4, 0, -1)) + ['All']

df.index.name = 'symptom'
df.columns.name = 'disease'

df

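Look at disease 5: the likelihood p(s3|d5) of symptom 3 is 16/37 ≈ 0.43, while the likelihood p(s2|d5) of symptom 2 is 14/37 ≈ 0.38.
The prior p(d5) is the same in both cases, since it depends only on the disease. Still, the posterior is larger for symptom 2: p(d5|s2) = 14/55 ≈ 0.25 versus p(d5|s3) = 16/71 ≈ 0.23. See the graph below.
This is because the posterior is (prior * likelihood) / marginal likelihood of the symptom, and the marginal likelihood p(symptom) is the last column
of the joint probability table: p(s3) = 71/200 is larger than p(s2) = 55/200.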

Given the table of counts above, or the corresponding joint distribution, it is trivial to calculate the POSTERIOR,
e.g. p(d5|s3) = 16/71 = 0.08 / 0.355 ≈ 0.225, which by Bayes' rule is calculated as:
p(d5|s3) = ( p(s3|d5) * p(d5) ) / p(s3) = (16/37 * 37/200) / (71/200) ≈ (0.43 * 0.185) / 0.355 ≈ 0.225

And of course, the PRIOR for each disease is given by the last row ('All') of the count table, divided by the total number of people,
and the LIKELIHOOD p(symptom|disease) is given by taking the count in [row, col] and dividing it by the 'All' entry of that column.

In [None]:
# Numeric check of Bayes' rule for p(d5|s3): likelihood 16/37 times prior 37/200, divided by the evidence 71/200.
16/37 * 37/200 / (71/200)


In [None]:
### Joint probability of symptom and disease.
### Each inner cell holds p(x, d): the count for symptom x AND disease d
### divided by the total population.
### Last column ('All'): marginal probability of each symptom, p(x) -- the
### denominator (marginal likelihood) in Bayes' rule.
### Last row ('All'): marginal probability of each disease, p(d) -- the PRIOR
### probability distribution over diseases.
### For a fixed symptom row, p(x, d) is proportional to p(d | x), so the
### largest inner value in a row marks the most probable disease for it.

joint_counts = df.iloc[:-1, :-1]
distribution = joint_counts / joint_counts.sum().sum()

# Both margins are normalised by the grand total of the per-symptom counts.
symptom_totals = df.loc[[1, 2, 3, 4], 'All']
population = symptom_totals.sum()
distribution.loc[[1, 2, 3, 4], 'All'] = symptom_totals / population
distribution.loc['All', :] = df.loc['All'] / population
distribution

In [None]:
# LIKELIHOOD: conditional probability of a symptom given a disease, p(x | d).
# Every count in (x, d) is divided by the total of its disease column d, so
# each disease column sums to 1 (the rows do not).

cell_counts = df.iloc[:-1, :-1]
disease_totals = df.iloc[-1, :-1]  # 'All' row: number of patients per disease
conditional_symptom_disease = cell_counts.div(disease_totals, axis='columns')

# Margins of the conditional table, kept for inspection.
conditional_symptom_disease['All'] = conditional_symptom_disease.sum(axis='columns')
conditional_symptom_disease.loc['All', :] = conditional_symptom_disease.sum(axis='index')
conditional_symptom_disease

In [None]:
# POSTERIOR: conditional probability of a disease given a symptom, p(d | x).
# Every count in (x, d) is divided by the total of its symptom row x, so
# each symptom row sums to 1 (the columns do not).
# The largest value in a row is the MAP disease for that symptom.

cell_counts = df.iloc[:-1, :-1]
symptom_totals = df.iloc[:-1, -1]  # 'All' column: number of patients per symptom
conditional_disease_symptom = cell_counts.div(symptom_totals, axis='index')

# Margins of the conditional table, kept for inspection.
conditional_disease_symptom['All'] = conditional_disease_symptom.sum(axis='columns')
conditional_disease_symptom.loc['All'] = conditional_disease_symptom.sum(axis='index')
conditional_disease_symptom

In [None]:
# Bayes' rule by hand: for every symptom, multiply the prior over diseases by
# that symptom's likelihood, then normalise the products so they sum to 1.

# Prior p(d): the 'All' row of the joint table without its trailing 'All' entry.
prior_dist = distribution.loc['All'][:-1]

# Unnormalised posteriors, one row per symptom 1..4 and one column per disease.
posterior_dists = np.zeros((4, 10))
for row_pos, symptom in enumerate(range(1, 5)):
    likelihood = conditional_symptom_disease.loc[symptom][:-1]
    posterior_dists[row_pos, :] = prior_dist * likelihood

# Row sums are the normalising constants (one per symptom).
div = posterior_dists.sum(axis=1)

# Label rows with symptoms 1..4 and columns with diseases 1..10, then list the
# symptoms in descending order.
posterior = pd.DataFrame(
    posterior_dists / div[:, np.newaxis],
    index=range(1, 5),
    columns=range(1, 11),
).sort_index(ascending=False)
posterior

In [None]:
# Prior, posteriors and likelihoods over the diseases in one figure.
# Left y-axis (ax): prior and one posterior curve per symptom.
# Right y-axis (ax2): the likelihood curve of the same symptom, same colour.
fig, ax = plt.subplots(figsize=(18, 12))
ax.set_title('Posterior probability disease given symptoms : MAP & MLE')
ax2 = ax.twinx()

ax.plot(posterior.columns, prior_dist, 'o--', label='prior', color='k', lw=3, ms=10)

colors = ['blue', 'orange', 'red', 'green']

# One colour per symptom, taken in the order of the posterior table's rows.
for color, s in zip(colors, posterior.index):

    ax.plot(posterior.columns, posterior.loc[s, :], 'x-',
            color=color, label='posterior for symptom {}'.format(s))

    # [:-1] drops the trailing 'All' entry of the likelihood row.
    ax2.plot(posterior.columns, conditional_symptom_disease.loc[s, :][:-1], '.--',
             color=color, alpha=1.0, label='likelihood for symptom {}'.format(s))

ax.set_xlabel('disease')
ax.set_ylabel('posterior probability')
ax2.set_ylabel('likelihood : symptom | disease')  # label typo 'symtom' corrected
ax.legend(loc='upper left')
ax2.legend(loc='upper right')
fig.savefig('posterior_and_likelihood.jpg', format='jpg')

In [None]:
# pandas operators: a small demo frame with one row per shape.
shape_names = ['circle', 'triangle', 'rectangle']
shape_data = {
    'angles': [0, 3, 4],
    'degrees': [360, 180, 360],
}
df_df = pd.DataFrame(data=shape_data, index=shape_names)
df_df



In [None]:
# axis=0 lines the list up with the row index: 1, 2 and 3 are added to the
# first, second and third row respectively, in both columns.
df_df.add([1, 2, 3], axis='index')


In [None]:
# axis=1 lines the list up with the columns: 1 is added to every value in the
# first column and 2 to every value in the second column.
df_df.add([1, 2], axis='columns')