# Gender Classifier

### The following table displays the gender, height (feet), weight (pounds) and foot length (inches) of 8 individuals.

In [23]:
from IPython.display import HTML, display
import tabulate
# Training sample: one row per individual holding the gender label followed by
# height, weight and foot length; the first row is the column header.
table = [
    ["Gender", "Height", "Weight", "Foot"],
    ["M", 6, 180, 12],
    ["M", 5.92, 190, 11],
    ["M", 5.58, 170, 12],
    ["M", 5.92, 165, 10],
    ["F", 5, 100, 6],
    ["F", 5.5, 150, 8],
    ["F", 5.42, 130, 7],
    ["F", 5.75, 150, 9],
]
# Render the rows as an HTML table in the cell output.
display(HTML(tabulate.tabulate(table, tablefmt="html")))

0,1,2,3
Gender,Height,Weight,Foot
M,6,180,12
M,5.92,190,11
M,5.58,170,12
M,5.92,165,10
F,5,100,6
F,5.5,150,8
F,5.42,130,7
F,5.75,150,9


### We are going to do the following:
### (1) Divide the data set into two parts by Gender.
### (2) Find the mean and standard deviation of height, weight and foot length by gender.
### (3) Assuming normality of all variables, build a simple gender classifier to predict the gender of new individuals, such as the unlabelled individuals tabulated after the model description below.

### Let $X$ be a measurement of an individual. Let $C$ be the gender classification variable ($C=0$ for male and $C=1$ for female).
### Suppose that, given $C=c$, $X$ is normally distributed with mean $\mu_{c}$ and variance $\sigma_{c}^{2}$, for $c=0,1$.
### Suppose we are given measurements of $n$ individuals randomly selected from the population, and let the observations be $\{(x_{i},c_{i}), i=1,2,\ldots,n\}$.
### The prior distribution of gender class is 
$$p(c) =\frac{\sum_{i=1}^{n} I(c_{i}=c) }{n}.$$
### The posterior distribution of the gender class given the observations (assumed conditionally independent given the class) follows from Bayes' theorem:
$$p(c|x_{1},x_{2},\ldots,x_{n}) \propto p(c) \prod_{i=1}^{n} p( X=x_{i}|C=c), $$
### where
$$p(X=x|C=c) = \frac{1}{\sqrt{2\pi \sigma_{c}^{2}}} \exp{(\frac{-1}{2 \sigma_{c}^{2}} (x-\mu_{c})^{2})}.$$

### For multi-class classification with classes $1,2,\ldots,K$, we classify an individual to class $k$ when
$$k= \operatorname{argmax}_{c \in \{1,2,\ldots,K\}}  p(c|x_{1},x_{2},\ldots,x_{n}).$$
### For two-class classification, we evaluate the posterior odds
$$odds = \frac{p(c=0|x_{1},x_{2},\ldots,x_{n})}{p(c=1|x_{1},x_{2},\ldots,x_{n})}.$$
### Classify an individual as male if $odds > 1$ and as female otherwise.

In [24]:
# Unlabelled individuals to classify; "?" stands in for the unknown gender.
table = [
    ["Gender", "Height", "Weight", "Foot"],
    ["?", 6, 130, 8],
    ["?", 6.5, 190, 10],
]
# Render the rows as an HTML table in the cell output.
display(HTML(tabulate.tabulate(table, tablefmt="html")))

0,1,2,3
Gender,Height,Weight,Foot
?,6,130,8
?,6.5,190,10


In [7]:
from collections import defaultdict
import math

In [6]:
def avg(x):
    """Return the arithmetic mean of the numbers in ``x``.

    ``x`` must support both ``sum`` and ``len``; an empty ``x`` raises
    ``ZeroDivisionError`` from the division.
    """
    total = sum(x)
    count = len(x)
    return total / count

In [8]:
def stdev(x):
    """Return the sample standard deviation of ``x`` (divisor ``n - 1``).

    Args:
        x: a sized collection of numbers.

    Returns:
        The square root of the sum of squared deviations from the mean,
        divided by ``len(x) - 1``.

    Raises:
        ValueError: if ``x`` holds fewer than two values, for which the
            ``n - 1`` divisor is undefined.
    """
    n = len(x)
    if n < 2:
        raise ValueError("stdev requires at least two data points")
    # Compute the mean once; recomputing it for every element made the
    # function quadratic in len(x).
    mean = sum(x) / n
    squared_devs = [(t - mean) * (t - mean) for t in x]
    return math.sqrt(sum(squared_devs) / (n - 1))

In [1]:
# Labelled training records as (gender, height, weight, foot) tuples.
persons = [
    ("M", 6, 180, 12),
    ("M", 5.92, 190, 11),
    ("M", 5.58, 170, 12),
    ("M", 5.92, 165, 10),
    ("F", 5, 100, 6),
    ("F", 5.5, 150, 8),
    ("F", 5.42, 130, 7),
    ("F", 5.75, 150, 9),
]

In [3]:
# Group the heights by gender label; weight and foot are unpacked but not used here.
height_by_gender = defaultdict(list)
for gender, height, weight, foot in persons:
    height_by_gender[gender].append(height)

In [9]:
# Per-gender (mean, standard deviation) of height, each rounded to 2 decimals.
summary_height_by_gender = {
    label: (round(avg(values), 2), round(stdev(values), 2))
    for label, values in height_by_gender.items()
}

In [10]:
# Show the height summary as (gender, (mean, standard deviation)) pairs.
list(summary_height_by_gender.items())

[('M', (5.86, 0.19)), ('F', (5.42, 0.31))]

In [11]:
# Group each individual's (height, weight, foot) measurements by gender label.
record_by_gender = defaultdict(list)
for gender, height, weight, foot in persons:
    record_by_gender[gender].append((height,weight,foot))

In [12]:
# Per-gender statistics in (height, weight, foot) order, rounded to 2 decimals.
# zip(*rows) transposes a gender's records into one sequence per measurement.
mean_by_gender = {
    label: tuple(round(avg(column), 2) for column in zip(*rows))
    for label, rows in record_by_gender.items()
}
std_by_gender = {
    label: tuple(round(stdev(column), 2) for column in zip(*rows))
    for label, rows in record_by_gender.items()
}

In [13]:
# Show the per-gender means as (gender, (height, weight, foot)) pairs.
list(mean_by_gender.items())

[('M', (5.86, 176.25, 11.25)), ('F', (5.42, 132.5, 7.5))]

In [14]:
# Show the per-gender standard deviations as (gender, (height, weight, foot)) pairs.
list(std_by_gender.items())

[('M', (0.19, 11.09, 0.96)), ('F', (0.31, 23.63, 1.29))]

In [15]:
def normpdf(x, mean, sd):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Args:
        x: point at which the density is evaluated.
        mean: mean of the distribution.
        sd: standard deviation of the distribution; must be non-zero.

    Returns:
        The density value as a float.

    Raises:
        ZeroDivisionError: if ``sd`` is zero.
    """
    var = float(sd) ** 2
    # Use math.pi rather than a truncated literal so the normalising
    # constant is accurate to double precision.
    denom = math.sqrt(2 * math.pi * var)
    num = math.exp(-(float(x) - float(mean)) ** 2 / (2 * var))
    return num / denom

In [16]:
def prod(x,y):
    """Return the product of the pairwise ratios ``x[i] / y[i]`` as a float.

    Pairs are formed with ``zip``, so surplus elements of the longer input
    are ignored; with no pairs at all the result is ``1.0``.
    """
    running = 1.0
    for top, bottom in zip(x, y):
        # Multiply first, then divide, and coerce back to float each step.
        running = float(running * top / bottom)
    return running

In [18]:
def predict_class(person,p=(0.5,0.5)):
    """Classify one individual as "M" or "F" from the Gaussian posterior odds.

    Args:
        person: measurements indexed 0..2, in the same order as the tuples
            stored in ``mean_by_gender`` and ``std_by_gender``.
        p: prior probabilities as (male, female); they must sum to 1.

    Returns:
        "M" when the male-to-female odds exceed 1, otherwise "F".
        If the priors do not sum to 1, prints "wrong input" and returns None.
    """
    # Compare with a tolerance: priors produced by floating-point arithmetic
    # can miss 1 by a rounding error and would fail an exact equality test.
    if not math.isclose(p[0] + p[1], 1.0, rel_tol=0.0, abs_tol=1e-9):
        print("wrong input")
        return None
    if p[1] == 0:
        # A zero female prior means the prior odds are infinite; return the
        # male label directly instead of dividing by zero below.
        return "M"
    # Class-conditional densities of each measurement under the two genders.
    num_list = [normpdf(person[i], mean_by_gender["M"][i], std_by_gender["M"][i]) for i in range(3)]
    den_list = [normpdf(person[i], mean_by_gender["F"][i], std_by_gender["F"][i]) for i in range(3)]
    # Prior odds times the product of the per-measurement likelihood ratios.
    eval_rel_prob = (float(p[0]) / float(p[1])) * prod(num_list, den_list)
    return "M" if eval_rel_prob > 1 else "F"

In [19]:
# Measurements of the two unlabelled individuals, one tuple per person.
new_person = [
    (6, 130, 8),
    (6.5, 190, 10),
]
# Classify each individual using predict_class's default priors.
class_pred = list(map(predict_class, new_person))

In [20]:
# Show the predicted labels, in the same order as new_person.
class_pred

['F', 'M']