In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
plt.ioff()

import numpy as np

import math

## Data Preparation

### Initialization of Values

In [2]:
# Expected sum of every row of M_dist (and length of every row of M);
# verified with check_array.
M_cnt = 13
# Expected sum of every row of F_dist (and length of every row of F);
# verified with check_array.
F_cnt = 10
# NOTE(review): not referenced by any visible cell; presumably the number of
# habits/questions (rows in M_dist and F_dist) -- confirm.
q_cnt = 10

In [3]:
# Frequency table for group M: one row per habit, and column k holds how many
# times the value k (0-4) occurred. Each row is expected to sum to M_cnt.
M_dist = np.array([[0, 2, 5, 4, 2],
                  [1, 2, 4, 3, 3],
                  [0, 2, 5, 6, 0],
                  [0, 2, 6, 3, 2],
                  [0, 3, 2, 3, 5],
                  [0, 4, 6, 1, 2],
                  [2, 4, 5, 1, 1],
                  [1, 4, 3, 3, 2],
                  [0, 4, 3, 4, 2],
                  [1, 5, 7, 0, 0]])

In [4]:
# Frequency table for group F: one row per habit, and column k holds how many
# times the value k (0-4) occurred. Each row is expected to sum to F_cnt.
F_dist = np.array([[0, 0, 9, 1, 0],
                  [1, 3, 5, 1, 0],
                  [0, 0, 4, 3, 3],
                  [0, 3, 3, 4, 0],
                  [1, 3, 4, 1, 1],
                  [0, 8, 0, 1, 1],
                  [1, 5, 2, 2, 0],
                  [1, 1, 5, 1, 2],
                  [0, 3, 3, 2, 2],
                  [0, 2, 4, 4, 0]])

In [5]:
def expand_dist(X):
    """Expand rows of frequency counts into rows of individual values.

    Each row of ``X`` is read as a histogram in which entry ``k`` is the
    number of times the value ``k`` occurs. The row is turned into the
    ascending sequence that repeats ``k`` exactly ``row[k]`` times. Any number
    of columns is supported; the category count is taken from the row length
    instead of being fixed at five.

    Parameters
    ----------
    X : 2-D array-like of non-negative ints
        One frequency row per output row.

    Returns
    -------
    numpy.ndarray
        One expanded row per row of ``X``. Rows are expected to have equal
        totals so that the expanded rows stack into a regular 2-D array.
    """
    # np.repeat(values, counts) repeats values[k] counts[k] times, which is
    # exactly the histogram expansion for values 0..len(row)-1.
    return np.array([np.repeat(np.arange(len(row)), row) for row in X])

In [6]:
# Expand the M frequency table into individual values: one row per row of
# M_dist, with as many entries as that row's counts add up to.
M = expand_dist(M_dist)
M

array([[1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4],
       [0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4],
       [1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
       [1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4],
       [1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4],
       [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 4],
       [0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4],
       [0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4],
       [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4],
       [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2]])

In [7]:
# Expand the F frequency table into individual values: one row per row of
# F_dist, with as many entries as that row's counts add up to.
F = expand_dist(F_dist)
F

array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 3],
       [0, 1, 1, 1, 2, 2, 2, 2, 2, 3],
       [2, 2, 2, 2, 3, 3, 3, 4, 4, 4],
       [1, 1, 1, 2, 2, 2, 3, 3, 3, 3],
       [0, 1, 1, 1, 2, 2, 2, 2, 3, 4],
       [1, 1, 1, 1, 1, 1, 1, 1, 3, 4],
       [0, 1, 1, 1, 1, 1, 2, 2, 3, 3],
       [0, 1, 2, 2, 2, 2, 2, 3, 4, 4],
       [1, 1, 1, 2, 2, 2, 3, 3, 4, 4],
       [1, 1, 2, 2, 2, 2, 3, 3, 3, 3]])

### Checking the Validity of the Arrays

In [8]:
def check_array(X, X_dist, cnt):
    """Check that an expanded array is consistent with its frequency table.

    Parameters
    ----------
    X : sequence of sequences
        Expanded values, one row per row of ``X_dist``.
    X_dist : sequence of sequences of ints
        Frequency table that ``X`` was built from.
    cnt : int
        Expected total count per row.

    Returns
    -------
    bool
        ``True`` only if ``X`` and ``X_dist`` have the same number of rows,
        every row of ``X_dist`` sums to ``cnt`` and every row of ``X`` has
        exactly ``cnt`` entries; ``False`` otherwise.
    """
    # Every row is checked, however many there are; a fixed row count would
    # raise on shorter inputs and leave extra rows unchecked.
    if len(X) != len(X_dist):
        return False
    for expanded_row, dist_row in zip(X, X_dist):
        if sum(dist_row) != cnt or len(expanded_row) != cnt:
            return False
    return True

In [9]:
# Sanity check: rows of M_dist must sum to M_cnt and rows of M must have M_cnt entries.
check_array(M, M_dist, M_cnt)

True

In [10]:
# Sanity check: rows of F_dist must sum to F_cnt and rows of F must have F_cnt entries.
check_array(F, F_dist, F_cnt)

True

## Plots

## Data Analysis

##### Sum

In [11]:
# Column totals of M_dist: how often each value 0-4 occurs across all rows.
M_dist_sum = M_dist.sum(axis=0)
M_dist_sum

array([ 5, 32, 46, 28, 19])

In [12]:
# Grand total of all counts in M_dist; should equal M_cnt times the number of rows.
sum(M_dist_sum)

130

In [13]:
# Column totals of F_dist: how often each value 0-4 occurs across all rows.
F_dist_sum = F_dist.sum(axis=0)
F_dist_sum

array([ 4, 28, 39, 20,  9])

In [14]:
# Grand total of all counts in F_dist; should equal F_cnt times the number of rows.
sum(F_dist_sum)

100

##### Means

In [15]:
# Mean over every entry of M (all rows pooled, no axis given).
M.mean()

2.1846153846153844

In [16]:
# Mean over every entry of F (all rows pooled, no axis given).
F.mean()

2.02

##### Standard Deviation

In [17]:
# Standard deviation over every entry of M. np.std defaults to ddof=0, i.e.
# the population formula (divide by N, not N - 1).
np.std(M)

1.079776439714867

In [18]:
# Standard deviation over every entry of F. np.std defaults to ddof=0, i.e.
# the population formula (divide by N, not N - 1).
np.std(F)

0.9997999799959989

### Z-Test

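**Claim:** Males and females have equally effective listening habits.

- $H_0$: Males and females have equally effective listening habits ($\mu_M = \mu_F$).
- $H_1$: Males and females have unequally effective listening habits ($\mu_M \neq \mu_F$).

Significance level: $\alpha = 0.05$ (two-tailed, critical values $z = \pm 1.96$).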

In [19]:
def calc_z_value(X0, X1):
    """Return the two-sample z statistic for the difference of two means.

    Computes ``(mean(X0) - mean(X1)) / sqrt(var(X0)/n0 + var(X1)/n1)`` where
    the variances are the squared ``np.std`` values (NumPy's default
    ``ddof=0``) and ``n0``, ``n1`` are the total element counts of the arrays.

    Parameters
    ----------
    X0, X1 : numpy.ndarray
        The two samples; all elements are used regardless of shape.

    Returns
    -------
    float
        Positive when ``X0`` has the larger mean, negative when ``X1`` does.
    """
    mean_gap = X0.mean() - X1.mean()
    # Squared standard error contributed by each sample.
    se_sq_0 = np.std(X0)**2 / X0.size
    se_sq_1 = np.std(X1)**2 / X1.size
    return mean_gap / math.sqrt(se_sq_0 + se_sq_1)

In [20]:
# Overall z statistic: every entry of M against every entry of F (rows pooled).
z = calc_z_value(M, F)
z

1.1953598141296586

Do not reject the null hypothesis, since $-1.96 < z \approx 1.1954 < 1.96$.

In [29]:
# Critical value of the two-tailed z-test at alpha = 0.05.
Z_CRITICAL = 1.96

# Repeat the z-test for each habit separately, pairing row k of M with row k
# of F. Iterating over the rows themselves avoids a hard-coded row count.
for habit_no, (m_row, f_row) in enumerate(zip(M, F), start=1):
    z_i = calc_z_value(m_row, f_row)
    if z_i < -Z_CRITICAL:
        # Significantly lower mean for M than for F.
        res = "Females are better"
    elif z_i > Z_CRITICAL:
        # Significantly higher mean for M than for F.
        res = "Males are better"
    else:
        res = "Equally effective"
    print("Habit #{}: {}".format(habit_no, res))

Habit #1: Equally effective
Habit #2: Equally effective
Habit #3: Equally effective
Habit #4: Equally effective
Habit #5: Males are better
Habit #6: Equally effective
Habit #7: Equally effective
Habit #8: Equally effective
Habit #9: Equally effective
Habit #10: Females are better
