# Constructing scenarios according to the guidelines described in Allen (2014)

In [449]:
import numpy as np
import pandas as pd
import os

os.chdir('C:\\Users\\renan\\Desktop\\inequality\\segregation')
from dissimilarity import Dissim

In [450]:
# Scenario parameters.
# J is the number of units: it sizes the cumulative arrays and the
# multinomial draws below.
J = 50
# a is tied to the dissimilarity D through a = (1 - D) ** 2 / (1 + D) ** 2.
# Values listed for a: 1, 0.8, 0.6, 0.4, 0.2, 0.05
a = 0.6
# Solve the relation above for D: sqrt(a) = (1 - D) / (1 + D).
D = (1 - np.sqrt(a)) / (1 + np.sqrt(a))
# q is the complement of a; it is the parameter fed to relation_formula.
q = 1 - a

In [451]:
# Inspect the value of D derived from a.
D

0.1270166537925831

In [452]:
def relation_formula(cum_c1, q):
    """Return ``cum_c1 / (1 - q + q * cum_c1)``.

    Parameters
    ----------
    cum_c1 : float or numpy.ndarray
        Cumulative share(s) of the first group; arrays are handled
        element-wise by ordinary NumPy broadcasting.
    q : float
        Shape parameter of the relation (the notebook uses ``q = 1 - a``).

    Returns
    -------
    float or numpy.ndarray
        The matching cumulative share(s) of the other group.
    """
    denominator = 1 - q + q * cum_c1
    return cum_c1 / denominator

In [453]:
# Set up the cumulative curves: a constant step of 1/J for group 1 and
# two zero-filled arrays of length J that are filled in afterwards.
init_1 = 1 / J

cum_1, cum_0 = np.zeros(J), np.zeros(J)

In [454]:
# Fill both cumulative curves in a single vectorised pass.
# cum_1 rises in equal steps init_1, 2 * init_1, ..., J * init_1; the first
# entry needs no special case because init_1 * 1 == init_1.
# Slice assignment writes into the arrays allocated earlier instead of
# rebinding the names.
cum_1[:] = init_1 * np.arange(1, J + 1)
# relation_formula uses only element-wise arithmetic, so it maps the whole
# array at once.
cum_0[:] = relation_formula(cum_1, q)

In [455]:
# Turn each cumulative curve into per-unit probabilities: prepending 0 makes
# the first difference equal to the first cumulative value itself.
point_prob_1 = np.diff(cum_1, prepend=0.0)
point_prob_0 = np.diff(cum_0, prepend=0.0)

In [456]:
# Inspect the per-unit probabilities obtained by differencing cum_1.
point_prob_1

array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02])

In [457]:
# Inspect the per-unit probabilities obtained by differencing cum_0.
point_prob_0

array([0.03289474, 0.03204033, 0.03121878, 0.03042843, 0.02966772,
       0.02893519, 0.02822945, 0.02754922, 0.02689329, 0.0262605 ,
       0.02564979, 0.02506014, 0.0244906 , 0.02394025, 0.02340824,
       0.02289377, 0.02239608, 0.02191445, 0.02144818, 0.02099664,
       0.02055921, 0.02013531, 0.01972438, 0.01932591, 0.01893939,
       0.01856436, 0.01820035, 0.01784694, 0.01750373, 0.01717033,
       0.01684636, 0.01653148, 0.01622534, 0.01592762, 0.01563803,
       0.01535627, 0.01508205, 0.01481511, 0.01455519, 0.01430206,
       0.01405547, 0.01381521, 0.01358105, 0.0133528 , 0.01313025,
       0.01291322, 0.01270153, 0.012495  , 0.01229347, 0.01209677])

In [483]:
E_nj = 10  # Average population per unit (total is n = J * E_nj)
p = 0.20  # Global minority proportion

n = J * E_nj
# Group sizes are used as multinomial trial counts, so they must be integers.
# NumPy documents that argument as an int; a float product such as 56.99999
# may be truncated or rejected. Round the minority count and give the
# remainder to the other group so the two always add up to n.
n1 = int(round(n * p))
n0 = n - n1

In [500]:
# One multinomial allocation per group: each group's total is spread over
# the units according to that group's per-unit probabilities. The draws are
# made in the order sim1, then sim0; size = 1 gives arrays of shape (1, J).
sim1, sim0 = (
    np.random.multinomial(group_total, unit_probs, size = 1)
    for group_total, unit_probs in ((n1, point_prob_1), (n0, point_prob_0))
)

In [501]:
# Inspect the simulated per-unit counts drawn with n1 and point_prob_1.
sim1

array([[1, 1, 1, 2, 0, 1, 4, 0, 0, 4, 4, 3, 1, 2, 1, 2, 1, 2, 1, 4, 3, 2,
        1, 0, 2, 1, 2, 3, 3, 0, 3, 4, 2, 2, 2, 1, 0, 3, 1, 5, 2, 1, 2, 2,
        4, 1, 4, 2, 3, 4]])

In [502]:
# Inspect the simulated per-unit counts drawn with n0 and point_prob_0.
sim0

array([[13,  8, 12, 15, 12,  9, 13, 13, 11,  7, 16, 10,  9,  3,  7,  8,
        11, 10,  8,  5, 11,  6,  6,  8,  7,  6,  7,  3,  8,  6, 10,  1,
         9, 10,  5,  6,  7,  6,  8,  8,  7,  6,  4,  8,  7,  8,  4,  7,
         5,  6]])

In [503]:
# Per-unit totals: element-wise sum of the two simulated count vectors.
sim1[0] + sim0[0]

array([14,  9, 13, 17, 12, 10, 17, 13, 11, 11, 20, 13, 10,  5,  8, 10, 12,
       12,  9,  9, 14,  8,  7,  8,  9,  7,  9,  6, 11,  6, 13,  5, 11, 12,
        7,  7,  7,  9,  9, 13,  9,  7,  6, 10, 11,  9,  8,  9,  8, 10])

In [504]:
# Assemble the input table: the simulated counts of the first group, then a
# total column that adds the second group's counts unit by unit.
df = pd.DataFrame({'group_pop_var': sim1[0]}).assign(
    total_pop_var=lambda frame: frame['group_pop_var'] + sim0[0]
)

In [505]:
# Build the index object from the simulated table, passing the names of the
# group-count and total-count columns.
# NOTE(review): Dissim comes from the local `dissimilarity` module; presumably
# it computes the dissimilarity index of segregation -- confirm against that module.
index = Dissim(df, 'group_pop_var', 'total_pop_var')

In [506]:
# Inspect the `statistic` attribute of the Dissim result for the simulated data.
index.statistic

0.325

In [507]:
# Gap between the statistic from the simulated sample and the value D
# derived from a.
index.statistic - D

0.19798334620741692