# Constructing scenarios according to the guidelines described in Allen (2014)

In [367]:
import numpy as np
import pandas as pd
import os
import time

# NOTE(review): machine-specific absolute path. The `segregation` imports below
# presumably depend on this working directory — adjust before running elsewhere.
os.chdir('C:\\Users\\renan\\Desktop\\inequality')
from segregation.dissimilarity import Dissim
from segregation.spatial_dissimilarity import Spatial_Dissim
from segregation.entropy import Entropy
from segregation.perimeter_area_ratio_spatial_dissimilarity import Perimeter_Area_Ratio_Spatial_Dissim
from segregation.absolute_centralization import Absolute_Centralization
from segregation.absolute_concentration import Absolute_Concentration
from segregation.atkinson import Atkinson
from segregation.bias_corrected_dissimilarity import Bias_Corrected_Dissim
from segregation.boundary_spatial_dissimilarity import Boundary_Spatial_Dissim
from segregation.conprof import Con_Prof
from segregation.correlationr import Correlation_R
from segregation.delta import Delta
from segregation.density_corrected_dissimilarity import Density_Corrected_Dissim
from segregation.exposure import Exposure
from segregation.gini_seg import Gini_Seg
from segregation.isolation import Isolation
from segregation.modified_dissimilarity import Modified_Dissim
from segregation.modified_gini_seg import Modified_Gini_Seg
from segregation.relative_centralization import Relative_Centralization
from segregation.relative_clustering import Relative_Clustering
from segregation.relative_concentration import Relative_Concentration
from segregation.spatial_exposure import Spatial_Exposure
from segregation.spatial_isolation import Spatial_Isolation
from segregation.spatial_prox_profile import Spatial_Prox_Prof
from segregation.spatial_proximity import Spatial_Proximity

from segregation.infer_segregation import Infer_Segregation

In [232]:
# Parameters of the first hand-built scenario.
J = 50  # length of the probability vectors built in the following cells
a = 0.6  # alternatives: 1, 0.8, 0.6, 0.4, 0.2, 0.05; a = (1 - D) ** 2 / (1 + D) ** 2
q = 1 - a
# Solve a = (1 - D) ** 2 / (1 + D) ** 2 for D: sqrt(a) = (1 - D) / (1 + D).
D = (1 - np.sqrt(a)) / (1 + np.sqrt(a))

In [233]:
D

0.1270166537925831

In [234]:
def relation_formula(cum_c1, q):
    """Return ``cum_c1 / (1 - q + q * cum_c1)``.

    ``cum_c1`` is a cumulative share of group 1 and the result is the
    matching cumulative share of group 0. With ``q = 0`` the input is
    returned unchanged.
    """
    denominator = (1 - q) + q * cum_c1
    return cum_c1 / denominator

In [235]:
# Set up the cumulative-share vectors; they are filled in by the next cell.
init_1 = 1 / J  # equal step between consecutive cumulative values of group 1

cum_1, cum_0 = np.zeros(J), np.zeros(J)

In [236]:
# Fill both cumulative curves: group 1 grows in equal steps of init_1 and
# group 0 is derived from it through relation_formula.
for i in range(J):
    cum_1[i] = init_1 * (i + 1)
    cum_0[i] = relation_formula(cum_1[i], q)

In [237]:
# Per-unit probabilities are the successive increments of each cumulative
# curve; the first entry is the first cumulative value itself.
point_prob_1 = np.append(cum_1[0], np.diff(cum_1))
point_prob_0 = np.append(cum_0[0], np.diff(cum_0))

In [238]:
point_prob_1

array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02])

In [239]:
point_prob_0

array([0.03289474, 0.03204033, 0.03121878, 0.03042843, 0.02966772,
       0.02893519, 0.02822945, 0.02754922, 0.02689329, 0.0262605 ,
       0.02564979, 0.02506014, 0.0244906 , 0.02394025, 0.02340824,
       0.02289377, 0.02239608, 0.02191445, 0.02144818, 0.02099664,
       0.02055921, 0.02013531, 0.01972438, 0.01932591, 0.01893939,
       0.01856436, 0.01820035, 0.01784694, 0.01750373, 0.01717033,
       0.01684636, 0.01653148, 0.01622534, 0.01592762, 0.01563803,
       0.01535627, 0.01508205, 0.01481511, 0.01455519, 0.01430206,
       0.01405547, 0.01381521, 0.01358105, 0.0133528 , 0.01313025,
       0.01291322, 0.01270153, 0.012495  , 0.01229347, 0.01209677])

In [240]:
E_nj = 10  # multiplier giving the total population n = J * E_nj
p = 0.20 # Global minority proportion

n = J * E_nj
# The multinomial draws below need integer counts. n * p is a float and can
# land just below a whole number, so round it and take n0 as the remainder;
# this keeps n1 + n0 == n exactly.
n1 = int(round(n * p))
n0 = n - n1

In [241]:
# One multinomial draw per group, spreading n1 / n0 counts over the units
# according to the point probabilities. Group 1 is drawn first.
sim1 = np.random.multinomial(n1, point_prob_1, 1)
sim0 = np.random.multinomial(n0, point_prob_0, 1)

In [242]:
sim1

array([[0, 2, 5, 2, 2, 1, 5, 4, 0, 5, 1, 1, 0, 3, 2, 0, 4, 2, 1, 1, 3, 1,
        3, 4, 4, 2, 1, 1, 0, 0, 2, 1, 2, 6, 2, 2, 1, 3, 1, 0, 0, 2, 3, 2,
        2, 2, 1, 3, 2, 3]])

In [243]:
sim0

array([[19, 12, 12, 12, 17, 12, 16,  8,  8, 12, 11, 10,  7,  9, 10,  5,
        10,  7,  8,  5,  3, 14,  5, 10, 10,  4,  6, 14,  7,  6,  6,  4,
         5,  6,  7,  8,  8,  4,  5,  5,  4,  1,  6,  5,  7,  4,  7,  7,
         6,  6]])

In [244]:
sim1[0] + sim0[0]

array([19, 14, 17, 14, 19, 13, 21, 12,  8, 17, 12, 11,  7, 12, 12,  5, 14,
        9,  9,  6,  6, 15,  8, 14, 14,  6,  7, 15,  7,  6,  8,  5,  7, 12,
        9, 10,  9,  7,  6,  5,  4,  3,  9,  7,  9,  6,  8, 10,  8,  9])

In [245]:
# Unit-level table: group-1 counts and the total (group 1 + group 0) per unit.
group_counts = sim1[0]
df = pd.DataFrame({'group_pop_var': group_counts,
                   'total_pop_var': group_counts + sim0[0]})

In [246]:
# Fit the Dissim index on the simulated table; its `.statistic` is compared
# with D in the following cells.
index = Dissim(df, 'group_pop_var', 'total_pop_var')

In [247]:
index.statistic

0.33999999999999997

In [248]:
index.statistic - D # Bias

0.21298334620741688

# Building a generic function that creates the data frame for different hyperparameters

In [380]:
def create_data_frame(q, E_nj, p, J=50):
    """Simulate one table of unit-level counts for two groups.

    Group 1 is spread over ``J`` units with equal probability ``1 / J``. The
    cumulative share of group 0 is tied to that of group 1 through
    ``c0 = c1 / (1 - q + q * c1)``. The counts of each group are a single
    multinomial draw from the global ``np.random`` state.

    Parameters
    ----------
    q : float
        Parameter of the relation between the two cumulative shares;
        ``q = 0`` gives both groups the same per-unit probabilities.
    E_nj : int or float
        Multiplier giving the total population ``n = J * E_nj``.
    p : float
        Share of the total population assigned to group 1.
    J : int, optional
        Number of units (rows of the result). Default is 50.

    Returns
    -------
    pandas.DataFrame
        ``J`` rows with columns ``'group_pop_var'`` (group-1 count) and
        ``'total_pop_var'`` (group-1 plus group-0 count).
    """
    def relation_formula(cum_c1, q):
        return cum_c1 / (1 - q + q * cum_c1)

    # Cumulative shares: equal steps for group 1, transformed for group 0.
    cum_1 = (1 / J) * np.arange(1, J + 1)
    cum_0 = relation_formula(cum_1, q)

    # Per-unit probabilities are the increments of the cumulative curves.
    point_prob_1 = np.concatenate(([cum_1[0]], np.diff(cum_1)))
    point_prob_0 = np.concatenate(([cum_0[0]], np.diff(cum_0)))

    # multinomial needs integer counts. n * p is a float that can fall just
    # below a whole number (e.g. 100 * 0.29), so round instead of truncating
    # and take n0 as the remainder so that n1 + n0 == n exactly.
    n = int(round(J * E_nj))
    n1 = int(round(n * p))
    n0 = n - n1

    # Group 1 is drawn first, then group 0.
    sim1 = np.random.multinomial(n1, point_prob_1, size=1)[0]
    sim0 = np.random.multinomial(n0, point_prob_0, size=1)[0]

    return pd.DataFrame({'group_pop_var': sim1, 'total_pop_var': sim1 + sim0})

In [381]:
# q = 0, 0.2, 0.4, 0.6, 0.8, 0.95

In [382]:
# Pick one of the q values listed in the previous cell.
q = 0.4

In [383]:
# Simulate one table for the chosen q.
df = create_data_frame(q, E_nj=10, p=0.35, J=50)

In [384]:
# Reference value D for the chosen q: with a = 1 - q, this is the same
# a -> D conversion used for the first scenario.
a = 1 - q
D = (1 - np.sqrt(a))/(1 + np.sqrt(a))

In [385]:
D

0.1270166537925831

In [386]:
# Three estimators fitted on the same simulated table; each `.statistic` is
# compared with D in the cells below. Construction order is unchanged.
pop_cols = ('group_pop_var', 'total_pop_var')
index_D = Dissim(df, *pop_cols)
index_BC = Bias_Corrected_Dissim(df, *pop_cols, B=250)
index_DC = Density_Corrected_Dissim(df, *pop_cols)

In [387]:
index_D.statistic - D

0.19606026928434

In [388]:
index_BC.statistic - D

0.10725323631730704

In [389]:
index_DC.statistic - D

0.09448538898288122

---
---
---

# Building the single inference framework

In [372]:
# J = [50]  # NOTE(review): the variable name was missing here; J is assumed, as it stays fixed at 50 below
# Hyperparameter grids for the simulation study. q, E_nj and p were scalars
# in the cells above and are rebound to lists here.
q = [0, 0.2, 0.4, 0.6, 0.8, 0.95]
E_nj = [10, 30, 50]
p = [0.05, 0.10, 0.20, 0.35]

In [373]:
# Specific function that performs the simulation study:
def very_specific_segregation_single_profile(q,
                                             E_nj,
                                             p,

                                             index_name,

                                             iterations = 2,
                                             null_approach = "systematic",
                                             J = 50):
    """Simulate one data set, fit one index on it and time ``Infer_Segregation``.

    Parameters
    ----------
    q, E_nj, p :
        Forwarded unchanged to ``create_data_frame``.
    index_name : str
        Name of the index class to fit, e.g. ``"Dissim"`` or ``"Gini_Seg"``.
    iterations : int, optional
        Forwarded to ``Infer_Segregation``. Default is 2.
    null_approach : str, optional
        Forwarded to ``Infer_Segregation``. Default is ``"systematic"``.
    J : int, optional
        Forwarded to ``create_data_frame``. Default is 50, the value that
        was previously hard-coded.

    Returns
    -------
    tuple
        ``(aux, total)``: the ``Infer_Segregation`` result and the elapsed
        time, in seconds, of that call alone.

    Raises
    ------
    ValueError
        If ``index_name`` is not one of the supported names.
    """
    # Dispatch table replacing the former chain of independent `if` tests.
    index_classes = {
        "Dissim": Dissim,
        "Gini_Seg": Gini_Seg,
        "Entropy": Entropy,
        "Atkinson": Atkinson,
        "Exposure": Exposure,
        "Isolation": Isolation,
        "Con_Prof": Con_Prof,
        "Bias_Corrected_Dissim": Bias_Corrected_Dissim,
        "Density_Corrected_Dissim": Density_Corrected_Dissim,
        "Correlation_R": Correlation_R,
        "Modified_Dissim": Modified_Dissim,
        "Modified_Gini_Seg": Modified_Gini_Seg,
        "Spatial_Dissim": Spatial_Dissim,
        "Perimeter_Area_Ratio_Spatial_Dissim": Perimeter_Area_Ratio_Spatial_Dissim,
        "Boundary_Spatial_Dissim": Boundary_Spatial_Dissim,
        "Absolute_Centralization": Absolute_Centralization,
        "Absolute_Concentration": Absolute_Concentration,
        "Delta": Delta,
        "Relative_Centralization": Relative_Centralization,
        "Relative_Clustering": Relative_Clustering,
        "Relative_Concentration": Relative_Concentration,
        "Spatial_Exposure": Spatial_Exposure,
        "Spatial_Isolation": Spatial_Isolation,
        "Spatial_Prox_Prof": Spatial_Prox_Prof,
        "Spatial_Proximity": Spatial_Proximity,
    }

    # Reject unknown names up front; previously they left I_1 unbound and
    # surfaced later as an unrelated-looking UnboundLocalError.
    try:
        index_class = index_classes[index_name]
    except KeyError:
        raise ValueError(
            "Unknown index_name {!r}; expected one of: {}".format(
                index_name, ", ".join(sorted(index_classes)))) from None

    gdf_1 = create_data_frame(q, E_nj, p, J = J)
    I_1 = index_class(gdf_1, 'group_pop_var', 'total_pop_var')

    # Only the inference step is timed. perf_counter is monotonic, so the
    # interval cannot be distorted by system clock adjustments.
    t0 = time.perf_counter()
    aux = Infer_Segregation(I_1,
                            null_approach = null_approach,
                            iterations = iterations)
    total = time.perf_counter() - t0
    print('Total Time of {} = {}'.format(index_name, total))

    return aux, total

In [379]:
# Index names handed to very_specific_segregation_single_profile; each entry
# must match one of the names that function recognises.
# 'Absolute_Concentration' and 'Relative_Concentration' are commented out
# (the reason is not recorded here).
list_of_index_name = ['Dissim', 
                      'Gini_Seg', 
                      'Entropy', 
                      'Atkinson', 
                      'Exposure', 
                      'Isolation', 
                      'Con_Prof', 
                      'Bias_Corrected_Dissim', 
                      'Density_Corrected_Dissim', 
                      'Correlation_R', 
                      'Modified_Dissim', 
                      'Modified_Gini_Seg',
                      'Spatial_Dissim',
                      'Perimeter_Area_Ratio_Spatial_Dissim',
                      'Boundary_Spatial_Dissim',
                      'Absolute_Centralization',
                      #'Absolute_Concentration',
                      'Delta',
                      'Relative_Centralization',
                      'Relative_Clustering',
                      #'Relative_Concentration',
                      'Spatial_Exposure',
                      'Spatial_Isolation',
                      'Spatial_Prox_Prof',
                      'Spatial_Proximity']

In [376]:
# Run the study once per position of the input lists (J is not passed).
# NOTE(review): the four lists are consumed in lockstep and iteration stops at
# the shortest one, so this makes one call per (q[i], E_nj[i], p[i],
# list_of_index_name[i]) rather than one per combination. If a full grid is
# intended, itertools.product would be needed — confirm.
result = [very_specific_segregation_single_profile(*profile)
          for profile in zip(q,
                             E_nj,
                             p,
                             list_of_index_name)]

Total Time of Dissim = 0.03388833999633789
Total Time of Gini_Seg = 0.03390836715698242
Total Time of Entropy = 0.025754451751708984


In [378]:
?map