# Constructing scenarios according to the guidelines described in Allen (2014)

In [1]:
import numpy as np
import pandas as pd
import os

# NOTE(review): hard-coded, machine-specific working directory. Presumably it
# points at the checkout containing the local `segregation` package imported
# below -- confirm, and adjust the path when running on another machine.
os.chdir('C:\\Users\\renan\\Desktop\\inequality')
from segregation.dissimilarity import Dissim
from segregation.spatial_dissimilarity import Spatial_Dissim
from segregation.entropy import Entropy
from segregation.perimeter_area_ratio_spatial_dissimilarity import Perimeter_Area_Ratio_Spatial_Dissim
from segregation.absolute_centralization import Absolute_Centralization
from segregation.absolute_concentration import Absolute_Concentration
from segregation.atkinson import Atkinson
from segregation.bias_corrected_dissimilarity import Bias_Corrected_Dissim
from segregation.boundary_spatial_dissimilarity import Boundary_Spatial_Dissim
from segregation.conprof import Con_Prof
from segregation.correlationr import Correlation_R
from segregation.delta import Delta
from segregation.density_corrected_dissimilarity import Density_Corrected_Dissim
from segregation.exposure import Exposure
from segregation.gini_seg import Gini_Seg
from segregation.isolation import Isolation
from segregation.modified_dissimilarity import Modified_Dissim
from segregation.modified_gini_seg import Modified_Gini_Seg
from segregation.relative_centralization import Relative_Centralization
from segregation.relative_clustering import Relative_Clustering
from segregation.relative_concentration import Relative_Concentration
from segregation.spatial_exposure import Spatial_Exposure
from segregation.spatial_isolation import Spatial_Isolation
from segregation.spatial_prox_profile import Spatial_Prox_Prof
from segregation.spatial_proximity import Spatial_Proximity

from segregation.infer_segregation import Infer_Segregation

In [2]:
# Scenario hyperparameters.
# J is the number of units (the length of the probability vectors built below).
J = 50

# a may be any of 1, 0.8, 0.6, 0.4, 0.2, 0.05.
# It is tied to D through a = (1 - D) ** 2 / (1 + D) ** 2.
a = 0.6

# Solving that relation for D gives the reference value used for the bias.
D = (1 - np.sqrt(a)) / (1 + np.sqrt(a))

# q is the complement of a and parameterizes relation_formula.
q = 1 - a

In [3]:
D

0.1270166537925831

In [4]:
def relation_formula(cum_c1, q):
    """Map a cumulative share of group 1 to the cumulative share of group 0.

    Parameters
    ----------
    cum_c1 : float or array-like
        Cumulative proportion(s) of group 1.
    q : float
        Scenario parameter; with q = 0 the output equals the input.

    Returns
    -------
    float or ndarray
        cum_c1 / (1 - q + q * cum_c1), evaluated elementwise.
    """
    denominator = (1 - q) + q * cum_c1
    return cum_c1 / denominator

In [5]:
# Set up the cumulative distributions over the J units.
# Group 1 advances in equal steps of 1 / J; both curves start as zeros and
# are filled in afterwards.
init_1 = 1 / J
cum_1, cum_0 = np.zeros(J), np.zeros(J)

In [6]:
# Fill both cumulative curves unit by unit: group 1 grows linearly, and
# group 0 follows from it through relation_formula.
for unit in range(J):
    cum_1[unit] = init_1 * (unit + 1)
    cum_0[unit] = relation_formula(cum_1[unit], q)

In [7]:
# Per-unit probabilities are the increments of each cumulative curve; the
# prepended 0.0 makes the first increment equal to the first cumulative value.
point_prob_1 = np.diff(cum_1, prepend=0.0)
point_prob_0 = np.diff(cum_0, prepend=0.0)

In [8]:
point_prob_1

array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02,
       0.02, 0.02, 0.02, 0.02, 0.02, 0.02])

In [9]:
point_prob_0

array([0.03289474, 0.03204033, 0.03121878, 0.03042843, 0.02966772,
       0.02893519, 0.02822945, 0.02754922, 0.02689329, 0.0262605 ,
       0.02564979, 0.02506014, 0.0244906 , 0.02394025, 0.02340824,
       0.02289377, 0.02239608, 0.02191445, 0.02144818, 0.02099664,
       0.02055921, 0.02013531, 0.01972438, 0.01932591, 0.01893939,
       0.01856436, 0.01820035, 0.01784694, 0.01750373, 0.01717033,
       0.01684636, 0.01653148, 0.01622534, 0.01592762, 0.01563803,
       0.01535627, 0.01508205, 0.01481511, 0.01455519, 0.01430206,
       0.01405547, 0.01381521, 0.01358105, 0.0133528 , 0.01313025,
       0.01291322, 0.01270153, 0.012495  , 0.01229347, 0.01209677])

In [10]:
E_nj = 10  # population per unit on average, so that the total is n = J * E_nj
p = 0.20  # Global minority proportion

n = J * E_nj
# The group sizes are used as the number of multinomial trials, which must be
# an integer. n * p is a float (and can land just below a whole number), so
# round it explicitly and derive the other group as the remainder; the two
# groups then always add up to n.
n1 = int(round(n * p))
n0 = n - n1

In [11]:
# One multinomial draw per group: spread that group's population over the J
# units according to its per-unit probabilities. size=1 gives shape (1, J).
sim1 = np.random.multinomial(n1, pvals=point_prob_1, size=1)
sim0 = np.random.multinomial(n0, pvals=point_prob_0, size=1)

In [12]:
sim1

array([[0, 1, 0, 2, 3, 2, 4, 2, 1, 3, 3, 1, 0, 1, 3, 2, 2, 0, 1, 1, 1, 5,
        4, 2, 1, 4, 2, 2, 0, 1, 2, 2, 2, 2, 1, 1, 4, 3, 2, 2, 4, 3, 4, 1,
        2, 4, 2, 2, 1, 2]])

In [13]:
sim0

array([[13, 12, 13, 13, 14, 15,  8, 15,  8, 14, 11, 12,  8,  8, 11,  4,
        11,  4,  7,  7,  5, 11,  5,  6,  5,  8,  9,  4, 10,  7,  6,  6,
         4,  4,  7,  7,  3,  7, 13,  8,  5,  6,  6,  5,  6,  5,  3,  9,
         4,  8]])

In [14]:
sim1[0] + sim0[0]

array([13, 13, 13, 15, 17, 17, 12, 17,  9, 17, 14, 13,  8,  9, 14,  6, 13,
        4,  8,  8,  6, 16,  9,  8,  6, 12, 11,  6, 10,  8,  8,  8,  6,  6,
        8,  8,  7, 10, 15, 10,  9,  9, 10,  6,  8,  9,  5, 11,  5, 10])

In [15]:
# One row per unit: the simulated group-1 count and the total count
# (group 1 + group 0) for that unit.
df = pd.DataFrame(
    {
        'group_pop_var': sim1[0],
        'total_pop_var': np.add(sim1[0], sim0[0]),
    }
)

In [16]:
# Build the Dissim estimator (from segregation.dissimilarity) on the simulated
# frame, naming the group-count and total-count columns; its `.statistic`
# attribute is read in the following cells.
index = Dissim(df, 'group_pop_var', 'total_pop_var')

In [17]:
index.statistic

0.3075

In [18]:
index.statistic - D # Bias

0.1804833462074169

# Building a generic function that creates the data frame for different hyperparameters

In [153]:
def create_data_frame(q, J, E_nj, p):
    """Simulate per-unit counts of two groups for one scenario.

    Group 1 is spread uniformly over the J units, while the cumulative
    distribution of group 0 is obtained from that of group 1 through
    cum_0 = cum_1 / (1 - q + q * cum_1). Counts are then drawn from one
    multinomial distribution per group.

    Parameters
    ----------
    q : float
        Scenario parameter of the relation above; q = 0 gives both groups
        the same distribution over units.
    J : int
        Number of units (rows of the returned frame).
    E_nj : int or float
        Population per unit on average; the total population is J * E_nj.
    p : float
        Proportion of the total population that belongs to group 1.

    Returns
    -------
    pandas.DataFrame
        J rows with columns 'group_pop_var' (group-1 count) and
        'total_pop_var' (group-1 plus group-0 count).
    """
    # Derive the population from the arguments instead of relying on a
    # module-level `n`. The sizes are integers because they are used as the
    # number of multinomial trials; taking group 0 as the remainder keeps
    # the two groups summing exactly to n.
    n = int(round(J * E_nj))
    n1 = int(round(n * p))
    n0 = n - n1

    # Cumulative shares: linear for group 1, transformed for group 0.
    cum_1 = np.arange(1, J + 1) / J
    cum_0 = cum_1 / (1 - q + q * cum_1)

    # Per-unit probabilities are the increments of the cumulative curves.
    point_prob_1 = np.diff(cum_1, prepend=0.0)
    point_prob_0 = np.diff(cum_0, prepend=0.0)

    sim1 = np.random.multinomial(n1, point_prob_1)
    sim0 = np.random.multinomial(n0, point_prob_0)

    return pd.DataFrame({'group_pop_var': sim1,
                         'total_pop_var': sim1 + sim0})

In [210]:
# Candidate values for q: 0, 0.2, 0.4, 0.6, 0.8, 0.95

In [220]:
q = 0.4

In [221]:
df = create_data_frame(q = q, J = 50, E_nj = 10, p = 0.35)

In [222]:
# Recover a from the chosen q, then the reference value D that the
# estimates below are compared against.
a = 1 - q
D = (1 - np.sqrt(a)) / (1 + np.sqrt(a))

In [223]:
D

0.1270166537925831

In [224]:
# Build three estimators on the same simulated frame so that each one's
# `.statistic` can be compared with the reference value D in the next cells.
index_D = Dissim(df, 'group_pop_var', 'total_pop_var')
# NOTE(review): B = 250 is presumably a number of resampling replications -- confirm against Bias_Corrected_Dissim.
index_BC = Bias_Corrected_Dissim(df, 'group_pop_var', 'total_pop_var', B = 250)
index_DC = Density_Corrected_Dissim(df, 'group_pop_var', 'total_pop_var')

In [225]:
#index.statistic

In [226]:
index_D.statistic - D

0.12177455499862572

In [227]:
index_BC.statistic - D

0.019648840712911397

In [228]:
index_DC.statistic - D

0.0111905803718379

In [230]:
# Apply the density-corrected estimator to a shapefile instead of simulated counts.
import libpysal
import geopandas as gpd
# Read the "sacramentot2.shp" example shipped with libpysal.
s_map = gpd.read_file(libpysal.examples.get_path("sacramentot2.shp"))
# Keep the geometry plus the two columns passed to the estimator below.
df = s_map[['geometry', 'HISP_', 'TOT_POP']]
# 'HISP_' is passed as the group column and 'TOT_POP' as the total column.
index = Density_Corrected_Dissim(df, 'HISP_', 'TOT_POP')

In [231]:
index.statistic

0.2952051469981647