### Model within a limited range of errors: maximum coverage

Read individual data and convert

- Geographic level: tract
- Attributes: VOTINGAGE (2) $*$ HISPANIC (2) $*$ CENRACE (63)

In [1]:
import pandas as pd
import numpy as np

filename_hist = 'data/franklin_hist.csv'
hist = pd.read_csv(filename_hist)

# block to tract
hist['TRACT'] = hist['GEOID10'].astype(str).str[:11]
col_names = hist.columns.to_numpy()
col_names = np.delete(col_names, [0, -1])
hist = hist.groupby('TRACT').sum()[col_names]
hist = hist.reset_index()
hist

Unnamed: 0,TRACT,00000000,00000001,00000002,00000003,00000004,00000005,00000006,00000007,00000008,...,07010153,07010154,07010155,07010156,07010157,07010158,07010159,07010160,07010161,07010162
0,39049000110,438,6,0,7,0,4,5,1,0,...,0,0,0,0,0,0,0,0,0,0
1,39049000120,467,4,0,11,0,2,2,0,2,...,0,0,0,0,0,0,0,0,0,0
2,39049000210,430,11,0,6,0,4,4,1,4,...,0,0,0,0,0,0,0,0,0,0
3,39049000220,653,3,1,12,0,6,7,0,6,...,0,0,0,0,0,0,0,0,0,0
4,39049000310,313,205,0,17,0,2,19,0,4,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,39049010500,1821,35,1,409,0,2,5,2,22,...,0,0,0,0,0,0,0,0,0,0
280,39049010601,1408,36,0,146,0,3,12,0,15,...,0,0,0,0,0,0,0,0,0,0
281,39049010602,1585,45,3,139,0,4,7,2,9,...,0,0,0,0,0,0,0,0,0,0
282,39049010700,83,11,0,4,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0


In [2]:
# HHGQ (8) $*$ VOTINGAGE (2) $*$ HISPANIC (2) $*$ CENRACE (63) to VOTINGAGE (2) $*$ HISPANIC (2) $*$ RACE (7)
n2, n3, n4 = 2, 2, 63

for y in range(n2):  # voting age
    y = '{number:0{width}d}'.format(width=2, number=y)
    col_names = [col for col in hist.columns if y in col[2:4] and len(col)==8]

    for z in range(n3):  # ethnicity
        z = '{number:0{width}d}'.format(width=2, number=z)
        col_names2 = [col for col in col_names if z in col[4:6]]

        col_two_or_more_races = []
        for x in range(n4):  # race
            if x >= 0 and x <= 5:
                x = '{number:0{width}d}'.format(width=2, number=x)
                col_names3 = [col for col in col_names2 if x in col[6:8]]
                hist[x + y + z] = hist[col_names3].sum(axis=1)
            else:
                x = '{number:0{width}d}'.format(width=2, number=x)
                col_names3 = [col for col in col_names2 if x in col[6:8]]
                col_two_or_more_races.extend(col_names3)
        hist[x + y + '06'] = hist[col_two_or_more_races].sum(axis=1)

hist.drop([col for col in hist.columns if len(col)==8], axis=1, inplace=True)
hist

Unnamed: 0,TRACT,000000,010000,020000,030000,040000,050000,620006,000001,010001,...,030100,040100,050100,620106,000101,010101,020101,030101,040101,050101
0,39049000110,438,6,0,7,0,4,1,29,0,...,29,0,2,0,17,3,0,0,0,6
1,39049000120,467,4,0,11,0,2,0,13,0,...,31,1,0,0,18,0,0,0,0,4
2,39049000210,430,11,0,6,0,4,0,10,0,...,26,0,2,0,34,0,0,0,0,4
3,39049000220,653,3,1,12,0,6,5,20,0,...,43,0,6,1,16,0,2,0,0,8
4,39049000310,313,205,0,17,0,2,3,15,1,...,53,2,5,1,40,1,1,0,0,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,39049010500,1821,35,1,409,0,2,4,27,0,...,768,1,8,0,70,0,1,0,0,19
280,39049010601,1408,36,0,146,0,3,2,16,3,...,307,0,2,1,86,0,0,0,0,13
281,39049010602,1585,45,3,139,0,4,1,13,1,...,281,0,8,0,71,2,2,0,0,25
282,39049010700,83,11,0,4,0,0,0,6,1,...,57,1,2,0,10,0,0,0,0,14


Model inputs

In [3]:
import numpy as np

# define all the input data for the model
N, K = hist.shape[0], hist.shape[1] - 1
T = 5
P = 100

V = []
for k in range(1, K+1):
    V.append(hist.index[hist.iloc[:,k] == 1].tolist())

count = 0
for listElem in V:
    count += len(listElem)  
print(count)

W = hist.iloc[:,1:].apply(lambda x: 1 + 1 / x).to_numpy()
W = np.nan_to_num(W, posinf=999) 
print(W.shape, W[0])

668
(284, 26) [  1.00228311   1.16666667 999.           1.14285714 999.
   1.25         2.           1.03448276 999.           2.
 999.         999.           2.           1.0003659    1.03225806
   2.           1.03448276 999.           1.5        999.
   1.05882353   1.33333333 999.         999.         999.
   1.16666667]


Coverage I: all except the origin and other uniques

In [4]:
import numpy as np

## define coverage aijk
A = np.ones((N, N, K))

for i in range(N): 
    for j in range(N):
        for k in range(K):
            if i == j or j in V[k]:
                A[i, j, k] = 0

Coverage II: neighboring

In [5]:
import numpy as np
import geopandas as gpd
from pysal.lib import weights

filename_gdf = 'data/franklin_tract10.json'
gdf = gpd.read_file(filename_gdf)
gdf['GEOID10'] = gdf['GEOID10'].astype(str)
wr = weights.distance.KNN.from_dataframe(gdf, k=10)
print(wr.neighbors[0])

## define coverage aijk
A = np.zeros((N, N, K))
for i in wr.neighbors:
    neighbors_idx = wr.neighbors[i]
    for j in neighbors_idx:
        geoid = gdf.loc[[j],'GEOID10'].values[0]
        idx = hist.loc[hist["TRACT"] == geoid].index[0]
        for k in range(K):
            if j not in V[k]:
                A[i, j, k] = 1
A        

Can't load requested DLL: C:\Users\10716\AppData\Local\Programs\Python\Python37\lib\site-packages\osgeo\gdalplugins\ogr_FileGDB.dll
126: The specified module could not be found.

Can't load requested DLL: C:\Users\10716\AppData\Local\Programs\Python\Python37\lib\site-packages\osgeo\gdalplugins\ogr_FileGDB.dll
126: The specified module could not be found.

Can't load requested DLL: C:\Users\10716\AppData\Local\Programs\Python\Python37\lib\site-packages\osgeo\gdalplugins\ogr_FileGDB.dll
126: The specified module could not be found.

Can't load requested DLL: C:\Users\10716\AppData\Local\Programs\Python\Python37\lib\site-packages\osgeo\gdalplugins\ogr_FileGDB.dll
126: The specified module could not be found.

  from .sqlite import head_to_sql, start_sql


[4, 90, 89, 276, 12, 123, 171, 1, 180, 13]


array([[[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 1., 1., ..., 1., 1., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

Run model

In [6]:
import pandas as pd
from gurobipy import Model, GRB, LinExpr, quicksum

# initialize model
m = Model('td')
# m.Params.LogToConsole = 0

# add objective function
obj = LinExpr()

# add decision variables and objective function
z, x = {}, {}     ## decision vairable
for k in range(K):
    if len(V[k]) == 0:
        continue
    for i in V[k]:
        # decision variables
        z[i, k] = m.addVar(vtype=GRB.BINARY, name="z_%d_%d"%(i, k))
        m.update()
        # objective
        obj += z[i, k]
        for j in range(N):
            x[i, j, k] = m.addVar(vtype=GRB.BINARY, name="x_%d_%d_%d"%(i, j, k))

m.setObjective(obj, GRB.MAXIMIZE)

# add constraints
for k in range(K):
    if len(V[k]) == 0:
        continue
    for i in V[k]:
        m.addConstr(quicksum(x[i, j, k] for j in range(N)) <= 1)
        m.addConstr(quicksum(A[i, j, k] * x[i, j, k] for j in range(N)) >= z[i, k])

m.addConstr(quicksum(quicksum(quicksum(W[j, k] * x[i, j, k] for i in V[k]) for k in range(K)) for j in range(N)) <= P)

for j in range(N):
    m.addConstr(quicksum(quicksum(x[i, j, k] for i in V[k]) for k in range(K)) <= T)

m.update()
m.optimize()

for var in m.getVars():
    print(var.VarName, var.X)

Academic license - for non-commercial use only - expires 2022-08-05
Using license file C:\Users\10716\gurobi.lic
Gurobi Optimizer version 9.1.1 build v9.1.1rc0 (win64)
Thread count: 2 physical cores, 4 logical processors, using up to 4 threads
Optimize a model with 1621 rows, 190380 columns and 575320 nonzeros
Model fingerprint: 0x128ce396
Variable types: 0 continuous, 190380 integer (190380 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+03]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+02]
Found heuristic solution: objective -0.0000000
Presolve removed 848 rows and 188960 columns
Presolve time: 0.55s
Presolved: 773 rows, 1420 columns, 4224 nonzeros
Variable types: 0 continuous, 1420 integer (1387 binary)

Root relaxation: objective 9.707296e+01, 360 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node T