In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
from PIL import Image, ImageOps
from plotnine import (ggplot, aes, geom_map, geom_text, geom_label, 
                      ggtitle, element_blank, element_rect, 
                      scale_fill_manual, theme_minimal, theme) 
from pulp import (LpProblem, LpMinimize, LpVariable, lpSum, 
                  PULP_CBC_CMD, GLPK_CMD, LpStatus, value) 

In [2]:
df=pd.read_csv('census.csv')
df.head()

Unnamed: 0,county_id,county,population,COUNTYFP10,latitude,longitude
0,0,Adair,7496,1,41.328528,-94.478164
1,1,Adams,3704,3,41.021656,-94.696906
2,2,Allamakee,14061,5,43.274964,-91.382751
3,3,Appanoose,12317,7,40.744683,-92.870345
4,4,Audubon,5674,9,41.679178,-94.904312


In [3]:
df['COUNTYFP10']=df['COUNTYFP10'].astype(str).str.pad(3,fillchar='0')

In [4]:
#imports county shapefiles from MGGG
shapefile_iowa = gpd.read_file('IA_counties/IA_counties.shp')
shapefile_iowa.head()

Unnamed: 0,STATEFP10,COUNTYFP10,GEOID10,NAME10,NAMELSAD10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,TOTPOP,...,TOTVOT12,PRES12D,PRES12R,PRES12OTH,TOTVOT16,PRES16D,PRES16R,PRES16OTH,CD,geometry
0,19,127,19127,Marshall,Marshall County,1482770678,1803086,42.041691,-92.9814523,40648,...,19064,10257,8472,335,17980,7652,9146,1182,1,"POLYGON ((-92.76679 42.12346, -92.76679 42.122..."
1,19,11,19011,Benton,Benton County,1855117342,5760770,42.0925474,-92.05763,26076,...,14023,6862,6940,221,13844,4678,8232,934,1,"POLYGON ((-91.94773 41.86186, -91.95514 41.861..."
2,19,41,19041,Clay,Clay County,1469139214,13866941,43.079822,-95.1497261,16667,...,8502,3385,4951,166,8617,2249,5877,491,4,"POLYGON ((-95.26926 43.25537, -95.26140 43.255..."
3,19,165,19165,Shelby,Shelby County,1530110414,1486135,41.6790143,-95.3089173,12167,...,6483,2469,3911,103,6370,1662,4362,346,4,"POLYGON ((-95.20902 41.86371, -95.20890 41.863..."
4,19,43,19043,Clayton,Clayton County,2016405612,36586071,42.8409979,-91.3235108,18129,...,9138,4806,4164,168,9129,3237,5317,575,1,"POLYGON ((-91.25080 42.64558, -91.25160 42.645..."


In [5]:
map_population_by_county_data = shapefile_iowa.merge(df, on='COUNTYFP10')
county_populations = np.array(df['population'])
state_population = sum(county_populations)
df.sort_values('population', ascending=False).head()

Unnamed: 0,county_id,county,population,COUNTYFP10,latitude,longitude
76,76,Polk,492401,153,41.684281,-93.56972
56,56,Linn,230299,113,42.077951,-91.597673
81,81,Scott,174669,163,41.641679,-90.62229
51,51,Johnson,152854,103,41.668737,-91.588812
6,6,Black Hawk,131144,13,42.472888,-92.306059


In [19]:
#putting the * will convert the array into a list form
#np.random permutes 99 numbers
list=[*np.random.permutation(99)]

In [17]:
#create a list of permutations of 99 counties
county_perms=[]
for i in range(10):
    county_perms.append([*np.random.permutation(99)])

In [18]:
print(county_perms)

[[97, 63, 21, 77, 73, 76, 56, 52, 11, 91, 83, 84, 29, 66, 10, 92, 70, 37, 58, 22, 57, 93, 24, 38, 82, 9, 54, 61, 43, 18, 7, 95, 16, 41, 40, 30, 32, 96, 51, 72, 62, 4, 1, 12, 80, 64, 89, 59, 68, 8, 45, 85, 78, 46, 14, 27, 67, 98, 47, 35, 25, 17, 5, 31, 33, 50, 0, 48, 15, 28, 13, 69, 74, 65, 23, 81, 34, 44, 49, 75, 36, 94, 60, 6, 20, 19, 2, 42, 90, 87, 3, 53, 71, 86, 39, 88, 55, 26, 79], [66, 30, 58, 13, 76, 80, 78, 61, 12, 52, 48, 4, 45, 21, 33, 3, 73, 72, 42, 25, 70, 75, 37, 49, 89, 59, 95, 15, 0, 9, 92, 8, 24, 62, 87, 41, 53, 47, 90, 71, 63, 82, 1, 83, 44, 2, 38, 16, 27, 97, 5, 6, 85, 23, 60, 11, 64, 98, 17, 93, 86, 34, 26, 14, 65, 35, 46, 88, 40, 20, 7, 96, 31, 94, 19, 50, 43, 51, 77, 18, 69, 79, 36, 55, 91, 81, 10, 29, 84, 54, 28, 56, 74, 32, 57, 39, 67, 22, 68], [1, 18, 22, 81, 39, 51, 93, 62, 54, 90, 88, 84, 85, 79, 73, 35, 30, 60, 67, 40, 70, 46, 45, 4, 66, 7, 13, 69, 37, 17, 25, 61, 38, 29, 95, 72, 64, 19, 9, 5, 94, 74, 55, 21, 77, 10, 2, 82, 48, 57, 92, 78, 59, 49, 20, 42, 97, 

In [20]:
list

[64,
 80,
 44,
 59,
 49,
 27,
 65,
 87,
 90,
 29,
 16,
 18,
 20,
 66,
 8,
 0,
 22,
 96,
 71,
 34,
 30,
 7,
 11,
 5,
 2,
 63,
 93,
 43,
 24,
 91,
 75,
 60,
 19,
 17,
 74,
 28,
 37,
 97,
 50,
 94,
 10,
 13,
 14,
 92,
 4,
 84,
 45,
 47,
 54,
 67,
 73,
 38,
 61,
 51,
 82,
 81,
 39,
 62,
 32,
 86,
 3,
 98,
 79,
 52,
 9,
 68,
 83,
 88,
 1,
 41,
 76,
 35,
 55,
 40,
 48,
 57,
 26,
 77,
 25,
 21,
 56,
 15,
 69,
 12,
 85,
 6,
 46,
 53,
 72,
 31,
 42,
 78,
 36,
 95,
 89,
 70,
 23,
 58,
 33]

In [21]:
df.head()

Unnamed: 0,county_id,county,population,COUNTYFP10,latitude,longitude
0,0,Adair,7496,1,41.328528,-94.478164
1,1,Adams,3704,3,41.021656,-94.696906
2,2,Allamakee,14061,5,43.274964,-91.382751
3,3,Appanoose,12317,7,40.744683,-92.870345
4,4,Audubon,5674,9,41.679178,-94.904312


In [23]:
county_perms[1]

[66,
 30,
 58,
 13,
 76,
 80,
 78,
 61,
 12,
 52,
 48,
 4,
 45,
 21,
 33,
 3,
 73,
 72,
 42,
 25,
 70,
 75,
 37,
 49,
 89,
 59,
 95,
 15,
 0,
 9,
 92,
 8,
 24,
 62,
 87,
 41,
 53,
 47,
 90,
 71,
 63,
 82,
 1,
 83,
 44,
 2,
 38,
 16,
 27,
 97,
 5,
 6,
 85,
 23,
 60,
 11,
 64,
 98,
 17,
 93,
 86,
 34,
 26,
 14,
 65,
 35,
 46,
 88,
 40,
 20,
 7,
 96,
 31,
 94,
 19,
 50,
 43,
 51,
 77,
 18,
 69,
 79,
 36,
 55,
 91,
 81,
 10,
 29,
 84,
 54,
 28,
 56,
 74,
 32,
 57,
 39,
 67,
 22,
 68]

In [31]:
list2=pd.Series(list)
list2.info()

<class 'pandas.core.series.Series'>
RangeIndex: 99 entries, 0 to 98
Series name: None
Non-Null Count  Dtype
--------------  -----
99 non-null     int64
dtypes: int64(1)
memory usage: 920.0 bytes


In [33]:
list2

0     64
1     80
2     44
3     59
4     49
      ..
94    89
95    70
96    23
97    58
98    33
Length: 99, dtype: int64

In [32]:
df.sort_values(by='county_id', key=list2)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [35]:
toy={'col1':[0,1,2,3], 'col2':[10,11,12,13], 'col3':[4,3,2,1]}
toydf=pd.DataFrame(data=toy, index=[0,1,2,3])

In [36]:
toydf

Unnamed: 0,col1,col2,col3
0,0,10,4
1,1,11,3
2,2,12,2
3,3,13,1


In [37]:
toy_perms=[]
for i in range(5):
    toy_perms.append([*np.random.permutation(4)])

In [38]:
toy_perms

[[3, 0, 2, 1], [2, 0, 1, 3], [2, 1, 3, 0], [2, 3, 0, 1], [3, 1, 2, 0]]

In [39]:
toy_perms[1]

[2, 0, 1, 3]

In [49]:
toy_series=pd.Series(toy_perms[1])
toy_series


0    2
1    0
2    1
3    3
dtype: int64

In [76]:
toydf.sort_values(by='col1', key=lambda col:np.random.permutation(4))

Unnamed: 0,col1,col2,col3
1,1,11,3
0,0,10,4
2,2,12,2
3,3,13,1


In [77]:
toydf.sort_values(by='col1', key=lambda col:np.random.permutation(4))

Unnamed: 0,col1,col2,col3
3,3,13,1
2,2,12,2
1,1,11,3
0,0,10,4


In [78]:
toydf.sort_values(by='col1', key=lambda col: toy_series)

Unnamed: 0,col1,col2,col3
1,1,11,3
2,2,12,2
0,0,10,4
3,3,13,1


In [88]:
perm=np.random.permutation(99)
print(perm)
df_ex=df.sort_values(by='county_id', key=lambda col: list2)
print(df_ex.head(30))
# df_ex=df.sort_values(by='county_id', key=lambda col: np.random.permutation(99))

[50 73 26  9 23 49 52 94 79 66 86 37  4 92 41 19 91 95 21 22 69 71 60 27
 53  0 67 58 46 90 48 88 96 20 54 32 12 74 24 40  6 57 42 75  1 31 62 64
 84 63 47 56 70 17 59  5 34  7 51 35 87 38 25 77 68 83 16 33 85 97 61 13
 18 80 76 45  2 72  8 11 55 28 10 14 82  3 98 29 44 43 15 81 89 30 65 39
 93 36 78]
    county_id        county  population COUNTYFP10   latitude  longitude
15         15        Cedar        18505        031  41.772355 -91.132190
68         68   Montgomery        10330        137  41.021735 -95.157790
24         24       Dallas        99678        049  41.685321 -94.040706
60         60      Madison        16548        121  41.330622 -94.015184
44         44       Howard         9469        089  43.365313 -92.321908
23         23     Crawford        16525        047  42.043119 -95.389090
85         85         Tama        17135        171  42.074848 -92.529411
21         21      Clayton        17043        043  42.840998 -91.323511
14         14         Cass        13127 

In [81]:
perm

array([54, 89, 27, 48,  0,  5, 34, 73, 13, 88, 45, 50, 40, 97, 28, 29, 11,
       18,  1, 30, 32, 79, 65, 55, 82, 90, 75, 59, 17, 38, 61, 77, 67, 41,
       64, 96, 51, 43, 42, 49, 26, 53, 92, 56, 23, 52, 46, 63, 94, 91, 74,
       21, 36, 68, 83, 57, 35, 12, 66, 86, 47, 85, 60, 62, 16,  7,  6, 58,
        2, 25, 95, 81,  3, 10, 87, 37, 93, 84, 14, 19, 78, 70, 71,  8, 69,
       98, 72, 31, 22, 20, 44, 39, 33,  9,  4, 76, 80, 15, 24])

In [82]:
df_ex.head()

Unnamed: 0,county_id,county,population,COUNTYFP10,latitude,longitude
4,4,Audubon,5674,9,41.679178,-94.904312
18,18,Chickasaw,12012,37,43.059741,-92.31721
68,68,Montgomery,10330,137,41.021735,-95.15779
72,72,Page,15211,145,40.73909,-95.14429
94,94,Winnebago,10679,189,43.378124,-93.743488


In [85]:
for i in range(5):
    df_dummy=df.sort_values(by='county_id', key=lambda col: np.random.permutation(99))
    print(df_dummy.head())

    county_id    county  population COUNTYFP10   latitude  longitude
25         25    Davis         9110        051  40.748089 -92.410345
13         13  Carroll        20760        027  42.039492 -94.867647
83         83    Sioux        35872        167  43.082854 -96.177929
36         36   Greene         8771        073  42.042494 -94.388703
5           5   Benton        25575        011  42.092547 -92.057630
    county_id      county  population COUNTYFP10   latitude  longitude
52         52      Jones        20646        105  42.125118 -91.116914
83         83      Sioux        35872        167  43.082854 -96.177929
51         51    Johnson       152854        103  41.668737 -91.588812
46         46        Ida         7005        093  42.391860 -95.507421
3           3  Appanoose        12317        007  40.744683 -92.870345
    county_id      county  population COUNTYFP10   latitude  longitude
78         78  Poweshiek        18662        157  41.684526 -92.522882
34         34   Fr

In [89]:
county_perms

[[97,
  63,
  21,
  77,
  73,
  76,
  56,
  52,
  11,
  91,
  83,
  84,
  29,
  66,
  10,
  92,
  70,
  37,
  58,
  22,
  57,
  93,
  24,
  38,
  82,
  9,
  54,
  61,
  43,
  18,
  7,
  95,
  16,
  41,
  40,
  30,
  32,
  96,
  51,
  72,
  62,
  4,
  1,
  12,
  80,
  64,
  89,
  59,
  68,
  8,
  45,
  85,
  78,
  46,
  14,
  27,
  67,
  98,
  47,
  35,
  25,
  17,
  5,
  31,
  33,
  50,
  0,
  48,
  15,
  28,
  13,
  69,
  74,
  65,
  23,
  81,
  34,
  44,
  49,
  75,
  36,
  94,
  60,
  6,
  20,
  19,
  2,
  42,
  90,
  87,
  3,
  53,
  71,
  86,
  39,
  88,
  55,
  26,
  79],
 [66,
  30,
  58,
  13,
  76,
  80,
  78,
  61,
  12,
  52,
  48,
  4,
  45,
  21,
  33,
  3,
  73,
  72,
  42,
  25,
  70,
  75,
  37,
  49,
  89,
  59,
  95,
  15,
  0,
  9,
  92,
  8,
  24,
  62,
  87,
  41,
  53,
  47,
  90,
  71,
  63,
  82,
  1,
  83,
  44,
  2,
  38,
  16,
  27,
  97,
  5,
  6,
  85,
  23,
  60,
  11,
  64,
  98,
  17,
  93,
  86,
  34,
  26,
  14,
  65,
  35,
  46,
  88,
  40,
  20,
  7,

In [91]:
for i in range(10):
    df_dummy=df.sort_values(by='county_id', key=lambda col: county_perms[i])
    print(df_dummy.head())

    county_id     county  population COUNTYFP10   latitude  longitude
66         66    Monona         8751        133  42.049432 -95.956566
42         42  Harrison        14582        085  41.688584 -95.827149
86         86    Taylor         5896        173  40.737949 -94.697108
90         90    Warren        52403        181  41.336768 -93.564366
41         41    Hardin        16878        083  42.389955 -93.241081
    county_id       county  population COUNTYFP10   latitude  longitude
28         28  Des Moines        38910        057  40.915339 -91.186925
42         42    Harrison        14582        085  41.688584 -95.827149
45         45    Humboldt         9597        091  42.782221 -94.202775
15         15       Cedar        18505        031  41.772355 -91.132190
11         11      Butler        14334        023  42.734708 -92.780066
    county_id      county  population COUNTYFP10   latitude  longitude
87         87      Union        12138        175  41.028550 -94.245091
0     