In [1]:
import geopandas as gpd
from tqdm import tqdm

### Get the data in the folder ./LCPS_data for pre-processing. Let's call this `retrieved data`.

In [2]:
def retrieve_data(sy, data_dir="./LCPS_data"):
    """
    Read the raw LCPS shapefiles for a given school year.

    Parameters
    ----------
    sy : str
        School-year tag used as the file-name suffix, e.g. '2017_2018'.
    data_dir : str, optional
        Folder that holds the shapefiles. Defaults to "./LCPS_data".

    Returns
    -------
    tuple
        (schools, students, spas): the results of gpd.read_file for
        LCPS_Sites_<sy>.shp, Students_<sy>.shp and PlanningZones_<sy>.shp,
        in that order.
    """
    def read_layer(prefix):
        # All three layers follow the same '<dir>/<prefix>_<sy>.shp' pattern.
        return gpd.read_file('{}/{}_{}.shp'.format(data_dir, prefix, sy))

    schools = read_layer('LCPS_Sites')
    students = read_layer('Students')
    spas = read_layer('PlanningZones')

    return schools, students, spas

In [3]:
# School-year tag; it is interpolated into the input and output file names.
sy = '2017_2018'

In [4]:
# Load the school sites, student records and planning zones (SPAs) for the chosen year.
schools, students, spas = retrieve_data(sy)

#### Print out a data instance of the SPAs, SCHOOLs and STUDENTs

In [5]:
# Peek at the first SPA record only; nothing is printed if the layer is empty.
first_spa = next(spas.iterrows(), None)
if first_spa is not None:
    print(first_spa[1])

OBJECTID                                                      1
COUNT_                                                        5
STDYAREA                                                 WL03.4
ELEM_                                                       126
INT_                                                        205
HIGH_                                                       311
ELEM_CODE                                                   LOV
INT_CODE                                                    HRM
HIGH_CODE                                                   WHS
DISTRICT                                                     WL
UTILITIES                                                   WOU
SHAPE_Leng                                              47268.8
SHAPE_Area                                          6.39938e+07
STDYAREA_1                                               WL03.4
DISTRICT_1                                                   WL
PLANNING_Z                              

In [6]:
# Peek at the first school record only; nothing is printed if the layer is empty.
first_school = next(schools.iterrows(), None)
if first_school is not None:
    print(first_school[1])

OBJECTID                                                1
SCH_CODE                                              HCA
CLASS                                             CHARTER
SCH_NUM                                               119
NAME                            HILLSBORO CHARTER ACADEMY
DATE_OPENE                                           1966
BUILDING_D                                              7
CLASSROOMS                                              6
SPECIAL_SI                                              0
TRAILERS                                                0
SCHL_CODE                                             119
STRT_GRD                                               -1
END_GRD                                                 5
ELEM_                                                 119
INT_                                                    0
MID_                                                    0
HIGH_                                                   0
CAPACITY      

In [7]:
# Peek at the first student record only; nothing is printed if the layer is empty.
first_student = next(students.iterrows(), None)
if first_student is not None:
    print(first_student[1])

ObjectID                                                1
ARC_Single                     21365 FITZGERALD DR, 20147
Loud_ID                                            100011
Address                                             21365
Address_Ex                                           None
ADDwEXT                                             21365
Prefix                                               None
Street_Nam                                     FITZGERALD
Street_Typ                                             DR
Suffix                                               None
Town                                              ASHBURN
Zip_1                                               20147
Subdivisio                                  FARMWELL HUNT
GRID_CODE                                          100011
GRADE                                                  13
IEP_FLAG                                                N
GENDER                                                  M
ETHNIC        

### Print out the CRS of the SPAs, SCHOOLs and STUDENTs

In [8]:
spas.crs    # Display the CRS of the SPAs

{'proj': 'lcc',
 'lat_1': 38.03333333333333,
 'lat_2': 39.2,
 'lat_0': 37.66666666666666,
 'lon_0': -78.5,
 'x_0': 3499999.999999998,
 'y_0': 2000000,
 'ellps': 'GRS80',
 'towgs84': '0,0,0,0,0,0,0',
 'units': 'us-ft',
 'no_defs': True}

In [9]:
# Display the CRS of the students layer.
students.crs

{'proj': 'lcc',
 'lat_1': 38.03333333333333,
 'lat_2': 39.2,
 'lat_0': 37.66666666666666,
 'lon_0': -78.5,
 'x_0': 3499999.999999998,
 'y_0': 2000000,
 'ellps': 'GRS80',
 'towgs84': '0,0,0,0,0,0,0',
 'units': 'us-ft',
 'no_defs': True}

In [10]:
# Display the CRS of the schools layer.
schools.crs

{'proj': 'lcc',
 'lat_1': 38.03333333333333,
 'lat_2': 39.2,
 'lat_0': 37.66666666666666,
 'lon_0': -78.5,
 'x_0': 3499999.999999998,
 'y_0': 2000000,
 'ellps': 'GRS80',
 'towgs84': '0,0,0,0,0,0,0',
 'units': 'us-ft',
 'no_defs': True}

### Read the data (supplied with the algorithm) in the folder './data'. Let's call this `new data`.

In [11]:
# Reference SPAs supplied with the algorithm; the retrieved SPAs are aligned to this layer.
spas_new = gpd.read_file('./data/SPAs.json')

In [12]:
# Peek at the first reference SPA record only; nothing is printed if the layer is empty.
first_new_spa = next(spas_new.iterrows(), None)
if first_new_spa is not None:
    print(first_new_spa[1])

OBJECTID                                                      1
COUNT_                                                        5
SPA                                                      WL03.4
ELEM_                                                       126
INT_                                                        205
HIGH_                                                       311
ELEM_CODE                                                   LOV
ELEM_POP                                                     31
MID_CODE                                                    HRM
MID_POP                                                      12
HIGH_CODE                                                   WHS
HIGH_POP                                                     13
DISTRICT                                                     WL
UTILITIES                                                   WOU
SHAPE_Leng                                              47261.2
SHAPE_Area                              

#### We have to modify the SPAs in `retrieved data` to match the entries in `new data`

In [13]:
# Columns of the retrieved SPAs that are removed before aligning them with the new data.
fields = ['ELEM_1', 'MID_1', 'HIGH_1', 'DISTRICT_3', 'MID_2_2019', 'HIGH__2019',
          'PLANNING_1', 'UTILITIE_2', 'STUDENTS_P', 'Field39', 'Field40']
# Rebind instead of dropping in place, and skip columns that are already gone,
# so that re-running this cell is a no-op instead of raising a KeyError.
spas = spas.drop(columns=fields, errors='ignore')

In [14]:
# The retrieved SPAs call the middle-school code column INT_CODE; rename it to MID_CODE.
spas = spas.rename(columns={"INT_CODE": "MID_CODE"})

# Student-count columns (ELEM, MID, HIGH and TOTAL), all starting at zero.
new_fields = ['ELEM_POP', 'MID_POP', 'HIGH_POP', 'TOT_POP']
spas = spas.assign(**dict.fromkeys(new_fields, 0))

##### Run a point-in-polygon test to count the students who attend LCPS public schools and reside within Loudoun County, VA.

In [15]:
# Distinct grade codes that occur in the student data.
set(students['GRADE'].tolist())

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}

In [16]:
count = 0     # students whose location falls inside some SPA
others = 0    # students inside an SPA whose grade code matches none of the ranges below

# Reset the tallies so that re-running this cell does not double-count students.
for pop_field in ('ELEM_POP', 'MID_POP', 'HIGH_POP', 'TOT_POP'):
    spas[pop_field] = 0

# Hoist the SPA (index, geometry) pairs out of the student loop: calling
# spas.iterrows() once per student rebuilds a Series for every SPA row each time.
spa_shapes = list(zip(spas.index, spas.geometry))

total_students = len(students)

for student_location, student_grade in tqdm(zip(students.geometry, students['GRADE']),
                                            total=total_students):
    # Map the grade code to the population column it contributes to.
    if 0 < student_grade < 6 or student_grade == 13:    # ELEMENTARY School has grades KG-5
        # NOTE(review): code 13 is counted as elementary, presumably kindergarten - confirm.
        grade_field = 'ELEM_POP'
    elif 5 < student_grade < 9:    # MIDDLE School has grades 6-8
        grade_field = 'MID_POP'
    elif 8 < student_grade < 13:    # HIGH School has grades 9-12
        grade_field = 'HIGH_POP'
    else:
        grade_field = None    # any other code (e.g. 14) is not tallied

    for ind, spa_shape in spa_shapes:
        if student_location.within(spa_shape):
            count += 1
            if grade_field is None:
                others += 1
            else:
                spas.at[ind, grade_field] += 1
                spas.at[ind, 'TOT_POP'] += 1
            # Stop at the first containing SPA, so each student is counted at most once.
            break


81427it [19:42, 68.86it/s]


In [17]:
# Each entry pairs a message template with the number of students it reports;
# the denominator is always the total number of student records.
summary = [
    ('{}/{} students living inside LCPS\' boundary are considered for redistricting', count - others),
    ('{}/{} students living inside LCPS\' boundary are ignored', others),
    ('{}/{} students living outside LCPS\' boundary are ignored', total_students - count),
]
for template, n_students in summary:
    print(template.format(n_students, total_students))

80345/81427 students living inside LCPS' boundary are considered for redistricting
1054/81427 students living inside LCPS' boundary are ignored
28/81427 students living outside LCPS' boundary are ignored


### Transform the CRS of the `retrieved data` to match `new data`. The shapefiles must be reprojected into a common coordinate reference system so that all the layers we work with share the same projection. For more details, refer to the [GeoPandas projections documentation](https://geopandas.org/projections.html).

In [18]:
# Target CRS: the one carried by the reference SPAs read from ./data.
new_crs = spas_new.crs

In [19]:
# Display the target CRS.
new_crs

{'init': 'epsg:2924'}

In [20]:
# Reproject the SPAs into the target CRS.
spas = spas.to_crs(new_crs)

  return _prepare_from_string(" ".join(pjargs))


In [21]:
# Reproject the schools into the target CRS.
schools = schools.to_crs(new_crs)

  return _prepare_from_string(" ".join(pjargs))


In [22]:
# Print the CRS of both layers to confirm they now report the target CRS.
print(spas.crs, schools.crs)

{'init': 'epsg:2924'} {'init': 'epsg:2924'}


### Write out the updated data as geojson files

In [23]:
def write_data(schools, spas, sy, data_dir="./LCPS_data"):
    """
    Write the processed schools and SPAs for a given school year as GeoJSON.

    Parameters
    ----------
    schools : object with a ``to_file(path, driver=...)`` method
        Schools layer; written to '<data_dir>/Schools_<sy>.json'.
    spas : object with a ``to_file(path, driver=...)`` method
        SPAs layer; written to '<data_dir>/SPAs_<sy>.json'.
    sy : str
        School-year tag used as the file-name suffix, e.g. '2017_2018'.
    data_dir : str, optional
        Output folder. Defaults to "./LCPS_data".
    """
    # Write the data files
    schools.to_file('{}/Schools_{}.json'.format(data_dir, sy), driver='GeoJSON')
    spas.to_file('{}/SPAs_{}.json'.format(data_dir, sy), driver='GeoJSON')


In [24]:
# Save the reprojected schools and the updated SPAs as GeoJSON files.
write_data(schools, spas, sy)