### Here we assign schools for students whose age between 7 and 17 and put this info to school_id

In [3]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from shapely.geometry import Point
from tqdm import tqdm
from geopy.distance import great_circle
import random

In [4]:
# SOME AUXILIARY FUNCTIONS
def isSchoolAge(row):
    return row['age'] >= 7 and row['age'] <= 17


def isWorkingAge(row):
    return row['age'] >= 18 and (row['sex'] == 'F' and row['age'] <= 60 or row['sex'] == 'M' and row['age'] <= 55)


def rowPointLocation(row):
    return Point(float(row['latitude']), float(row['longitude']))


def findDistToObj(row, point):
    # finds distances from the selected point to the object represented by DataFrame row
    obj_point = rowPointLocation(row)
    dist = great_circle((point.x, point.y), (obj_point.x, obj_point.y)).km
    return dist

In [5]:
city_name = 'chelyabinsk'

input_dir_schools = '../data/' + city_name + '/schools_' + city_name + '_2023.txt'

output_dir = '../data/' + city_name + '/people_' + city_name + '_2023_assigned_schools.txt'
input_dir_people = '../data/' + city_name + '/people_' + city_name + '_2023_assigned_workplaces.txt'
input_dir_households = '../data/' + city_name + '/households_' + city_name + '_2023_splitted_dwellings.txt'

people_df = pd.read_csv(input_dir_people, sep='\t')
households_df = pd.read_csv(input_dir_households, sep='\t')
schools_df = pd.read_csv(input_dir_schools, sep='\t')


hh_points_dic = {}

list_keys = households_df['sp_id'].tolist()
list_values = [Point([row['latitude'], row['longitude']])
               for idx, row in tqdm(households_df.iterrows())]

for key, value in zip(list_keys, list_values):
    hh_points_dic[key] = value
    


1081008it [00:47, 24269.92it/s]

In [14]:
def assignSchools(df_orig, hh_points_dic, schools_df_orig):
    assign_type = '15km' # 'closest' or any ohter string to apply less than 15km radius workplace
    close_schools_ids = []

    df = df_orig[df_orig.apply(isSchoolAge, axis=1)]
    # print(len(df))

    students_num = {}  # Counting students assigned to each school

    print("Schools, total: {}".format(schools_df_orig.shape[0]))
    print("Schoolchildren, total: {}".format(df.shape[0]))
    input("Press any key to continue calculating...")

    df_add = pd.DataFrame(list(np.zeros(schools_df_orig.shape[0])), columns=[
                          'distances'], index=schools_df_orig.index)
    schools_df = schools_df_orig.join(df_add)

    allSchoolsFilled = False

    for idx in tqdm(range(int(df.shape[0]))):
        person_row = df.iloc[idx]
        person_point = hh_points_dic[person_row['sp_hh_id']]
        # print("Student {} (out of {})".format(idx, df.shape[0]))
        person_point = hh_points_dic[person_row['sp_hh_id']]

        series_distance = schools_df.apply(
            findDistToObj, args=(person_point, ), axis=1)
        df_add = pd.DataFrame(series_distance.tolist(), columns=[
                              'distances'], index=schools_df.index)
        schools_df.update(df_add)

        isStudentAssigned = False

        while not isStudentAssigned:
            # print("Schools: \n", schools_df)
            # print("School distances: \n", schools_df['distances'])
            
            close_school = None
            min_index = 0
            ### ASSIGN CLOSEST WORKPLACE
            if (assign_type == 'closest'):
                min_index = schools_df['distances'].idxmin()
                close_school = schools_df.loc[min_index]
            ### ASSIGN FROM SOME DISTANCE    
            else: 
                schoolCloseEnough = False 
                counter = 0
                while (not schoolCloseEnough):
                    # random.seed(10)
                    min_index = np.random.choice(schools_df['distances'].index, size=1)[0]
                    close_school = schools_df.loc[min_index]
                    counter+=1
                    if counter >= 50:
                        print('Too much tries')
                        schoolCloseEnough = True
                    # print(close_workplace.shape)
                    # print(close_workplace['distances'])
                    if (close_school['distances'] <= 5): 
                        # print(close_workplace)
                        schoolCloseEnough = True

            # print(min_index)
            close_school = schools_df.loc[min_index]

            if not (close_school['sp_id'] in students_num.keys()):  # checking capacity
                students_num[close_school['sp_id']] = 1
            else:
                students_num[close_school['sp_id']
                             ] = students_num[close_school['sp_id']] + 1

            # print('Success!')
            # print("Student distribution ", students_num)

            # school filled to capacity
            if students_num[close_school['sp_id']] == close_school['size']:
                # print("Before: {}".format(schools_df.shape[0]))
                # (schools_df.index[min_index])
                schools_df = schools_df.drop(min_index)
                # print("After: {}".format(schools_df.shape[0]))

                if schools_df.shape[0] == 0:
                    allSchoolsFilled = True

            isStudentAssigned = True

        close_schools_ids.append(close_school['sp_id'])

        if allSchoolsFilled:
            print("All schools filled to capacity!")
            break

    # print(pd.Series(close_schools_ids, name="school_id", index = df.index[:len(close_schools_ids)]))
    # print(close_schools_ids)
    df_orig.update(pd.DataFrame(close_schools_ids, columns=[
                   'school_id'], index=df.index[:len(close_schools_ids)]))
    df_orig.to_csv(output_dir, sep='\t')
    return df_orig

# schools_df['size'] = schools_df['size']
schools_df['size'].sum()
df_assign = assignSchools(people_df, hh_points_dic, schools_df)

Schools, total: 810
Schoolchildren, total: 567780


  0%|          | 6/567780 [00:00<7:04:15, 22.30it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 9/567780 [00:00<7:38:28, 20.64it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 15/567780 [00:00<7:22:10, 21.40it/s]

Too much tries
Too much tries
Too much tries


  0%|          | 21/567780 [00:01<7:38:56, 20.62it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 24/567780 [00:01<7:43:13, 20.43it/s]

Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 30/567780 [00:01<7:32:23, 20.92it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 33/567780 [00:01<7:41:31, 20.50it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 39/567780 [00:01<7:30:16, 21.01it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 45/567780 [00:02<7:39:18, 20.60it/s]

Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 51/567780 [00:02<7:36:15, 20.74it/s]

Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 54/567780 [00:02<7:38:45, 20.63it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 60/567780 [00:02<7:47:02, 20.26it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 66/567780 [00:03<7:39:59, 20.57it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 69/567780 [00:03<7:38:22, 20.64it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 75/567780 [00:03<7:35:07, 20.79it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 81/567780 [00:03<7:30:31, 21.00it/s]

Too much tries
Too much tries
Too much tries


  0%|          | 84/567780 [00:04<7:36:55, 20.71it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 90/567780 [00:04<7:41:28, 20.50it/s]

Too much tries
Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 96/567780 [00:04<7:35:58, 20.75it/s]

Too much tries
Too much tries
Too much tries
Too much tries


  0%|          | 100/567780 [00:04<7:39:11, 20.60it/s]

Too much tries
Too much tries
Too much tries





KeyboardInterrupt: 

In [26]:
schools_df['size_new'] = schools_df['size'] + 2150
schools_df['size_new'].sum()

164845