<a href="https://colab.research.google.com/github/stphnclysmth/notebooks/blob/main/Assigning_Students_to_Teachers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Input data: one (display name, free-form street address) tuple per student.
# The address strings are geocoded further down before any distance is computed.
student_addresses = [
    ('Student 1', '3710 Pio Pico St, San Diego, CA, 92106'),
    ('Student 2', '1600 PARADISE HILLS RD, SAN DIEGO, CA 92114-7883'),
    ('Student 3', '1100 BEYER WAY, SAN DIEGO, CA 92154-4622'),
    ('Student 4', '9900 PINEKNOLL LN, SAN DIEGO, CA 92124-1810'),
    ('Student 5', '4716 Ladner St, San Diego, CA, 92113'),
    ('Student 6', '4482 48TH ST APT 4, SAN DIEGO, CA 92115-4534'),
    ('Student 7', '4505 LIMERICK AVE, SAN DIEGO, CA 92117-3219'),
    ('Student 8', '12300 RAGWEED ST, SAN DIEGO, CA 92129-4107'),
    ('Student 9', '9889 HIBERT ST, SAN DIEGO, CA 92131-1062'),
    ('Student 10', '5000 COVE VIEW PL, SAN DIEGO, CA 92154-8447'),
    ('Student 11', '3241 ISLAND AVE, SAN DIEGO, CA 92102-4245'),
    ('Student 12', '8851 LA CINTURA CT, SAN DIEGO, CA 92129-3313'),
    ('Student 13', '4336 W POINT LOMA BLVD, SAN DIEGO, CA 92107-1182'),
    ('Student 14', '10378 SCRIPPS POWAY PKWY, SAN DIEGO, CA 92131-5126'),
]

# Input data: one (display name, free-form street address) tuple per teacher,
# in the same layout as student_addresses.
teacher_addresses = [
    ('Teacher 1', '2501 HALLER ST, SAN DIEGO, CA 92104-5330'),
    ('Teacher 2', '701 ALBION ST, SAN DIEGO, CA 92106-3212'),
    ('Teacher 3', '4480 JUTLAND DR, SAN DIEGO, CA 92117-3647'),
]


In [None]:
import math
import geopy
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from collections import defaultdict
from pprint import pprint

# Nominatim geocoder client; user_agent is the identifier passed to the client
# for its requests.
geolocator = Nominatim(user_agent="student-address-geocoder")

from geopy.extra.rate_limiter import RateLimiter
# Wrap geolocator.geocode in a RateLimiter configured with a minimum delay of
# one second (min_delay_seconds=1). The address lookups below call this wrapper
# rather than geolocator.geocode directly.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Resolve a street address to latitude/longitude through the module-level
# `geocode` callable (OpenStreetMap's Nominatim).
def address_to_coordinates(address):
    """Return ``(latitude, longitude)`` for *address*, or ``None``.

    ``None`` is returned both when the lookup raises ``GeocoderTimedOut`` and
    when the geocoder yields a falsy result for the address.
    """
    try:
        match = geocode(address, timeout=1000)
    except GeocoderTimedOut:
        return None

    return (match.latitude, match.longitude) if match else None

def people_addresses_to_people_coordinates(people_addresses):
    """Geocode an iterable of ``(name, address)`` pairs.

    Returns a dict mapping each name to the coordinates produced by
    ``address_to_coordinates``. People whose address yields no coordinates are
    left out of the result and reported with a printed message.
    """
    located = {}
    for name, address in people_addresses:
        point = address_to_coordinates(address)
        if not point:
            # Lookup failed: report it and leave this person out of the result.
            print(f"Unable to find coordinates for {name}'s address: [{address}]")
            continue
        located[name] = point
    return located

# The Haversine formula gives the great-circle distance between two points on
# Earth's surface from their geographical coordinates (latitude and longitude).
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance as a float, computed on a sphere of radius 6371 km.
    """
    R = 6371  # Earth radius in km

    d_lat = math.radians(lat2 - lat1)
    d_lon = math.radians(lon2 - lon1)

    sin_half_lat = math.sin(d_lat / 2)
    sin_half_lon = math.sin(d_lon / 2)

    a = sin_half_lat * sin_half_lat + \
        math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * \
        sin_half_lon * sin_half_lon

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair outside that range (e.g. for antipodal points), which would make
    # math.sqrt raise "math domain error". Clamp with comparisons rather than
    # min()/max() so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c


# Map students to the closest teacher using the Haversine distance formula
def map_teachers_to_students(students, teachers, top_n_teachers=5, max_students_per_teacher=None):
    """Assign every student to a nearby teacher while balancing class sizes.

    Args:
        students: Dict mapping student name -> (latitude, longitude).
        teachers: Dict mapping teacher name -> (latitude, longitude).
        top_n_teachers: How many of the nearest teachers to record per student
            (capped at the number of teachers).
        max_students_per_teacher: Preferred capacity of each teacher. Defaults
            to ``len(students) // len(teachers)``.

    Returns:
        A ``(teacher_student_map, student_teacher_map)`` tuple:
        ``teacher_student_map`` maps each teacher name to the list of student
        names assigned to them; ``student_teacher_map`` maps each student name
        to the names of their ``top_n_teachers`` nearest teachers, nearest
        first.

    Students are processed in ``students`` iteration order. Each one goes to
    the nearest teacher that is still below capacity. When every teacher is at
    capacity, the student goes to the teacher with the fewest students, with
    ties resolved in favour of the nearest teacher.
    """
    teacher_student_map = {teacher_name: [] for teacher_name in teachers}
    student_teacher_map = {}

    if not teachers:
        # Nobody to assign to. Returning early also avoids dividing by zero
        # when computing the default capacity below.
        return teacher_student_map, {student_name: [] for student_name in students}

    top_n_teachers = min(top_n_teachers, len(teachers))

    if max_students_per_teacher is None:
        max_students_per_teacher = len(students) // len(teachers)

    for student_name, student_coordinates in students.items():
        teacher_distances = sorted(
            (
                (teacher_name, haversine_distance(*student_coordinates, *teacher_coordinates))
                for teacher_name, teacher_coordinates in teachers.items()
            ),
            key=lambda pair: pair[1],
        )
        nearest_first = [teacher_name for teacher_name, _ in teacher_distances]

        # Nearest teacher that still has room, if any.
        chosen = next(
            (
                teacher_name
                for teacher_name in nearest_first
                if len(teacher_student_map[teacher_name]) < max_students_per_teacher
            ),
            None,
        )
        if chosen is None:
            # Everyone is full: fall back to the least populated teacher.
            # min() keeps the first minimal element, and the candidates are
            # ordered nearest-first, so ties go to the closest teacher rather
            # than to whichever teacher happens to come first in the dict.
            chosen = min(nearest_first, key=lambda teacher_name: len(teacher_student_map[teacher_name]))

        teacher_student_map[chosen].append(student_name)

        # Store the top_n_teachers for each student in student_teacher_map
        student_teacher_map[student_name] = nearest_first[:top_n_teachers]

    return teacher_student_map, student_teacher_map


# Geocode both address lists into {name: coordinates} dicts. Anyone whose
# address yields no coordinates is reported via print and omitted.
students = people_addresses_to_people_coordinates(student_addresses)
teachers = people_addresses_to_people_coordinates(teacher_addresses)


# Run the assignment with the default top_n_teachers and per-teacher capacity.
teacher_student_map, student_teacher_map = map_teachers_to_students(students, teachers)

# Show the students assigned to each teacher, then each student's list of
# nearest teachers.
pprint(teacher_student_map)
print()
pprint(student_teacher_map)