Make sure the required packages are installed before running the cells below
(e.g., pip install pyyaml pandas numpy)

In [312]:
import urllib
import yaml
import pandas as pd
import random
import numpy as np
from opcode import haslocal
from pprint import pprint


In [313]:
# Load the student roster. index_col=False keeps pandas from promoting the
# first CSV column to the index; RUID becomes the index in a later cell.
url = 'datasetC_with_roommate_pref.csv'
student_df = pd.read_csv(url, index_col=False)

In [314]:
# Load the room inventory. index_col=False keeps the default positional index,
# which later cells use as the row label when marking a room as taken.
url = 'final_room_data.csv'
rooms_df = pd.read_csv(url, index_col=False)

In [315]:
# Freshmen AND sophomores form the priority group (the mask name is historical).
freshman_set = student_df['student_year'].isin(['Freshman', 'Sophomore'])

# Split the roster into the priority group and everyone else.
priority_students = student_df[freshman_set]
other_students = student_df[~freshman_set]

# Randomly shuffle each group.
# random_state is deliberately left unset: the previous
# `random_state=np.random.seed()` evaluated to None anyway, but also reseeded
# NumPy's global generator as a side effect, which made the lottery impossible
# to reproduce by seeding beforehand. Call np.random.seed(<int>) before this
# cell to get a repeatable draw.
# drop=True discards the pre-shuffle row numbers instead of leaking them into
# a stray 'index' column.
priority_students = priority_students.sample(frac=1).reset_index(drop=True)
other_students = other_students.sample(frac=1).reset_index(drop=True)

# Re-assemble the roster with the priority group first.
student_df = pd.concat([priority_students, other_students], ignore_index=True)

# Lottery numbers follow the shuffled order: 1 is served first.
student_df['lottery_number'] = range(1, len(student_df) + 1)

# Index the roster by RUID so students can be looked up by id.
student_df.set_index('RUID', inplace=True)

In [316]:
# 'preferred_price_range' holds "<min>-<max>" text; break it into two integer
# columns, min_preferred_price and max_preferred_price.
price_bounds = student_df['preferred_price_range'].str.split('-', expand=True)
student_df[['min_preferred_price', 'max_preferred_price']] = price_bounds

# Trim the whitespace around each bound, then convert it to an integer.
for bound_column in ('min_preferred_price', 'max_preferred_price'):
    student_df[bound_column] = student_df[bound_column].str.strip().astype(int)

# Keep an untouched copy of the full roster: later cells drop rows from
# student_df in place while building the per-group lists.
students_list_df = student_df.copy()

In [317]:
allocating = dict()
# Number of room rows per hall, keyed by hall_id.
available_rooms = rooms_df['hall_id'].value_counts().to_dict()
# Add the availability flag (1 = free, 0 = taken) to EVERY room.
# The previous `rooms_df.loc[0:5330, ...]` slice hard-coded the dataset size:
# any room with an index label above 5330 was left as NaN and could never be
# allocated.
rooms_df['is_available'] = 1

In [318]:
# Map each RUID to the hall ids parsed out of the stringified list stored in
# 'preferred_hall_ids'. Spaces become commas first, then brackets and single
# quotes are stripped, so "['A', 'B']" parses to ['A', '', 'B'] (the empty
# token comes from the ", " separator).
_hall_junk = str.maketrans('', '', "[]'")
preference_list = {}
for RUID, raw_halls in student_df['preferred_hall_ids'].items():
    preference_list[RUID] = raw_halls.replace(' ', ',').translate(_hall_junk).split(',')

In [319]:
# Map each RUID to the roommate ids (kept as strings) parsed out of the
# stringified list in 'roommate_preferences'. The parse can yield empty
# tokens, which are skipped below.
roommate_list = dict()
for RUID, row in student_df.iterrows():
    roommate_list[RUID] = row['roommate_preferences'].replace(' ', ",").replace('[','').replace(']','').replace('\'','').split(',') #string split

# Find mutual roommates: two different students who each list the other.
mutual_pairs = set()
for RUID, preferences in roommate_list.items():
    for pref in preferences:
        if not pref:
            continue  # empty token left over from the string split
        try:
            partner = int(pref)
        except ValueError:
            continue  # malformed id: ignore it rather than abort the whole run
        # A student listing their own RUID must not become a (x, x) "pair".
        if partner == RUID or partner not in roommate_list:
            continue
        if str(RUID) in roommate_list[partner]:
            # Sort so (a, b) and (b, a) collapse into one entry.
            mutual_pairs.add(tuple(sorted([RUID, partner])))

# Downstream cells iterate over and index into this, so hand back a list.
mutual_pairs = list(mutual_pairs)


In [320]:
# Final allocation, keyed by RUID; filled in by the allocation cells below.
result = {}

In [321]:
# List making functions
def create_accessbility_list(df):
    """Split off the students whose ``accessibility_need`` equals 1.

    The matching rows are removed from ``df`` IN PLACE.

    Returns:
        A ``(selected_students, df)`` tuple: the removed rows, and the same
        ``df`` object that was passed in (now without those rows).
    """
    needs_accessibility = df['accessibility_need'] == 1
    selected_students = df.loc[needs_accessibility]
    df.drop(index=selected_students.index, inplace=True)
    return selected_students, df

def create_low_income_list(df):
    """Split off the students whose ``low_income_status`` equals 1.

    The matching rows are removed from ``df`` IN PLACE.

    Returns:
        A ``(selected_students, df)`` tuple: the removed rows, and the same
        ``df`` object that was passed in (now without those rows).
    """
    is_low_income = df['low_income_status'] == 1
    selected_students = df.loc[is_low_income]
    df.drop(index=selected_students.index, inplace=True)
    return selected_students, df

In [322]:
# 'student_year' values grouped by degree level; the room finders use these
# lists to decide which room contract type a student may be given.
ug_student_yrs = [
    'Freshman',
    'Sophomore',
    'Junior',
    'Senior',
]
pg_student_yrs = [
    'Masters',
    'Doctorate',
]

def allocate_first_available_room(df, ruid, res):
    """Give student ``ruid`` the first room row of ``df``.

    Records ``[hall_id, building_id, room_id]`` under ``res[ruid]`` and marks
    the room as taken.

    NOTE: availability is written to the module-level ``rooms_df`` (looked up
    by the row's index label), not to ``df``. ``df`` must be non-empty;
    ``df.iloc[0]`` raises IndexError otherwise.

    Returns:
        The same ``res`` dict, updated in place.
    """
    first_room = df.iloc[0]
    assignment = [first_room[column] for column in ('hall_id', 'building_id', 'room_id')]
    rooms_df.loc[first_room.name, 'is_available'] = 0
    res[ruid] = assignment
    return res

def find_rooms_based_on_student_requirements(rooms_df, student, ug_student_yrs, pg_student_yrs, accessibility_required=False):
    """Return the free rooms that satisfy every stated requirement of ``student``.

    A room is kept when it is available, matches the student's bathroom,
    laundry and residence-type values, is a 'Single' if a single is preferred,
    costs no more than ``max_preferred_price`` or ``max_price``, carries the
    contract type for the student's degree level, and (only when
    ``accessibility_required`` is true) has accessibility ramps.
    """
    year = student['student_year']
    contract = rooms_df['room_contract_type']
    price = rooms_df['price']
    room_type = rooms_df['room_type']

    # When no single is requested, compare the column with itself so that the
    # room type places no constraint.
    wanted_type = 'Single' if student['is_single_preferred'] else room_type

    is_free = rooms_df['is_available'] == 1
    bathroom_ok = rooms_df['has_private_bathroom'] == student['is_private_bathroom_preferred']
    laundry_ok = rooms_df['has_laundry'] == student['laundry_availibility']
    type_ok = room_type == wanted_type
    residence_ok = rooms_df['residence_type'] == student['preferred_residence_type']
    affordable = (price <= student['max_preferred_price']) | (price <= student['max_price'])

    # The contract type must match the student's degree level.
    undergrad_ok = (year in ug_student_yrs) & (contract == 'Undergraduate Academic Year (Two Semesters)')
    grad_ok = (year in pg_student_yrs) & (contract == 'Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)')

    access_ok = (not accessibility_required) | (rooms_df['has_accessibility_ramps'] == 1)

    keep = (
        is_free & bathroom_ok & laundry_ok & type_ok & residence_ok &
        affordable & (undergrad_ok | grad_ok) & access_ok
    )
    return rooms_df.loc[keep]

def find_rooms_based_on_type(rooms_df, student, ug_student_yrs, pg_student_yrs, accessibility_required=False):
    """Return free rooms matching the student's room type, residence type, price and year.

    Unlike the strict finder, bathroom and laundry values are not checked. A
    room passes the price test when it costs no more than
    ``max_preferred_price`` or ``max_price``. Accessibility ramps are required
    only when ``accessibility_required`` is true.
    """
    year = student['student_year']
    contract = rooms_df['room_contract_type']
    price = rooms_df['price']
    room_type = rooms_df['room_type']

    # When no single is requested, compare the column with itself so that the
    # room type places no constraint.
    wanted_type = 'Single' if student['is_single_preferred'] else room_type

    is_free = rooms_df['is_available'] == 1
    type_ok = room_type == wanted_type
    residence_ok = rooms_df['residence_type'] == student['preferred_residence_type']
    affordable = (price <= student['max_preferred_price']) | (price <= student['max_price'])

    # The contract type must match the student's degree level.
    undergrad_ok = (year in ug_student_yrs) & (contract == 'Undergraduate Academic Year (Two Semesters)')
    grad_ok = (year in pg_student_yrs) & (contract == 'Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)')

    access_ok = (not accessibility_required) | (rooms_df['has_accessibility_ramps'] == 1)

    keep = is_free & type_ok & residence_ok & affordable & (undergrad_ok | grad_ok) & access_ok
    return rooms_df.loc[keep]

def find_price_and_year_rooms(rooms_df, student, ug_student_yrs, pg_student_yrs, accessibility_required=False):
    """Return free rooms the student can afford whose contract fits their year.

    A room passes the price test when it costs no more than
    ``max_preferred_price`` or ``max_price``. Accessibility ramps are required
    only when ``accessibility_required`` is true.
    """
    year = student['student_year']
    contract = rooms_df['room_contract_type']
    price = rooms_df['price']

    is_free = rooms_df['is_available'] == 1
    affordable = (price <= student['max_preferred_price']) | (price <= student['max_price'])

    # The contract type must match the student's degree level.
    undergrad_ok = (year in ug_student_yrs) & (contract == 'Undergraduate Academic Year (Two Semesters)')
    grad_ok = (year in pg_student_yrs) & (contract == 'Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)')

    access_ok = (not accessibility_required) | (rooms_df['has_accessibility_ramps'] == 1)

    keep = is_free & affordable & (undergrad_ok | grad_ok) & access_ok
    return rooms_df.loc[keep]

def find_fallback_rooms(rooms_df, accessibility_required=False):
    """Last-resort search: return whatever free rooms exist.

    When ``accessibility_required`` is true, free rooms with accessibility
    ramps are preferred; if none exist, every free room is returned instead.
    The original code computed that second candidate set but then tested and
    returned the empty first one, so this branch could never yield a room.

    Returns:
        The matching rows of ``rooms_df``, or an empty ``pd.DataFrame()`` when
        no room is free at all.
    """
    is_free = rooms_df['is_available'] == 1

    if accessibility_required:
        accessible_rooms_df = rooms_df.loc[is_free & (rooms_df['has_accessibility_ramps'] == 1)]
        if not accessible_rooms_df.empty:
            return accessible_rooms_df

    # Just pick anything that's available.
    available_rooms_df = rooms_df.loc[is_free]
    if not available_rooms_df.empty:
        return available_rooms_df
    return pd.DataFrame()

def allocate_rooms_for_students(students_df, needs_accessibility, preference_list, rooms_df, ug_student_yrs, pg_student_yrs, result):
    """Run serial dictatorship over ``students_df`` in row order.

    For each student the first non-empty candidate set wins, searched from the
    most to the least specific:

    1. a free, affordable room in one of the student's preferred halls
       (in the order listed in ``preference_list``),
    2. a room meeting every stated requirement,
    3. a room matching room type, residence type, price and year,
    4. a room matching price and year only,
    5. any free room (fallback).

    The first row of the winning set is allocated, and the student is removed
    from ``preference_list``. A student for whom nothing is found stays in
    ``preference_list`` and gets no entry in ``result``.

    Fixes relative to the previous version:
    * a preferred-hall match is now actually used; before, it was always
      overwritten by the type-based search,
    * the preferred-hall search applies the same accessibility rule as the
      other finders (ramps required only when ``needs_accessibility`` is
      true) instead of requiring ramps to be absent for everyone else.

    Args:
        students_df: students to place, indexed by RUID.
        needs_accessibility: whether this group must get rooms with ramps.
        preference_list: RUID -> list of preferred hall ids; mutated in place.
        rooms_df: room inventory with an ``is_available`` column.
        ug_student_yrs: ``student_year`` values treated as undergraduate.
        pg_student_yrs: ``student_year`` values treated as graduate.
        result: RUID -> room details; updated and returned.

    Returns:
        The updated ``result`` mapping.
    """
    # These three finders share a signature and are tried in order.
    staged_finders = (
        find_rooms_based_on_student_requirements,
        find_rooms_based_on_type,
        find_price_and_year_rooms,
    )

    for ruid, student in students_df.iterrows():
        available_rooms_df = pd.DataFrame()

        # Stage 1: walk the preferred halls in order and stop at the first
        # hall that still has a suitable free room.
        for pref in preference_list.get(ruid, ()):
            candidate_rooms_df = rooms_df.loc[
                (rooms_df['hall_id'] == pref) &
                (rooms_df['is_available'] == 1) &
                ((not needs_accessibility) | (rooms_df['has_accessibility_ramps'] == 1)) &
                ((rooms_df['price'] <= student['max_preferred_price']) | (rooms_df['price'] <= student['max_price']))
            ]
            if not candidate_rooms_df.empty:
                available_rooms_df = candidate_rooms_df
                break

        # Stages 2-4: progressively looser criteria, only while nothing has
        # been found yet.
        for finder in staged_finders:
            if not available_rooms_df.empty:
                break
            available_rooms_df = finder(
                rooms_df, student, ug_student_yrs, pg_student_yrs,
                accessibility_required=needs_accessibility,
            )

        # Stage 5: anything that is still free.
        if available_rooms_df.empty:
            available_rooms_df = find_fallback_rooms(rooms_df, accessibility_required=needs_accessibility)

        if not available_rooms_df.empty:
            result = allocate_first_available_room(available_rooms_df, ruid, result)
            # Housed students leave the waiting list.
            preference_list.pop(ruid, None)

    return result

In [323]:
# Phase 1: run serial dictatorship (SD) for students with an accessibility need.
# create_accessbility_list removes those rows from student_df in place, so
# remaining_students is the same (now smaller) student_df object.
accessible_students, remaining_students = create_accessbility_list(student_df)
result = allocate_rooms_for_students(accessible_students, True, preference_list, rooms_df, ug_student_yrs, pg_student_yrs, result)

In [324]:
# Phase 2: run SD for low-income students among those left after phase 1.
# create_low_income_list drops them from remaining_students in place; what is
# left (lottery_students) is handled in the next cell.
low_income_students, lottery_students = create_low_income_list(remaining_students)
result = allocate_rooms_for_students(low_income_students, False, preference_list, rooms_df, ug_student_yrs, pg_student_yrs, result)

In [325]:
# Phase 3: run SD for the lottery students (NOT accessible or low income),
# in the row order of lottery_students.
result = allocate_rooms_for_students(lottery_students, False, preference_list, rooms_df, ug_student_yrs, pg_student_yrs, result)

In [326]:
# Anyone else who hasn't gotten housing, we just put somewhere.
# Iterate over a snapshot of the waiting list: housed students are popped from
# preference_list inside the loop, and changing a dict's size while iterating
# it raises RuntimeError.
for ruid, preferences in list(preference_list.items()):
    available_rooms_df = pd.DataFrame()

    # Prefer a free room in one of the student's preferred halls.
    for pref in preferences:
        # Each comparison needs its own parentheses: `&` binds tighter than
        # `==`, so `A & B == 1` would be evaluated as `(A & B) == 1`.
        available_rooms_df = rooms_df.loc[(rooms_df['hall_id'] == pref) & (rooms_df['is_available'] == 1)]
        if not available_rooms_df.empty:
            break

    if available_rooms_df.empty:
        # No preferred hall has space: consider every free room instead.
        available_rooms_df = rooms_df.loc[rooms_df['is_available'] == 1]

    if not available_rooms_df.empty:
        # Allocate the first room of the candidate set.
        result = allocate_first_available_room(available_rooms_df, ruid, result)
        preference_list.pop(ruid, None)

In [327]:
# Everyone still on the waiting list ends up without a room: record the
# placeholder string 'nan' for them.
result.update({ruid: 'nan' for ruid in preference_list})

In [328]:
def extract_numeric_part_of_room_id(room_id):
    """Return the part of ``room_id`` before its first '-' (all of it if there is none)."""
    prefix, _, _ = room_id.partition('-')
    return prefix

# Occupants per physical room, keyed by (hall_id, building_id, room-id prefix).
# Entries of a shared room have the same prefix before the '-' in room_id.
roomwise_students = {}

# Populate roomwise_students.
for student_id, room in result.items():
    # Unhoused students carry the placeholder string 'nan' rather than a
    # [hall_id, building_id, room_id] list. The old `room[0] == "nan"` test
    # compared the single character 'n' and never matched, so those students
    # were grouped into a bogus room ('n', 'a', 'n').
    if isinstance(room, str) or room[0] == "nan":
        continue
    # Keep only the part of the room id before the '-'.
    numeric_room_id = extract_numeric_part_of_room_id(room[2])
    room_key = (room[0], room[1], numeric_room_id)

    # People who share a room accumulate under the same key.
    roomwise_students.setdefault(room_key, []).append(student_id)

# Singles can't have roommates: keep only rooms with more than one occupant.
shared_rooms = {room: students for room, students in roomwise_students.items() if len(students) > 1}


In [329]:
def find_acceptable_roommate_room(rooms_df, student, ug_student_yrs, pg_student_yrs, accessibility_required=False):
    """Return the rows of ``rooms_df`` that ``student`` could live in.

    Availability is NOT checked here. A room qualifies when it costs no more
    than ``max_preferred_price`` or ``max_price``, its contract type fits the
    student's year, and (only when ``accessibility_required`` is true) it has
    accessibility ramps.
    """
    year = student['student_year']
    contract = rooms_df['room_contract_type']
    price = rooms_df['price']

    affordable = (price <= student['max_preferred_price']) | (price <= student['max_price'])

    # The contract type must match the student's degree level.
    undergrad_ok = (year in ug_student_yrs) & (contract == 'Undergraduate Academic Year (Two Semesters)')
    grad_ok = (year in pg_student_yrs) & (contract == 'Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)')

    access_ok = (not accessibility_required) | (rooms_df['has_accessibility_ramps'] == 1)

    keep = affordable & (undergrad_ok | grad_ok) & access_ok
    return rooms_df.loc[keep]

def can_swap(student, target_room, ug_student_yrs, pg_student_yrs):
    """Return True when at least one row of ``target_room`` is acceptable for ``student``.

    Acceptability is decided by ``find_acceptable_roommate_room``; ramps are
    required when the student's ``accessibility_need`` equals 1.
    """
    needs_accessibility = student['accessibility_need'] == 1
    acceptable_rooms = find_acceptable_roommate_room(
        target_room, student, ug_student_yrs, pg_student_yrs,
        accessibility_required=needs_accessibility,
    )
    return not acceptable_rooms.empty

# Try to bring each mutual pair into the same room: one member (the "mover")
# trades places with a current occupant of the other member's (the "host's")
# room, and that occupant takes over the mover's original room.
#
# Fixes relative to the previous version:
# * the mover/host ids were inverted, so the swap exchanged the host with one
#   of their own roommates and the pair never ended up together,
# * `possible_room` could be stale (left over from the previous pair) or unbound,
# * `== None` comparisons are replaced by explicit guards,
# * roomwise_students is kept in sync after a swap so later pairs do not act
#   on outdated occupancy.
mutual_pair_set = set(mutual_pairs)  # O(1) lookups for the "no kicking" check

for pair in mutual_pairs:
    # Students who never received a room have nothing to swap.
    if pair[0] in preference_list or pair[1] in preference_list:
        continue

    assigned = {ruid: result.get(ruid) for ruid in pair}
    # Guard against missing entries and the 'nan' placeholder string.
    if any(room is None or isinstance(room, str) for room in assigned.values()):
        continue

    room_keys = {
        ruid: (room[0], room[1], extract_numeric_part_of_room_id(room[2]))
        for ruid, room in assigned.items()
    }
    if room_keys[pair[0]] == room_keys[pair[1]]:
        continue  # already roommates (also covers a degenerate self-pair)

    # The inventory row(s) of each member's currently assigned room entry.
    room_rows = {
        ruid: rooms_df.loc[
            (rooms_df['hall_id'] == room[0]) &
            (rooms_df['building_id'] == room[1]) &
            (rooms_df['room_id'] == room[2])
        ]
        for ruid, room in assigned.items()
    }

    # Choose the direction: first try moving pair[1] into pair[0]'s room, and
    # only if that room is unacceptable for pair[1] try the reverse.
    host_id = mover_id = None
    for candidate_host, candidate_mover in ((pair[0], pair[1]), (pair[1], pair[0])):
        candidate = students_list_df.loc[candidate_mover]
        fitting_rooms = find_acceptable_roommate_room(
            room_rows[candidate_host], candidate, ug_student_yrs, pg_student_yrs,
            accessibility_required=(candidate['accessibility_need'] == 1),
        )
        if not fitting_rooms.empty:
            host_id, mover_id = candidate_host, candidate_mover
            break
    if host_id is None:
        continue

    occupants = roomwise_students.get(room_keys[host_id], [])
    for roommate_id in occupants:
        if roommate_id in pair:
            continue  # never swap with the pair's own members

        # No kicking roommates: leave alone anyone who is in a mutual pair
        # with either member of this pair.
        if any(
            (member_id, roommate_id) in mutual_pair_set or (roommate_id, member_id) in mutual_pair_set
            for member_id in pair
        ):
            continue

        roommate = students_list_df.loc[roommate_id]

        # The displaced roommate must be able to live in the mover's original room.
        if can_swap(roommate, room_rows[mover_id], ug_student_yrs, pg_student_yrs):
            # Swap their rooms.
            result[mover_id], result[roommate_id] = result[roommate_id], result[mover_id]

            # Keep the occupancy map consistent with the new assignment.
            occupants[occupants.index(roommate_id)] = mover_id
            origin_occupants = roomwise_students.get(room_keys[mover_id], [])
            if mover_id in origin_occupants:
                origin_occupants[origin_occupants.index(mover_id)] = roommate_id
            break

In [330]:
# Write the final RUID -> assigned room table to CSV.
from pathlib import Path

output_path = Path('results/datasetC_result_3.csv')
# Create the output folder if needed; to_csv does not create missing
# directories and would otherwise fail after the whole allocation has run.
output_path.parent.mkdir(parents=True, exist_ok=True)

df = pd.DataFrame(list(result.items()), columns=['RUID', 'Assigned Rooms'])
df.to_csv(output_path)