In [1]:
# read in preplacements.csv into a dataframe

import pandas as pd
import numpy as np

df = pd.read_csv('preplacements.csv')


# Dorm Mapping
# 1 = East
# 2 = North
# 3 = South
# 4 = West
# 5 = Atwood
# 6 = Sontag
# 7 = Case
# 8 = Drinkward
# 9 = Linde
dorm_mapping = {
    'East': 1,
    'North': 2,
    'South': 3,
    'West': 4,
    'Atwood': 5,
    'Sontag': 6,
    'Case': 7,
    'Drinkward': 8,
    'Linde': 9,
    'Garrett House': 10
}

# remove any rows that don't have email, Dorm, or Room
df = df[df['Email'].notna() & df['Dorm'].notna() & df['Room'].notna()]

# remove any uppercase letters
df['Email'] = df['Email'].str.lower()
# if email ends in @hmc.edu, replace with @g.hmc.edu
df['Email'] = df['Email'].str.replace('@hmc.edu', '@g.hmc.edu')

# print the first 5 rows of the dataframe

# strip any whitespace from first name and last name and email and dorm and room
df['First Name'] = df['First Name'].str.strip()
df['Last Name'] = df['Last Name'].str.strip()
df['Email'] = df['Email'].str.strip()
df['Dorm'] = df['Dorm'].str.strip()
df['Room'] = df['Room'].str.strip()

# remove any rows that are not in the dorm mapping
df = df[df['Dorm'].isin(dorm_mapping.keys())]

print(df)


             ID First Name   Last Name                  Email    Dorm Room
0    40210325.0      Ethan     Vazquez     evazquez@g.hmc.edu  Atwood  110
1    40224654.0      Livia     Ordonez     lordonez@g.hmc.edu  Atwood  114
2    40224478.0      Anika      Pandey     anpandey@g.hmc.edu  Atwood  114
3    40213260.0      Arman      Khasru      akhasru@g.hmc.edu  Atwood  123
4    40217298.0     Edward      Donson      edonson@g.hmc.edu  Atwood  125
..          ...        ...         ...                    ...     ...  ...
226  40221969.0   Adrianne        Baik        abaik@g.hmc.edu    West  423
227  40217934.0    McKenna    McMurray    mmcmurray@g.hmc.edu    West  471
228  40223717.0       Emmy       Hoorn       ehoorn@g.hmc.edu    West  473
229  40215698.0      Diane        Park       dipark@g.hmc.edu    West  475
230  40221268.0       Adam  Vanluvanee  avanluvanee@g.hmc.edu   Linde  144

[223 rows x 6 columns]


In [2]:
roster_df = pd.read_csv('roster.csv')

roster_df = roster_df[roster_df['HMC Email'].notna()]

roster_df['HMC Email'] = roster_df['HMC Email'].str.lower()

roster_df['HMC Email'] = roster_df['HMC Email'].str.replace('@hmc.edu', '@g.hmc.edu')

roster_df = roster_df[roster_df['HMC Email'].notna()]

roster_df['HMC Email'] = roster_df['HMC Email'].str.strip()

# unique values for 'Class Code for SP 2025'
roster_df['Class Code for SP 25'].unique()

# this is for the future year so they are shifted by one year
class_code_mapping = {
    'FF': 'sophomore',
    'FR': 'sophomore',
    'SO': 'junior',
    'JR': 'senior',
    'SR': 'senior',
}

# for each row, create a dictionary where key is email and value is class mapped to sophomore, junior, or senior
class_mapping = {}
for index, row in roster_df.iterrows():
    class_mapping[row['HMC Email']] = class_code_mapping[row['Class Code for SP 25']]

# print the first 5 rows of the dataframe
print(df.head())
print(class_mapping)

           ID First Name Last Name               Email    Dorm Room
0  40210325.0      Ethan   Vazquez  evazquez@g.hmc.edu  Atwood  110
1  40224654.0      Livia   Ordonez  lordonez@g.hmc.edu  Atwood  114
2  40224478.0      Anika    Pandey  anpandey@g.hmc.edu  Atwood  114
3  40213260.0      Arman    Khasru   akhasru@g.hmc.edu  Atwood  123
4  40217298.0     Edward    Donson   edonson@g.hmc.edu  Atwood  125
{'jabdelmalek@g.hmc.edu': 'senior', 'babraham@g.hmc.edu': 'junior', 'aacker@g.hmc.edu': 'senior', 'badili@g.hmc.edu': 'sophomore', 'radorkor@g.hmc.edu': 'junior', 'cake@g.hmc.edu': 'sophomore', 'kaldamtajima@g.hmc.edu': 'senior', 'falemayehu@g.hmc.edu': 'senior', 'mialexander@g.hmc.edu': 'senior', 'hallen@g.hmc.edu': 'sophomore', 'eallgor@g.hmc.edu': 'senior', 'calmond@g.hmc.edu': 'sophomore', 'eambrizlazo@g.hmc.edu': 'junior', 'games@g.hmc.edu': 'sophomore', 'thanderson@g.hmc.edu': 'junior', 'avanderson@g.hmc.edu': 'senior', 'fandradefernandes@g.hmc.edu': 'senior', 'mandrews@g.hmc.edu

In [3]:
# find all preplaced users whose email is not in the roster
preplaced_df = df
preplaced_df = preplaced_df[~preplaced_df['Email'].isin(roster_df['HMC Email'])]
print(preplaced_df)


Empty DataFrame
Columns: [ID, First Name, Last Name, Email, Dorm, Room]
Index: []


In [4]:
reslife_df = pd.read_csv('reslife.csv')

reslife_df = reslife_df[reslife_df['Email'].notna()]

reslife_df['Email'] = reslife_df['Email'].str.lower()

reslife_df['Email'] = reslife_df['Email'].str.replace('@hmc.edu', '@g.hmc.edu')

reslife_df['Email'] = reslife_df['Email'].str.strip()

# if any row has Notes property of na, throw an error
if reslife_df['Notes'].isna().any():
    raise Exception("Notes column has NaN values")

# print(reslife_df)

# Remove string 'Res Life' from Notes column
reslife_df['Notes'] = reslife_df['Notes'].str.replace('Res Life', '')

# Remove any whitespace from Notes column
reslife_df['Notes'] = reslife_df['Notes'].str.strip()

# make sure notes only contains the following values: Mentor Pull, Mentor, Proctor Pull, Proctor
for index, row in reslife_df.iterrows():
    if row['Notes'] not in ['Mentor Pull', 'Mentor', 'Proctor Pull', 'Proctor']:
        print(row['Notes'])
        raise Exception("Notes column contains invalid values")

# make sure Notes column is all lowercase
reslife_df['Notes'] = reslife_df['Notes'].str.lower()

print(reslife_df)


email_to_reslife_mapping = {}

for index, row in reslife_df.iterrows():
    email_to_reslife_mapping[row['Email']] = row['Notes']

print(email_to_reslife_mapping)




            ID First Name      Last Name                    Email    Dorm  \
0   40221424.0   Cameron       Warmerdam     cwarmerdam@g.hmc.edu  South    
1   40227166.0    Alisha            Wong        aliwong@g.hmc.edu  South    
2   40228262.0    Stephen  Kanti Mahanty  skantimahanty@g.hmc.edu  South    
3   40217934.0   McKenna        McMurray      mmcmurray@g.hmc.edu   West    
4   40215698.0     Diane            Park         dipark@g.hmc.edu   West    
..         ...        ...            ...                      ...     ...   
68  40216037.0   Kimberly          Lopez         klopez@g.hmc.edu    Case   
70  40215531.0     Simone        Johnson      sijohnson@g.hmc.edu   Linde   
71  40215674.0      Grace         Everts        geverts@g.hmc.edu   Linde   
72  40215659.0     Avery        Anderson     avanderson@g.hmc.edu   Linde   
73  40221826.0      Kyra             Roy           kroy@g.hmc.edu   Linde   

    Room         Notes Accepeted Pre-Placement  
0   304A        mentor    

In [5]:
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# import env variables
import os
from dotenv import load_dotenv
from pathlib import Path

# import libraries for ssh tunneling
import sshtunnel

dotenv_path = os.path.join(os.getcwd(), '.env')
print(dotenv_path)

load_dotenv(dotenv_path=dotenv_path, verbose=True)

sql_pass = os.environ.get('SQL_PASS')
sql_ip = os.environ.get('SQL_IP')
sql_db_name = os.environ.get('SQL_DB_NAME')
sql_user = os.environ.get('SQL_USER')

tunnel_host = os.environ.get('TUNNEL_HOST')
tunnel_port = os.environ.get('TUNNEL_PORT')
tunnel_user = os.environ.get('TUNNEL_USER')
tunnel_pass = os.environ.get('TUNNEL_PASS')

tunnel = sshtunnel.SSHTunnelForwarder(
    (tunnel_host, int(tunnel_port)),
    ssh_username=tunnel_user,
    ssh_password=tunnel_pass,
    remote_bind_address=(sql_ip, 5432)
)

# After starting the tunnel
tunnel.start()

# Get the local bind port that the tunnel is using
local_port = tunnel.local_bind_port

# Update connection string to use localhost and the tunneled port
CONNSTR = f'postgresql://{sql_user}:{sql_pass}@127.0.0.1:{local_port}/{sql_db_name}'

# Now create your SQLAlchemy engine with this connection string
engine = create_engine(CONNSTR)

# Test the connection
with engine.connect() as connection:
    result = connection.execute(text("SELECT 1"))
    print("Connection successful!")

/home/tomql/workspaces/roomdraw/database/.env
Connection successful!


In [6]:
with engine.connect() as connection:
    # loop through the dataframe and insert each row into the database
    for index, row in df.iterrows():
        # first select the user from the database to check if they exist
        query = f"SELECT first_name, last_name, year, draw_number, in_dorm, email, preplaced FROM Users WHERE email = '{row['Email']}'"
        result = connection.execute(text(query))
        user = result.fetchone()
        if user:
            if user[-1]:
                print("User is already preplaced, skipping")
                continue
            # wait for user to press enter to accept the user to be edited
            print("Original user: ", user)

            # edit the user by setting the values to the new values (only ones that matter are in_dorm, preplaced, and draw_number)
            values = {
                'in_dorm': 0,
                'preplaced': True,
                'draw_number': 0,
                'email': row['Email'],
            }
            if row['Email'] in class_mapping:
                values['year'] = class_mapping[row['Email']]
            else:
                raise Exception(f"User {row['Email']} not found in roster")
            
            if row['Email'] in email_to_reslife_mapping:
                values['reslife_role'] = email_to_reslife_mapping[row['Email']]
            else:
                values['reslife_role'] = "none"
            
            query = f"UPDATE Users SET in_dorm = :in_dorm, preplaced = :preplaced, draw_number = :draw_number, reslife_role = :reslife_role WHERE email = :email"
            print(query)
            input("User already exists, press enter to edit")
            connection.execute(text(query), values)
        else:
            values = {
                'first_name': row['First Name'],
                'last_name': row['Last Name'],
                'email': row['Email'],
                
                'draw_number': 0,
                'in_dorm': 0,
                'preplaced': True,
            }
            if row['Email'] in class_mapping:
                values['year'] = class_mapping[row['Email']]
            else:
                raise Exception(f"User {row['Email']} not found in roster")
            
            if row['Email'] in email_to_reslife_mapping:
                values['reslife_role'] = email_to_reslife_mapping[row['Email']]
            else:
                values['reslife_role'] = "none"
            
            # insert the user into the database
            query = f"INSERT INTO Users (year, first_name, last_name, email, draw_number, preplaced, in_dorm, reslife_role) VALUES (:year, :first_name, :last_name, :email, :draw_number, :preplaced, :in_dorm, :reslife_role)"
            connection.execute(text(query), values)
        
    connection.commit()
        
print("Successfully updated preplacements")
tunnel.stop()

Original user:  ('Landon', 'Tu', 'sophomore', Decimal('160'), 0, 'ltu@g.hmc.edu', False)
UPDATE Users SET in_dorm = :in_dorm, preplaced = :preplaced, draw_number = :draw_number, reslife_role = :reslife_role WHERE email = :email
Original user:  ('Thomas', 'Le', 'sophomore', Decimal('172'), 0, 'thole@g.hmc.edu', False)
UPDATE Users SET in_dorm = :in_dorm, preplaced = :preplaced, draw_number = :draw_number, reslife_role = :reslife_role WHERE email = :email
Original user:  ('Shadab', 'Sharif', 'junior', Decimal('11'), 0, 'ssharif@g.hmc.edu', False)
UPDATE Users SET in_dorm = :in_dorm, preplaced = :preplaced, draw_number = :draw_number, reslife_role = :reslife_role WHERE email = :email
Original user:  ('Aabhas', 'Senapati', 'junior', Decimal('120'), 0, 'asenapati@g.hmc.edu', False)
UPDATE Users SET in_dorm = :in_dorm, preplaced = :preplaced, draw_number = :draw_number, reslife_role = :reslife_role WHERE email = :email
User is already preplaced, skipping
User is already preplaced, skipping
O