In [1]:
# read in preplacements.csv into a dataframe

import pandas as pd
import numpy as np

raw_indorm_df = pd.read_csv('indorm.csv')

# print(indorm_df.head(30))

# remove rows with no email or if 'In Dorm' is not in the list of dorms names
# 1: 'East',
# 2: 'North',
# 3: 'South',
# 4: 'West',
# 5: 'Atwood',
# 6: 'Sontag',
# 7: 'Case',
# 8: 'Drinkward',
# 9: 'Linde'

dorm_mapping = {
    'East': 1,
    'North': 2,
    'South': 3,
    'West': 4,
    'Atwood': 5,
    'Sontag': 6,
    'Case': 7,
    'Drinkward': 8,
    'Linde': 9
}

# remove rows where 'In Dorm' is not in the list of dorms
indorm_df = raw_indorm_df[raw_indorm_df['In Dorm'].isin(dorm_mapping.keys())]

# remove rows where email is empty
indorm_df = indorm_df[indorm_df['Email'].notna()]

# remove rows where 'In Dorm' is empty
indorm_df = indorm_df[indorm_df['In Dorm'].notna()]

print(indorm_df.head(5))

# print rows removed
print(pd.concat([indorm_df,raw_indorm_df]).drop_duplicates(keep=False))

# all emails ending in @hmc.edu modify to @g.hmc.edu and also remove any uppercase letters
indorm_df['Email'] = indorm_df['Email'].str.replace('@hmc.edu', '@g.hmc.edu')
indorm_df['Email'] = indorm_df['Email'].str.lower()

# print the first 5 rows of the dataframe
print(indorm_df.head(5))




                         Email                 Full Name In Dorm
0        jabdelmalek@g.hmc.edu         Joseph Abdelmalek    Case
1       kaldamtajima@g.hmc.edu        Korin Aldam-Tajima  Atwood
2         falemayehu@g.hmc.edu          Fanuel Alemayehu    West
3        mialexander@g.hmc.edu             Mia Alexander   Linde
4  fandradefernandes@g.hmc.edu  Felipe Andrade Fernandes    Case
                   Email       Full Name In Dorm
24   hcolorado@g.hmc.edu  Haley Colorado     NaN
44    vglasser@g.hmc.edu   Jacob Glasser     NaN
50        than@g.hmc.edu         Tay Han     NaN
77    hknolton@g.hmc.edu  Hailey Knolton     NaN
91      hualiu@g.hmc.edu     Patrick Liu     NaN
92      jesliu@g.hmc.edu     Jessica Liu     NaN
116                  NaN   Azalea Morris     NaN
120   mynguyen@g.hmc.edu       My Nguyen     NaN
129      wpham@g.hmc.edu     Warren Pham     NaN
140      mruiz@g.hmc.edu   Marcella Ruiz     NaN
173     bozeng@g.hmc.edu        Bob Zeng     NaN
                      

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# import env variables
import os
from dotenv import load_dotenv
from pathlib import Path

# import libraries for ssh tunneling
import sshtunnel

dotenv_path = os.path.join(os.getcwd(), '.env')
print(dotenv_path)

load_dotenv(dotenv_path=dotenv_path, verbose=True)

sql_pass = os.environ.get('SQL_PASS')
sql_ip = os.environ.get('SQL_IP')
sql_db_name = os.environ.get('SQL_DB_NAME')
sql_user = os.environ.get('SQL_USER')

tunnel_host = os.environ.get('TUNNEL_HOST')
tunnel_port = os.environ.get('TUNNEL_PORT')
tunnel_user = os.environ.get('TUNNEL_USER')
tunnel_pass = os.environ.get('TUNNEL_PASS')

tunnel = sshtunnel.SSHTunnelForwarder(
    (tunnel_host, int(tunnel_port)),
    ssh_username=tunnel_user,
    ssh_password=tunnel_pass,
    remote_bind_address=(sql_ip, 5432)
)

# After starting the tunnel
tunnel.start()

# Get the local bind port that the tunnel is using
local_port = tunnel.local_bind_port

# Update connection string to use localhost and the tunneled port
CONNSTR = f'postgresql://{sql_user}:{sql_pass}@127.0.0.1:{local_port}/{sql_db_name}'

# Now create your SQLAlchemy engine with this connection string
engine = create_engine(CONNSTR)

# Test the connection
with engine.connect() as connection:
    result = connection.execute(text("SELECT 1"))
    print("Connection successful!")

/home/tomql/workspaces/roomdraw/database/.env
Connection successful!


In [3]:
with engine.connect() as connection:
    # loop through the dataframe and insert each row into the database
    for index, row in indorm_df.iterrows():
        # first select the user from the database to check if they exist and if they are a senior
        query = f"SELECT year FROM Users WHERE email = '{row['Email']}'"
        print(query)
        result = connection.execute(text(query))
        user_year = result.fetchone()[0]
        print(user_year)
        if user_year:
            # check if the user is a senior
            if user_year == 'senior':
                # for each row, get the dorm id from the dorm mapping and update user table
                dorm_id = dorm_mapping[row['In Dorm']]
                query = f"UPDATE Users SET in_dorm = {dorm_id} WHERE email = '{row['Email']}'"
                connection.execute(text(query))
            else:
                raise Exception(f"User {row['Email']} is not a senior")
        else:
            raise Exception(f"User {row['Email']} does not exist")
        
    connection.commit()
        
print("Successfully updated users in dorm")
tunnel.stop()


SELECT year FROM Users WHERE email = 'jabdelmalek@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'kaldamtajima@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'falemayehu@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'mialexander@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'fandradefernandes@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'eangel@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'baraiza@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'hbarck@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'mbellido@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'lbensaid@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'ablackett@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'abonthu@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'jaboyle@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'abrako@g.hmc.edu'
senior
SELECT year FROM Users WHERE email = 'ocardenas@g.hmc.edu'
senior
SELECT year