# Functions

In [64]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin
import pandas as pd
import time
import random
from bs4 import BeautifulSoup
import re
from datetime import datetime

In [52]:
def login_to_islands(driver):
    """Sign in to the Islands site when the driver is sitting on the login page.

    Args:
        driver: Selenium WebDriver whose ``current_url`` is inspected.

    Returns:
        A ``WebDriverWait`` (10 second timeout) bound to ``driver``. When a
        login was performed this is the same wait object that was used to
        wait for the form; otherwise a new one is created.

    Raises:
        Exception: any error raised while checking the URL or filling in the
            form is printed and then re-raised unchanged.
    """
    # NOTE(review): credentials are hard-coded in source — consider moving
    # them to environment variables or a secrets store.
    login_url = "https://islands.smp.uq.edu.au/login.php"
    email_address = "matthew.stoebe@colostate.edu"
    password = "TheIslands"

    try:
        on_login_page = login_url in driver.current_url
        if on_login_page:
            login_wait = WebDriverWait(driver, 10)
            email_locator = (By.NAME, "email")

            # Wait for the form to be present before touching any field.
            login_wait.until(EC.presence_of_element_located(email_locator))

            email_box = driver.find_element(*email_locator)
            email_box.send_keys(email_address)

            password_box = driver.find_element(By.NAME, "word")
            password_box.send_keys(password)

            submit_button = driver.find_element(
                By.CSS_SELECTOR, "input[type='submit'][value='Sign In']"
            )
            submit_button.click()
            return login_wait
    except Exception as error:
        print(f"Login attempt failed: {error}")
        raise

    # Not on the login page: nothing to fill in, just hand back a wait object.
    return WebDriverWait(driver, 10)

In [53]:
def get_fresh_driver():
    """Create a new Chrome driver, open the Islands login page and log in.

    The driver is configured with the option set the notebook labels as
    "anti-detection measures" (custom user agent, automation switches
    excluded, fixed window size) and with the "detach" experimental option.
    Each call uses its own freshly created user-data directory.

    Returns:
        tuple: ``(driver, wait)`` where ``wait`` is the ``WebDriverWait``
        returned by ``login_to_islands``.
    """
    # Local import so this block does not depend on the file-level imports.
    import tempfile

    options = webdriver.ChromeOptions()
    options.add_argument('--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")
    options.add_experimental_option("detach", True)

    # A random number in 1..1000 could repeat and point a new browser at a
    # profile directory that an earlier, still-open browser is using.
    # mkdtemp always creates a new, uniquely named directory instead.
    # The directory is not removed here (the previous code did not clean up
    # its profile directories either).
    profile_dir = tempfile.mkdtemp(prefix="temp_profile_")
    options.add_argument(f'--user-data-dir={profile_dir}')

    driver = webdriver.Chrome(options=options)
    driver.get("https://islands.smp.uq.edu.au/login.php")
    wait = login_to_islands(driver)  # Use existing login function

    return driver, wait

# Scrape Active Contacts

In [54]:
# Scrape the list of active contacts (name + profile URL) into `contacts`.
# A plain Chrome session is used here; the "detach" experimental option is set
# before the driver is created.
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=options)

try:
    # Navigate and login
    driver.get("https://islands.smp.uq.edu.au/login.php")
    wait = login_to_islands(driver)
    
    # Click Contacts link instead of direct navigation
    contacts_link = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "a.menu[href='project.php']"))
    )
    contacts_link.click()
    
    # Get contact cards (blocks until `div.contact` elements are present,
    # or the wait times out)
    cards = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.contact")))
    
    # Build one {"name", "profile_url"} record per card.
    contacts = []
    for card in cards:
        a = card.find_element(By.CSS_SELECTOR, "div.name a")
        # Link text may span several lines; join them with spaces.
        name = a.text.replace("\n", " ")
        # Resolve the href against the page currently loaded in the driver.
        profile_url = urljoin(driver.current_url, a.get_attribute("href"))
        contacts.append({"name": name, "profile_url": profile_url})
    
finally:
    # Always close this browser session, whether or not scraping succeeded.
    driver.quit()

# Scrape Contact Data

In [79]:
def get_contact_data():
    """Extract age, money and address from the page loaded in the global ``driver``.

    The page source is parsed with BeautifulSoup and every ``<td colspan="2">``
    cell is searched for three text patterns:

    * ``"<n> years old"``          -> age
    * ``"$<digits and commas>"``   -> money (commas removed)
    * ``"Lives in <Word> <n>"``    -> city and house number

    If a pattern matches in more than one cell, the last matching cell wins.

    Returns:
        tuple: ``(age, money, city, house_number)``. Any value whose pattern
        never matched is ``None`` (previously this case raised
        ``UnboundLocalError`` at the ``return`` statement).
    """
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    # Find all td elements with colspan="2"
    tds = soup.find_all('td', attrs={'colspan': '2'})

    # Extract the relevant info using regex patterns
    age_pattern = r'(\d+) years old'
    money_pattern = r'\$([0-9,]+)'
    location_pattern = r'Lives in ([A-Za-z]+) (\d+)'

    # Defaults so that a page missing any of the fields still yields a result.
    age = money = city = house_number = None

    for td in tds:
        text = td.get_text()

        # Extract age
        age_match = re.search(age_pattern, text)
        if age_match:
            age = int(age_match.group(1))

        # Extract money
        money_match = re.search(money_pattern, text)
        if money_match:
            money = int(money_match.group(1).replace(',', ''))

        # Extract location
        location_match = re.search(location_pattern, text)
        if location_match:
            city = location_match.group(1)
            house_number = int(location_match.group(2))

    return age, money, city, house_number



def do_task(section='Blood Tests', task_name='Blood Melatonin'):
    """Open the Tasks tab, expand ``section`` and click the task ``task_name``.

    Uses the module-level ``wait`` object; each step waits until its element
    is clickable before clicking it.

    Args:
        section: Text that the ``div.submenu`` element to open must contain.
        task_name: Text that the ``span.task`` element to click must contain.

    Returns:
        The ``task_name`` that was passed in.
    """
    # Step 1: the Tasks tab.
    tab_locator = (By.ID, "t2tab")
    wait.until(EC.element_to_be_clickable(tab_locator)).click()

    # Step 2: the submenu whose text contains the section name.
    section_xpath = f"//div[@class='submenu'][contains(text(),'{section}')]"
    section_element = wait.until(
        EC.element_to_be_clickable((By.XPATH, section_xpath))
    )
    section_element.click()

    # Step 3: the task entry whose text contains the task name.
    task_xpath = f"//span[@class='task'][contains(text(),'{task_name}')]"
    task_element = wait.until(
        EC.element_to_be_clickable((By.XPATH, task_xpath))
    )
    task_element.click()

    return task_name



def get_task_result(task_name='Blood Melatonin'):
    """Look up a result for ``task_name`` on the page loaded in the global ``driver``.

    Clicks the Tasks tab (via the module-level ``wait``), parses the page
    source, and returns the first ``div.taskresult`` entry whose
    ``div.taskresulttask`` text equals ``task_name`` and which has both a
    timestamp and a result element.
    NOTE(review): this presumably is the most recent result, assuming the
    page lists newest entries first — confirm against the site.

    Args:
        task_name: Exact text of the task to look for.

    Returns:
        tuple: ``(task_name, timestamp_text, result_text)``. When no matching
        entry is found, or an error occurs (it is printed, not raised), the
        tuple is ``(task_name, None, None)`` so callers that unpack three
        values keep working instead of failing on ``None``.
    """
    try:
        tasks_tab = wait.until(EC.element_to_be_clickable((By.ID, "t2tab")))
        tasks_tab.click()

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        for task_div in soup.find_all('div', class_='taskresult'):
            current_task = task_div.find('div', class_='taskresulttask')
            if not current_task or current_task.text != task_name:
                continue

            timestamp_div = task_div.find('div', class_='taskresulttd')
            result_div = task_div.find('div', class_='taskresultresult')

            if timestamp_div and result_div:
                return (
                    task_name,
                    timestamp_div.text,
                    result_div.text
                )

    except Exception as e:
        print(f"Error getting task result: {e}")

    # Nothing found (or lookup failed): keep the 3-tuple shape for callers.
    return (task_name, None, None)

    

# Measure and Test Melatonin Levels

In [82]:


# Visit every contact's profile, record their details and either start a
# melatonin blood test (DO_TASK=True) or read back an existing result
# (DO_TASK=False). Results end up in `melatonin_readings_df`.
MAX_ATTEMPTS = 3  # Max number of failures (each failure starts a fresh driver)
CONTACTS_TO_PROCESS = 100  # Number of successful contacts to get before stopping
DO_TASK = False  # Set to True to run the test

driver, wait = get_fresh_driver()
successful_scrapes = 0
attempt_count = 0
# Contacts that were processed without error, in processing order. Kept
# separately because failed or skipped contacts can sit anywhere in
# `contacts`, so slicing the first N entries would select the wrong rows.
processed_contacts = []

try:
    for contact in contacts:
        if attempt_count >= MAX_ATTEMPTS:
            print(f"Reached maximum attempts ({MAX_ATTEMPTS}). Stopping.")
            break

        if successful_scrapes >= CONTACTS_TO_PROCESS:
            print(f"Successfully processed {CONTACTS_TO_PROCESS} contacts. Stopping.")
            break

        try:
            # Random pause between page loads.
            time.sleep(random.uniform(2, 4))

            # Extract islander ID and navigate
            url_parts = contact['profile_url'].split('=')
            if len(url_parts) > 1:
                islander_id = url_parts[1]
                profile_url = f"https://islands.smp.uq.edu.au/islander.php?id={islander_id}"

                driver.get(profile_url)
                wait.until(EC.presence_of_element_located((By.ID, "content")))

                # Get contact data
                age, money, city, house_number = get_contact_data()
                contact['age'] = age
                contact['money'] = money
                contact['city'] = city
                contact['house_number'] = house_number

                if DO_TASK:
                    do_task(section='Blood Tests', task_name='Blood Melatonin')
                else:
                    # Tolerate a lookup that yields nothing: fall back to an
                    # empty result instead of failing on tuple unpacking.
                    lookup = get_task_result(task_name='Blood Melatonin')
                    task_name, task_timestamp, task_result = (
                        lookup or ('Blood Melatonin', None, None)
                    )
                    contact["task_name"] = task_name
                    contact["task_timestamp"] = task_timestamp

                    if task_result:
                        contact['task_result'] = task_result
                        print(f"Successfully processed {contact['name']} with a melatonin level of {contact['task_result']}")

                contact['last_checkin'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

                successful_scrapes += 1
                processed_contacts.append(contact)

        except Exception as e:
            print(f"Error processing {contact['name']}: {e}")
            attempt_count += 1
            # NOTE(review): the previous driver is not quit before a new one
            # is created (the quit call was commented out) — confirm intended.
            # driver.quit()
            if attempt_count < MAX_ATTEMPTS:
                driver, wait = get_fresh_driver()
            continue

finally:
    print("\nSummary:")
    print(f"Successful scrapes: {successful_scrapes}")
    print(f"Failed attempts: {attempt_count}")
    # driver.quit()

# Create DataFrame from the contacts that were actually processed
melatonin_readings_df = pd.DataFrame(processed_contacts)

# melatonin_readings_df.to_csv("pre_diet_melatonin_levels.csv", index=False)
# print("Data saved to pre_diet_melatonin_levels.csv")

15.3 pg/mL


In [67]:
# Show the collected readings, then write them to CSV without the index column.
# NOTE(review): `display` is not imported in this file; presumably it is
# supplied by the notebook environment — confirm before running as a script.
display(melatonin_readings_df)
melatonin_readings_df.to_csv("pre_diet_melatonin_levels.csv", index=False)
print("Data saved to pre_diet_melatonin_levels.csv")

Unnamed: 0,name,profile_url,age,money,city,house_number,last_checkin,task_name,task_timestamp,task_result
0,Akane Abels,https://islands.smp.uq.edu.au/islander.php?id=...,49,8538,Bjurholm,384,2025-04-25 11:50:19,Blood Melatonin,13/365 11:40,12.2 pg/mL
1,Leon Bager,https://islands.smp.uq.edu.au/islander.php?id=...,62,3319,Vardo,659,2025-04-25 11:50:23,Blood Melatonin,13/365 11:40,5.4 pg/mL
2,Paul Bager,https://islands.smp.uq.edu.au/islander.php?id=...,35,5095,Hofn,297,2025-04-25 11:50:28,Blood Melatonin,13/365 11:40,35.7 pg/mL
3,Nihal Bahadur,https://islands.smp.uq.edu.au/islander.php?id=...,48,9347,Bjurholm,241,2025-04-25 11:50:32,Blood Melatonin,13/365 11:40,11.6 pg/mL
4,Britt Blomgren,https://islands.smp.uq.edu.au/islander.php?id=...,29,5119,Hofn,297,2025-04-25 11:50:36,Blood Melatonin,13/365 11:40,38.6 pg/mL
...,...,...,...,...,...,...,...,...,...,...
70,Kazuki Wilson,https://islands.smp.uq.edu.au/islander.php?id=...,39,7862,Hofn,185,2025-04-25 11:54:53,Blood Melatonin,13/365 11:45,29.0 pg/mL
71,Naoto Wilson,https://islands.smp.uq.edu.au/islander.php?id=...,33,4744,Bjurholm,181,2025-04-25 11:54:58,Blood Melatonin,13/365 11:45,36.5 pg/mL
72,Blade Yamada,https://islands.smp.uq.edu.au/islander.php?id=...,68,588,Bjurholm,107,2025-04-25 11:55:02,Blood Melatonin,13/365 11:45,3.1 pg/mL
73,Deepa Zaman,https://islands.smp.uq.edu.au/islander.php?id=...,30,7132,Hofn,188,2025-04-25 11:55:06,Blood Melatonin,13/365 11:45,44.4 pg/mL


Data saved to pre_diet_melatonin_levels.csv


# Randomly Assign Diets

In [77]:
# Treatment options used for random assignment. In the assignment loop further
# down, any value other than "no change" is passed to do_task() as the task
# name in the 'Interventions' section; "no change" is treated as control.
diets = ["Ketogenic Diet 14 days", "Vegetarian Diet 14 days", "no change"]

In [78]:
# Load the contacts to randomise; a 'house_number' column is required below.
contacts_df = pd.read_csv('contacts.csv')

# Randomly assign a diet to each house number (block group)
house_numbers = contacts_df['house_number'].unique()

# Create a dictionary to map house numbers to diets.
# Each house draws independently with random.choice and no seed is set in this
# cell, so group sizes are not forced to be equal and reruns can differ.
house_to_diet = {house: random.choice(diets) for house in house_numbers}

# Assign the diet to each contact based on their house number
contacts_df['diet'] = contacts_df['house_number'].map(house_to_diet)

# Save the assignment so later cells can reload it.
contacts_df.to_csv('contacts_diets.csv', index=False)

In [79]:
# Reload the saved diet assignments.
contacts_df = pd.read_csv('contacts_diets.csv')

# Convert the frame to a list of dicts, one per row. This rebinds `contacts`,
# the list the processing loops iterate over.
contacts = contacts_df.to_dict(orient='records')

# Assign Diets

In [81]:
# Visit every contact's profile, refresh their details and, when DO_TASK is
# True, start the intervention task named by the contact's 'diet' value.
# Contacts whose diet is "no change" form the control group and get no task.
MAX_ATTEMPTS = 3  # Max number of failures (each failure starts a fresh driver)
CONTACTS_TO_PROCESS = 100  # Number of successful contacts to get before stopping
DO_TASK = True  # Set to True to run the test

driver, wait = get_fresh_driver()
successful_scrapes = 0
attempt_count = 0

try:
    for contact in contacts:
        if attempt_count >= MAX_ATTEMPTS:
            print(f"Reached maximum attempts ({MAX_ATTEMPTS}). Stopping.")
            break

        if successful_scrapes >= CONTACTS_TO_PROCESS:
            print(f"Successfully processed {CONTACTS_TO_PROCESS} contacts. Stopping.")
            break

        try:
            # Random pause between page loads.
            time.sleep(random.uniform(2, 4))

            # Extract islander ID and navigate
            url_parts = contact['profile_url'].split('=')
            if len(url_parts) > 1:
                islander_id = url_parts[1]
                profile_url = f"https://islands.smp.uq.edu.au/islander.php?id={islander_id}"

                driver.get(profile_url)
                wait.until(EC.presence_of_element_located((By.ID, "content")))

                # Get contact data
                age, money, city, house_number = get_contact_data()
                contact['age'] = age
                contact['money'] = money
                contact['city'] = city
                contact['house_number'] = house_number

                # Assign task based on diet value
                diet = contact['diet']
                if diet == "no change":
                    print(f"Assigned {contact['name']} to controll")
                elif DO_TASK:
                    # Task name will be the diet value
                    try:
                        do_task(section='Interventions', task_name=diet)
                        print(f"Assigned {contact['name']} to {diet}")
                    except Exception as e:
                        # Name the contact and the exception type: Selenium
                        # timeouts often carry an empty message, which made
                        # the bare print(e) output impossible to attribute.
                        print(f"Could not assign {contact['name']} to {diet}: {type(e).__name__}: {e}")
                        continue
                else:
                    # Dry run: do not claim a control assignment for a
                    # contact who actually has a diet.
                    print(f"Skipped assigning {contact['name']} to {diet} (DO_TASK is False)")

                successful_scrapes += 1

        except Exception as e:
            print(f"Error processing {contact['name']}: {e}")
            attempt_count += 1
            # NOTE(review): the previous driver is not quit before a new one
            # is created (the quit call was commented out) — confirm intended.
            # driver.quit()
            if attempt_count < MAX_ATTEMPTS:
                driver, wait = get_fresh_driver()
            continue

finally:
    print("\nSummary:")
    print(f"Successful scrapes: {successful_scrapes}")
    print(f"Failed attempts: {attempt_count}")
    # driver.quit()


Assigned Akane Abels to controll
Assigned Leon Bager to Vegetarian Diet 14 days
Message: 

Assigned Nihal Bahadur to controll
Message: 

Assigned Elias Blomgren to Ketogenic Diet 14 days
Message: 

Assigned Franziska Brandt to Vegetarian Diet 14 days
Assigned Ryan Brown to Ketogenic Diet 14 days
Assigned Arvid Carlsen to controll
Assigned Elsa Carlsen to controll
Assigned Tomas Carlsen to Ketogenic Diet 14 days
Assigned Yash Chatterjee to controll
Assigned Austin Collins to controll
Assigned Hailey Collins to Ketogenic Diet 14 days
Assigned Josh Collins to controll
Assigned Amie Connolly to Vegetarian Diet 14 days
Assigned Florian Eklund to Ketogenic Diet 14 days
Assigned Halden Eklund to Vegetarian Diet 14 days
Assigned Hanne Eklund to Ketogenic Diet 14 days
Assigned Jana Erickson to Vegetarian Diet 14 days
Assigned Petra Franke to Vegetarian Diet 14 days
Assigned Auna Hall to Ketogenic Diet 14 days
Assigned Jermaine Hall to Vegetarian Diet 14 days
Assigned Manami Hall to Ketogenic Di