In [2]:
import praw
import time
import requests
import os
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor
from typing import List
from tqdm import tqdm

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from PIL import Image
import random
import glob

In [3]:
# Input/output locations used throughout the notebook (relative paths).
imgs_path = '../data/cat_memes/'  # folder the downloaded meme images are written to
roster_path = '../data/SP25 CY1 Residents Roster.xlsx'  # residents roster workbook

In [11]:
# Build the Reddit API client.
# The client ID / secret are read from environment variables rather than being
# written into the notebook, so they cannot leak when the notebook is shared or
# committed. A missing variable raises KeyError here, before any request is made.
# NOTE(review): any key pair that has ever been stored in this notebook's
# history should be treated as compromised and regenerated in the Reddit app
# settings.
reddit = praw.Reddit(
    client_id=os.environ['REDDIT_CLIENT_ID'],
    client_secret=os.environ['REDDIT_CLIENT_SECRET'],
    user_agent='cat_meme_bot',  # descriptive user agent sent with every request
    read_only=True,  # Run in read-only mode
)

def download_image(post, timeout: float = 10.0) -> bool:
    """Download the image a post links to into the ``imgs_path`` folder.

    Args:
        post: Any object exposing a ``url`` attribute (the notebook passes the
            items returned by ``subreddit.hot``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block its worker thread
            indefinitely.

    Returns:
        True when the server answered with status 200 and the body was written
        to disk; False when the URL does not point at a supported image type,
        the server answered with another status, or any error occurred while
        downloading or writing.
    """
    # Decide on the URL *path*, lower-cased: this ignores query strings
    # (".../cat.jpg?width=640" is an image, ".../page.html?src=cat.jpg" is not)
    # and accepts upper-case extensions such as ".JPG".
    url_path = urlparse(post.url).path
    if not url_path.lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):
        return False

    filename = os.path.basename(url_path)
    filepath = os.path.join(imgs_path, filename)

    try:
        response = requests.get(post.url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download {post.url}")
            return False

        with open(filepath, 'wb') as f:
            f.write(response.content)
        return True
    except Exception as e:
        # Deliberately broad: this runs inside a thread pool and one bad post
        # must not abort the whole batch, so the failure is reported and
        # counted as "not downloaded".
        print(f"Error downloading {post.url}: {str(e)}")
        return False

# Make sure the target folder is present before any worker tries to write to it
os.makedirs(imgs_path, exist_ok=True)

# Fetch up to 100 submissions from the "hot" listing of r/CatMemes
subreddit = reddit.subreddit('CatMemes')
posts = list(subreddit.hot(limit=100))

# Run the downloads on 10 worker threads; tqdm wraps the lazy result iterator
# so the progress bar advances as each result is consumed
with ThreadPoolExecutor(max_workers=10) as pool:
    download_flags = pool.map(download_image, posts)
    progress = tqdm(download_flags, total=len(posts), desc="Downloading images")
    results = list(progress)

# Each result is a bool, so the sum is the number of successful downloads
print(f"Successfully downloaded {sum(results)} images")

Downloading images: 100%|██████████| 100/100 [00:08<00:00, 11.96it/s]

Successfully downloaded 88 images





In [5]:
# Load the roster workbook into a DataFrame
roster_df = pd.read_excel(io=roster_path)

# Bare last expression so the notebook renders the table
roster_df

Unnamed: 0,Bed Space,Term,DKUID,NetID,Given Name,Preferred Name,Family Name,Gender,Move In,Check In,Move Out,Check Out,Rm #
0,A-2001a,2025 Spring,13065,mx74,Maocheng,Maocheng,Xiao,Male,2025-01-03,,2025-05-17,,A-2001
1,A-2001b,2025 Spring,13220,zy213,Zeyu,Zeyu,Yu,Male,2025-01-03,,2025-05-17,,A-2001
2,A-2002a,2025 Spring,12977,kl526,Kaiming,Kaiming,Liu,Male,2025-01-03,,2025-05-17,,A-2002
3,A-2002b,2025 Spring,12995,yg271,Yuan,Yuan,Gao,Male,2025-01-03,,2025-05-17,,A-2002
4,A-2003a,2025 Spring,13192,hl546,Haozhe,Haozhe,Lou,Male,2025-01-03,,2025-05-17,,A-2003
...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,A-5117b,2025 Spring,11728,zw313,Ziqi,Ziqi,Wang,Female,2025-01-03,,2025-05-17,,A-5117
212,A-5119a,2025 Spring,9828,jz411,Jingwen,Chloe,Zeng,Female,2025-01-03,,2025-05-17,,A-5119
213,A-5119b,2025 Spring,11662,tc394,Tung-Tung,Tung-Tung,Chang,Female,2025-01-03,,2025-05-17,,A-5119
214,A-5121a,2025 Spring,8546,kw336,Karen,Karen,Wang,Female,2025-01-03,,2025-05-17,,A-5121


In [6]:
# Keep the male residents whose bed space starts with 'A-5'.
# NOTE(review): the 'A-5' prefix matches every A-5xxx bed (A-50xx as well as
# A-51xx); tighten it to 'A-50' if only the A-50xx rooms are intended.
# na=False makes a row with a missing Bed Space / Gender count as "no match"
# instead of putting NaN into the boolean mask, which would break the indexing.
in_a5_rooms = roster_df['Bed Space'].str.startswith('A-5', na=False)
is_male = roster_df['Gender'].str.startswith('M', na=False)
myHall_df = roster_df[in_a5_rooms & is_male]
myHall_df


Unnamed: 0,Bed Space,Term,DKUID,NetID,Given Name,Preferred Name,Family Name,Gender,Move In,Check In,Move Out,Check Out,Rm #
164,A-5001a,2025 Spring,11779,zh178,Ziqiao,Ziqiao,Huang,Male,2025-01-03,,2025-05-17,,A-5001
165,A-5001b,2025 Spring,11801,zl429,Zhuohang,Zhuohang,Liu,Male,2025-01-03,,2025-05-17,,A-5001
166,A-5002a,2025 Spring,10944,gs285,Guangzhi,Guangzhi,Su,Male,2025-01-03,,2025-05-17,,A-5002
167,A-5002b,2025 Spring,10873,jw850,Jiachen,Jiachen,Wu,Male,2025-01-03,,2025-05-17,,A-5002
168,A-5003a,2025 Spring,9461,mk542,Mohamed Sami,Mohamed Sami,Koudir,Male,2025-01-03,,2025-05-17,,A-5003
169,A-5003b,2025 Spring,9891,bc289,Bo,Bo,Chen,Male,2025-01-03,,2025-05-17,,A-5003
170,A-5004a,2025 Spring,9997,qy34,Qianhe,Qianhe,Yin,Male,2025-01-03,,2025-05-17,,A-5004
171,A-5004b,2025 Spring,9944,rx47,Ruicheng,Ruicheng,Xie,Male,2025-01-03,,2025-05-17,,A-5004
172,A-5005a,2025 Spring,10928,wt86,Weibo,Weibo,Tang,Male,2025-01-03,,2025-05-17,,A-5005
173,A-5006a,2025 Spring,9924,ys357,Yuchen,Yuchen,Song,Male,2025-01-03,,2025-05-17,,A-5006


In [7]:
# Number of residents selected for the hall (row count of the filtered frame)
len(myHall_df)

34

In [8]:
# Multi-page PDF writer used by this cell
from matplotlib.backends.backend_pdf import PdfPages

# ---- Tunables --------------------------------------------------------------
OUT_DIR = '../out'
PAGE_SIZE_IN = (8.27, 11.69)             # A4 portrait, in inches
PAGE_DPI = 300
LEFT_MARGIN, RIGHT_MARGIN = 0.15, 0.85   # x-limits of the content column (figure fraction)
DEEP_BLUE, LIGHT_BLUE = '#1a237e', '#5c6bc0'
MEME_SUFFIXES = ('.jpg', '.jpeg', '.png')  # .gif files are downloaded but not used here
MEME_SHUFFLE_SEED = None                 # set to an int to make the meme assignment reproducible


def _list_memes(folder):
    """Return the paths of all files in ``folder`` with a supported image suffix.

    The suffix comparison is case-insensitive so files such as ``cat.JPG`` are
    picked up too. The list is sorted so that a seeded shuffle gives the same
    result regardless of the order the filesystem lists files in.
    """
    candidates = glob.glob(os.path.join(folder, '*'))
    return sorted(p for p in candidates if p.lower().endswith(MEME_SUFFIXES))


def _load_meme_pixels(meme_path):
    """Read an image file into a numpy array that ``imshow`` renders in true colour.

    The file is opened in a ``with`` block so its handle is released right
    away instead of staying open for the rest of the run. Images that are not
    already RGB/RGBA (palette, grayscale, CMYK, ...) are converted to RGB;
    otherwise ``imshow`` would receive a 2-D or 4-channel non-RGBA array and
    either false-colour it through a colormap or misinterpret the channels.
    """
    with Image.open(meme_path) as img:
        if img.mode not in ('RGB', 'RGBA'):
            img = img.convert('RGB')
        # np.array copies the pixel data, so it stays valid after the file closes
        return np.array(img)


def _display_name(resident):
    """Return the name to print: 'Preferred Name', or 'Given Name' if that is missing."""
    preferred = resident['Preferred Name']
    if pd.notna(preferred) and str(preferred).strip():
        return str(preferred)
    return str(resident['Given Name'])


def _build_page(name, bedspace, meme_pixels):
    """Create one A4 figure showing ``name``, ``bedspace`` and the meme image.

    Returns the figure; the caller is responsible for saving and closing it.
    """
    content_width = RIGHT_MARGIN - LEFT_MARGIN
    fig = plt.figure(figsize=PAGE_SIZE_IN, dpi=PAGE_DPI)

    # Main content area: the meme, stretched to fill the axes
    main_ax = fig.add_axes([LEFT_MARGIN, 0.2, content_width, 0.6])
    main_ax.imshow(meme_pixels, aspect='auto')
    main_ax.axis('off')
    # NOTE(review): axis('off') suppresses the axes spines, so the spine-based
    # "double border" this cell used to configure was never rendered and has
    # been dropped. To get a real border, hide only the ticks
    # (set_xticks([]) / set_yticks([])) and style the spines - and re-check the
    # layout, since an outer frame at y=0.82 would cut through the subtitle.

    # Title: resident name
    title_ax = fig.add_axes([LEFT_MARGIN, 0.85, content_width, 0.1])
    title_ax.axis('off')
    title_ax.text(0.5, 0.5,
                  name,
                  ha='center',
                  va='center',
                  fontsize=40,
                  fontweight='bold',
                  fontfamily='DejaVu Sans',
                  color=DEEP_BLUE)

    # Subtitle: bed space
    subtitle_ax = fig.add_axes([LEFT_MARGIN, 0.8, content_width, 0.05])
    subtitle_ax.axis('off')
    subtitle_ax.text(0.5, 0.5,
                     bedspace,
                     ha='center',
                     va='center',
                     fontsize=20,
                     fontfamily='DejaVu Sans',
                     color=LIGHT_BLUE)

    # Decorative header/footer rules (lines are ordinary artists, so they are
    # still drawn on an axes whose axis decorations are switched off)
    for y_pos in [0.95, 0.05]:
        line_ax = fig.add_axes([0.1, y_pos, 0.8, 0.01])
        line_ax.axis('off')
        line_ax.axhline(y=0.5, color=DEEP_BLUE, linewidth=3)

    # Subtle vertical gradient behind everything else (zorder=-1)
    gradient = np.tile(np.linspace(0, 1, 100).reshape(-1, 1), (1, 2))
    bg_ax = fig.add_axes([0, 0, 1, 1], zorder=-1)
    bg_ax.imshow(gradient, cmap='Blues', alpha=0.1, aspect='auto')
    bg_ax.axis('off')

    return fig


# Collect the downloaded memes and put them in random order
cat_memes = _list_memes(imgs_path)
random.Random(MEME_SHUFFLE_SEED).shuffle(cat_memes)

# Number of residents
n_residents = len(myHall_df)

# Each meme is used at most once, so there may be fewer pages than residents
if len(cat_memes) < n_residents:
    print(f"Warning: Only {len(cat_memes)} unique memes available for {n_residents} residents")
    print("Some residents will not receive a meme")
    n_residents = len(cat_memes)  # Limit to available unique memes

# Create output directory
os.makedirs(OUT_DIR, exist_ok=True)

pdf_path = os.path.join(OUT_DIR, "SP25 CY1.pdf")
with PdfPages(pdf_path) as pdf:
    # zip() stops at the shorter input, i.e. after the last resident or the
    # last available meme, whichever comes first
    for (_, resident), meme_path in zip(myHall_df.iterrows(), cat_memes):
        fig = _build_page(
            name=_display_name(resident),
            bedspace=resident['Bed Space'],
            meme_pixels=_load_meme_pixels(meme_path),
        )
        try:
            # No bbox_inches='tight' / pad_inches: those re-crop and pad the
            # page, so the saved page would no longer be exactly A4.
            pdf.savefig(fig, dpi=PAGE_DPI)
        finally:
            # Always release the figure, even if saving fails
            plt.close(fig)
