## Step 5: Finalize Research Digest

In [None]:
# Add the parent directory to the import search path so the project-local
# packages imported below (`data`, `components`) can be resolved.
# NOTE(review): presumably the notebook lives one level below the repo root — confirm.
import sys
sys.path.append('../')

import os

# Load environment variables via python-dotenv before anything reads them
# (EMAIL_TO is read with os.getenv when the digest is sent).
from dotenv import load_dotenv
load_dotenv()

import pandas as pd

# Project content manager, pointed at the ../data directory.
from data import ContentManager
manager = ContentManager(base_path="../data")

# Gmail client used to send the digest, configured with the client-secret
# and token files that sit next to the data directory.
# NOTE(review): the recorded cell output suggests authentication happens at
# construction time — confirm in GmailSender.
from components.email.gmail_sender import GmailSender
sender = GmailSender(
    credentials_path="../gmail_client_secret.json",
    token_path="../gmail_token.json"
)

INFO:data.content_manager:Loaded content index with 2633 entries
INFO:googleapiclient.discovery_cache:file_cache is only supported with oauth2client<4.0.0


✓ Gmail API authenticated successfully


In [None]:
# Load the full research-items table; the cells below read its curated_at,
# curator_summary and *_score columns.
df = pd.read_csv("../data/research_items.csv")

In [None]:
# Cutoff: only items curated strictly after this date/time are kept.
base_date = "2025-12-28"
base_time = "20:00"  # HH:MM
cutoff_datetime = f"{base_date}T{base_time}:00"

# Row conditions, built separately so each one can be inspected on its own.
# NOTE(review): curated_at is compared against the cutoff as-is; this assumes
# the column holds ISO-8601 strings that sort chronologically — confirm.
curated_after_cutoff = df['curated_at'] > cutoff_datetime
summary_failed = df['curator_summary'].str.startswith("ERROR", na=False)
high_priority = df['priority_score'] >= 8.0

df_recent = df[curated_after_cutoff & ~summary_failed & high_priority].copy()

# Order by the combined score, best first. The helper column exists only for
# the sort and is dropped again afterwards.
combined_score = (
    df_recent['applicability_score']
    + df_recent['novelty_score']
    + df_recent['priority_score']
)
df_recent = (
    df_recent.assign(total_score=combined_score)
    .sort_values('total_score', ascending=False)
    .drop(columns=['total_score'])
)

print(f"Filtered {len(df_recent)} items from {len(df)}  ")

Filtered 43 items from 4355  


### Format for email

In [None]:
def _display_text(value, placeholder='N/A'):
    """Return ``str(value)``, or *placeholder* when value is a missing scalar (None/NaN/NA)."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return placeholder
    return str(value)


def format_top_items(df, n=10):
    """
    Format top N items as a numbered list with full details.

    Every field is rendered through the same missing-value rule: a None/NaN
    cell is shown as ``N/A`` instead of leaking the literal text ``nan`` or
    ``None`` into the digest. Non-missing values are rendered with ``str``.

    Args:
        df: DataFrame sorted by priority (highest first). Must contain the
            columns title, url, curator_tags, source, published,
            curator_takeaways and verdict_reasoning.
        n: Number of top items to format (default: 10)

    Returns:
        str: Markdown-formatted list of top items; an empty string when
        there are no rows to format.
    """
    entries = []

    for rank, (_, row) in enumerate(df.head(n).iterrows(), 1):
        # The two trailing spaces on some lines are Markdown hard line breaks;
        # they are spelled out inside the literals so editors cannot strip them.
        lines = [
            f"**{rank}. {_display_text(row['title'])}**",
            "",
            f"**URL:** {_display_text(row['url'])}  ",
            f"**Tags:** {_display_text(row['curator_tags'])}  ",
            f"**Source:** {_display_text(row['source'])} - {_display_text(row['published'])}",
            "",
            "**Takeaways:**  ",
            _display_text(row['curator_takeaways']),
            "",
            "**Reason to read:**  ",
            _display_text(row['verdict_reasoning']),
            "",
            "---",
            "",  # keeps the newline after the horizontal rule
        ]
        entries.append('\n'.join(lines))

    return '\n'.join(entries)


def format_remaining_table(df, skip_top=10):
    """
    Render every row after the first ``skip_top`` as a markdown table.

    Args:
        df: DataFrame sorted by priority
        skip_top: Number of leading rows to leave out (default: 10)

    Returns:
        str: An "Additional Items" heading followed by the markdown table,
        or an empty string when no rows remain after skipping.
    """
    leftover_count = len(df) - skip_top
    if leftover_count <= 0:
        return ""

    # Columns shown in the table, in display order.
    display_columns = [
        'title', 'url', 'source', 'published', 'curator_tags',
        'applicability_score', 'novelty_score', 'priority_score',
    ]

    # Work on a copy so the caller's frame is left untouched.
    leftover = df.iloc[skip_top:][display_columns].copy()

    # Replace each raw URL with a short markdown link to keep the column narrow.
    leftover['url'] = leftover['url'].map('[Link]({})'.format)

    return "\n\n## Additional Items\n\n" + leftover.to_markdown(index=False)

Format the digest

In [None]:
# Format the digest.
# One value decides how many items get the detailed treatment, how many rows
# the summary table skips, and the number shown in the section heading.
top_n = 10

top_section = format_top_items(df_recent, n=top_n)
remaining_section = format_remaining_table(df_recent, skip_top=top_n)

# Combine into the full digest. The heading emoji are written as real Unicode
# characters; the earlier text was UTF-8 that had been mis-decoded and would
# have appeared as garbage in the delivered email.
digest_body = f"""# 🔬 Research Digest
**Date:** {pd.Timestamp.now().strftime('%B %d, %Y')}

---

## 🌟 Top {top_n} Highlights

{top_section}

{remaining_section}
"""

In [None]:
# Send the digest.
# os.getenv returns None when EMAIL_TO is missing; stop here with a clear
# message instead of handing an empty recipient to the mail client.
recipient = os.getenv("EMAIL_TO")
if not recipient:
    raise RuntimeError(
        "EMAIL_TO is not set; define it in the environment or .env file before sending the digest"
    )

sender.send_email(
    to=recipient,
    subject=f"Research Digest - {pd.Timestamp.now().strftime('%B %d, %Y')}",
    content=digest_body,
    markdown_mode=True
)