In [106]:
import json
import pandas as pd
from datetime import datetime, timezone
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load JSON data
with open('data/events.json', 'r') as f:
    events = json.load(f)

with open('data/registrations.json', 'r') as f:
    registrations = json.load(f)

with open('data/users.json', 'r') as f:
    users = json.load(f)

In [None]:
def get_user_past_events(user_id):
    """Returns all events the user has registered for, regardless of status"""
    past_event_ids = [r['eventId'] for r in registrations if r['userId'] == user_id]
    return [e for e in events if e['_id'] in past_event_ids]

def filter_candidate_events(user_id, include_attended_events=True):

    current_time = datetime.now(timezone.utc)
    candidates = []
    

    registered_event_ids = {r['eventId'] for r in registrations if r['userId'] == user_id}
    
    for event in events:
        # Parse event times (timezone-aware)
        end_time = datetime.fromisoformat(event['endTime'].replace('+00:00', '+00:00')).replace(tzinfo=timezone.utc)
        is_past = end_time <= current_time
        is_registered = event['_id'] in registered_event_ids
        
        
        if is_registered:
    
            include_event = include_attended_events and is_past
        else:
            include_event = (event['status'] == 'live') and (not is_past or include_attended_events)
        if include_event:
            event_copy = event.copy()
            event_copy.update({
                'is_past': is_past,
                'is_registered': is_registered,
                'user_attended': is_registered and is_past
            })
            candidates.append(event_copy)
    
    return candidates

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
import numpy as np

def extract_features_for_clustering(events):
    # Text features (TF-IDF)
    descriptions = [e["description"] for e in events]
    tfidf = TfidfVectorizer(max_features=50, stop_words="english")  
    tfidf_features = tfidf.fit_transform(descriptions).toarray()  
    
    event_types = [e["eventType"] for e in events]
    organizers = [e["organizerId"] for e in events]
    encoder = OneHotEncoder()
    
    combined_categorical = list(zip(event_types, organizers))
    categorical_features = encoder.fit_transform(combined_categorical).toarray()
    
    # Numeric features (Days until start)
    current_time = datetime.now(timezone.utc)
    days_until_start = [
        (datetime.fromisoformat(e["startTime"].replace("+00:00", "+00:00")) - current_time).days
        for e in events
    ]
    days_features = np.array(days_until_start).reshape(-1, 1)  # Shape: (n_events, 1)
    
    
    features = np.hstack([tfidf_features, categorical_features, days_features])
    return features

def cluster(events, n_clusters=5):
    descriptions = [e["description"] for e in events]
    tfidf = TfidfVectorizer(max_features=50, stop_words="english")
    tfidf_features = tfidf.fit_transform(descriptions).toarray()
    
    event_types = [e["eventType"] for e in events]
    organizers = [e["organizerId"] for e in events]
    encoder = OneHotEncoder(handle_unknown='ignore')
    categorical_features = encoder.fit_transform(np.column_stack([event_types, organizers])).toarray()
    

    features = np.hstack([tfidf_features, categorical_features])
    
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(features)
    for i, event in enumerate(events):
        event["cluster"] = int(clusters[i])
    return events

In [None]:
def recommend_events(user_id,events, top_n=3):
    clustered_events = cluster(events)
    past_events = get_user_past_events(user_id)
    user_clusters = list(set([e["cluster"] for e in past_events])) if past_events else []
    
    candidate_events = filter_candidate_events(user_id)
    

    if candidate_events:
        event_descriptions = [e["description"] for e in candidate_events]
        past_descriptions = [e["description"] for e in past_events] if past_events else [""]
        
        tfidf = TfidfVectorizer(stop_words="english")
        tfidf_matrix = tfidf.fit_transform(event_descriptions + past_descriptions)
        candidate_vectors = tfidf_matrix[:len(candidate_events)]
        past_vectors = tfidf_matrix[len(candidate_events):]
        
        similarity_scores = cosine_similarity(past_vectors, candidate_vectors).mean(axis=0) if past_events else [0.5]*len(candidate_events)
        
        scored_events = []
        for idx, event in enumerate(candidate_events):
            cluster_score = 1 if user_clusters and event["cluster"] in user_clusters else 0.5
            type_score = 1 if past_events and event["eventType"] in [e["eventType"] for e in past_events] else 0.5
            
            start_time = datetime.fromisoformat(event["startTime"].replace("+00:00", "+00:00")).replace(tzinfo=timezone.utc)
            days_until_start = (start_time - datetime.now(timezone.utc)).days
            time_score = 1 / (1 + abs(days_until_start))
            
            score = (0.4 * similarity_scores[idx] + 
                    0.3 * cluster_score + 
                    0.2 * type_score + 
                    0.1 * time_score)
            
            scored_events.append((event, score))

        print(scored_events[0][1])

        scored_events.sort(key=lambda x: x[1], reverse=True)
        recommendations = [event for event, _ in scored_events[:top_n]]
        if recommendations:
            return recommendations
    
    event_popularity = defaultdict(int)
    for reg in registrations:
        event_popularity[reg["eventId"]] += 1
    
    user_registered_ids = {r["eventId"] for r in registrations if r["userId"] == user_id}
    
    if user_clusters:
        cluster_events = [e for e in clustered_events 
                         if e["cluster"] in user_clusters
                         and e["_id"] not in user_registered_ids]
        cluster_events.sort(key=lambda x: -event_popularity[x["_id"]])
        if cluster_events:
            return cluster_events[:top_n]
    
    all_events = [e for e in clustered_events if e["_id"] not in user_registered_ids]
    all_events.sort(key=lambda x: -event_popularity[x["_id"]])
    return all_events[:top_n]

In [125]:
user_id = "681e535ec99231a537cc9c88"  # Dr. Beth Koch
recommended_events = recommend_events(user_id, events, top_n=3)

print("Recommended Events:")
for event in recommended_events:
    print(f"- {event['title']} ({event['eventType']}, starts {event['startTime']})")

0.3569191076890392
Recommended Events:
- Alvarez, Foster and Walker (concert, starts 2025-04-20T00:00:00.000Z)
- Espinoza-Lane (virtual, starts 2025-04-14T00:00:00.000Z)
- Jackson LLC (marathon, starts 2025-01-06T00:00:00.000Z)


In [111]:
from collections import defaultdict

def recommend_events(user_id, top_n=3):
    # Step 1: Try to recommend live/upcoming events first
    past_events = get_user_past_events(user_id)
    candidate_events = filter_candidate_events(user_id)  # Live/upcoming events
    
    if candidate_events:
        # Content-based filtering (same as before)
        event_descriptions = [e['description'] for e in candidate_events]
        past_descriptions = [e['description'] for e in past_events]
        
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(event_descriptions + past_descriptions)
        candidate_vectors = tfidf_matrix[:len(candidate_events)]
        past_vectors = tfidf_matrix[len(candidate_events):]
        
        similarity_scores = cosine_similarity(past_vectors, candidate_vectors).mean(axis=0)
        
        scored_events = []
        for idx, event in enumerate(candidate_events):
            start_time = datetime.fromisoformat(event['startTime'].replace('+00:00', '+00:00')).replace(tzinfo=timezone.utc)
            days_until_start = (start_time - datetime.now(timezone.utc)).days
            
            score = (
                0.5 * similarity_scores[idx] +  # Text similarity
                0.3 * (1 if event['eventType'] in [e['eventType'] for e in past_events] else 0) +  # Type match
                0.2 * (1 / (1 + days_until_start))  # Time relevance
            )
            scored_events.append((event, score))
        
        scored_events.sort(key=lambda x: x[1], reverse=True)
        recommended = [event for event, _ in scored_events[:top_n]]
        
        if recommended:  # Return if we found matches
            return recommended
    
    # Step 2: Fallback to ANY popular events (including past ones)
    event_popularity = defaultdict(int)
    for reg in registrations:
        event_popularity[reg['eventId']] += 1
    
    # Sort all events by popularity (regardless of status/time)
    all_events_sorted = sorted(
        events,
        key=lambda x: (-event_popularity[x['_id']], x['title']), 
    )
    
    # Exclude events the user already registered for
    user_registered_ids = {r['eventId'] for r in registrations if r['userId'] == user_id}
    fallback_events = [
        e for e in all_events_sorted
        if e['_id'] not in user_registered_ids
    ]
    
    return fallback_events[:top_n]  # Return top-N popular events

In [1]:
import time
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_groq import ChatGroq
from threading import Thread

# === Setup Groq LLM ===
llm = ChatGroq(temperature=0.2, model_name="llama-3.3-70b-versatile")

# === Memory for storing messages ===
memory = ConversationBufferMemory()

# === LangChain conversation chain ===
conversation = ConversationChain(llm=llm, memory=memory, verbose=False)

# === Simulated live chat stream ===
chat_stream = [
    "Hey, did anyone watch the keynote?",
    "Yes! The AI section was amazing!",
    "I liked the demo on real-time voice translation.",
    "What was the name of the presenter again?",
    "I think it was Dr. Lisa Wong.",
    "Yeah, and she mentioned something about a new open-source project.",
    "Exactly, it’s called WhisperX!"
]

# === Function to simulate chat ingestion ===
def stream_chat():
    for msg in chat_stream:
        memory.chat_memory.add_user_message(msg)
        print(f"New Message: {msg}")
        time.sleep(2)  # Simulate delay between messages

# === Function to summarize chat every few seconds ===
def summarize_chat():
    while True:
        summary_prompt = "Summarize the current chat so far in 2-3 sentences."
        summary = conversation.run(summary_prompt)
        print("\n--- Live Summary ---")
        print(summary)
        print("--------------------\n")
        time.sleep(6)  # Summarize every 6 seconds

# === Start threads for streaming and summarizing ===
chat_thread = Thread(target=stream_chat)
summary_thread = Thread(target=summarize_chat)

chat_thread.start()
summary_thread.start()

chat_thread.join()
summary_thread.join()


  memory = ConversationBufferMemory()
  conversation = ConversationChain(llm=llm, memory=memory, verbose=False)
  summary = conversation.run(summary_prompt)


New Message: Hey, did anyone watch the keynote?

--- Live Summary ---
We've just started our conversation, and so far, you've asked if anyone watched the keynote, but we haven't discussed any details about it yet. You then asked me to summarize our current chat, which I'm happy to do. Our conversation is still in its early stages, and we're just getting started.
--------------------

New Message: Yes! The AI section was amazing!


KeyboardInterrupt: 

New Message: I liked the demo on real-time voice translation.
New Message: What was the name of the presenter again?

--- Live Summary ---
We've been discussing the keynote, specifically the AI section, which you thought was amazing. You mentioned enjoying the demo on real-time voice translation, but unfortunately, I don't have information on who the presenter was, as that detail wasn't mentioned in our conversation. Our chat has been focused on your positive experience with the keynote's AI segment, but we haven't explored other topics yet.
--------------------

New Message: I think it was Dr. Lisa Wong.


Exception in thread Exception in threading.excepthook:
Exception ignored in thread started by: <bound method Thread._bootstrap of <Thread(Thread-4 (stream_chat), stopped 135876694857408)>>
Traceback (most recent call last):
  File "/home/aldrax/anaconda3/lib/python3.12/threading.py", line 1030, in _bootstrap
    self._bootstrap_inner()
  File "/home/aldrax/anaconda3/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self._invoke_excepthook(self)
  File "/home/aldrax/anaconda3/lib/python3.12/threading.py", line 1389, in invoke_excepthook
    local_print("Exception in threading.excepthook:",
  File "/home/aldrax/anaconda3/lib/python3.12/site-packages/ipykernel/iostream.py", line 573, in flush
    self.pub_thread.schedule(self._flush)
  File "/home/aldrax/anaconda3/lib/python3.12/site-packages/ipykernel/iostream.py", line 266, in schedule
    self._event_pipe.send(b"")
  File "/home/aldrax/anaconda3/lib/python3.12/site-packages/zmq/sugar/socket.py", line 696, in send
    ret

New Message: Yeah, and she mentioned something about a new open-source project.
--- Live Summary ---
We've been discussing the keynote, particularly the AI section, which you found amazing and featured a demo on real-time voice translation that you enjoyed. You recalled that the presenter was Dr. Lisa Wong, and she mentioned a new open-source project, although we didn't delve into the specifics of the project. Our conversation has centered around your positive experience with the keynote's AI segment and the details you remembered about Dr. Wong's presentation.
--------------------


--- Live Summary ---
We've been discussing the keynote, specifically the AI section presented by Dr. Lisa Wong, which you thought was amazing and featured a notable demo on real-time voice translation. Dr. Wong also mentioned a new open-source project during her presentation, although we haven't discussed the details of the project. Our conversation has been focused on your experience with the keynote's AI

In [3]:
llm.invoke("hello").content

'Hello. How can I help you today?'

In [10]:
import json
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq

# === Load chat history from JSON and filter by eventId ===
def load_chat_messages(filepath: str, event_id: str) -> str:
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Filter messages by eventId
    messages = [entry["message"] for entry in data if entry["eventId"] == event_id]

    # Combine into a single string
    return "\n".join(messages)

# === Setup LLM ===
llm = ChatGroq(temperature=0.3, model_name="llama3-70b-8192")

prompt = PromptTemplate(
    input_variables=["chat"],
    template="""
You are an assistant summarizing an event's chat history. Write a 2–3 paragraph summary that captures key ideas, names, and discussions.

Chat:
{chat}

Summary:
"""
)

chain = LLMChain(llm=llm, prompt=prompt)

# === Run summarization ===
event_id = "681e535fc99231a537cc9c90"  # Replace with your target eventId
chat_text = load_chat_messages("chat_history.json", event_id)
summary = chain.run(chat=chat_text)

print("\n=== Summary for Event ID", event_id, "===\n")
print(summary)



=== Summary for Event ID 681e535fc99231a537cc9c90 ===

Here is a summary of the chat:

The chat discussion revolved around the concept of change" and its potential benefits. The conversation started with a mention of a "computer inside" which seemed to be related to an energy project. The project's goal was to bring about positive impact, with one of the key benefits being the project could lead to personal growth and development for the individuals involved.

The chat also touched on the idea that this project had the potential to bring about significant change, with one participant mentioning that the project could be a "game-changer". Although the details of the project were not fully fleshed out, the participants were enthusiastic about its potential to drive positive change.

Overall, the chat was optimistic in tone, with participants expressing excitement about the project's potential to make a meaningful impact. Unfortunately, the chat did not delve deeper into the specifics of