In [20]:
import json
import os
import re
from datetime import datetime
from typing import List

import dspy
import spacy
from dotenv import load_dotenv
from pydantic import BaseModel
from pymongo import MongoClient



# Load environment variables via python-dotenv before reading them below.
load_dotenv()

# Credentials come from the environment; each name is None when its variable is unset.
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
MONGODB_URI = os.environ.get('MONGODB_URI')


In [21]:
# Create the Gemini-backed LM client and register it as the LM DSPy modules will use.
lm = dspy.LM(model='gemini/gemini-2.0-flash-exp', api_key=GEMINI_API_KEY)
dspy.configure(lm=lm)

In [22]:
# Connect to MongoDB Atlas and select the `events` collection of the `renai` database.
client = MongoClient(MONGODB_URI)
db = client.get_database('renai')
collection = db.get_collection('events')

# spaCy pipeline used for named-entity recognition in KnowledgeExtraction.
nlp = spacy.load("en_core_web_sm")

In [23]:
# A named entity found in the input text; passed to the LM as part of the
# `entities` input of ExtractEvents.
class Entity(BaseModel):
    entity: str  # normalised surface text of the entity
    type: str  # entity label (filled from spaCy's `ent.label_`)

# An entity taking part in a specific event, together with the role it plays there.
class EventEntity(BaseModel):
    entity: str  # entity text
    type: str  # entity label, e.g. 'PERSON' or 'DATE' in the sample output
    role: str  # free-form role of the entity within the event

# One extracted event; element type of the `events` output of ExtractEvents.
class Event(BaseModel):
    action: str  # what happened
    type: str  # event category (free-form; the sample output mixes casing)
    date: str  # event date as a plain string; no format is enforced here
    location: str  # where the event took place
    entities: List[EventEntity]  # participating entities with their roles (may be empty)

In [24]:
# DSPy signature for the event-extraction step.
# NOTE: the `desc` strings below appear verbatim in the prompt (see the
# `inspect_history` output further down), and the class docstring presumably
# serves as the task instruction, so treat both as runtime text rather than
# documentation when editing.
class ExtractEvents(dspy.Signature):
    """Extract a list of relevant events, each containing Event type, date, location and participating entities (if any, along with their role in the specific event) information from text, current date and given entities."""

    # --- inputs ---
    text: str = dspy.InputField()
    speaker: str = dspy.InputField(desc="the speaker of the text")
    entities: List[Entity] = dspy.InputField(desc="a list of entities and their metadata")
    current_date: str = dspy.InputField(desc="the current date to convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates")
    
    # --- output ---
    events: List[Event] = dspy.OutputField(desc="a list of events being talked about, either happening during the meeting or being referenced to, should NOT include events to happen in the future, and their metadata with fields: action(What Happened), type, date (convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates), location, entities (fetched from input)")


In [25]:
class KnowledgeExtraction(dspy.Module):
    """Extract events from a single utterance.

    Named entities are detected with the module-level spaCy pipeline ``nlp``,
    normalised, and passed together with the text, speaker and a reference
    date to a chain-of-thought predictor built on ``ExtractEvents``.
    """

    # A "word" is a run of letters, optionally continued across apostrophes,
    # so that "McDonald's" is capitalised as one word instead of two.
    _WORD_RE = re.compile(r"[^\W\d_]+(?:['’][^\W\d_]+)*")

    def __init__(self):
        # Initialise dspy.Module's own state before attaching sub-modules.
        super().__init__()
        self.cot2 = dspy.ChainOfThought(ExtractEvents)

    def normalize_text(self, text):
        """Return ``text`` with each word capitalised.

        Unlike ``str.title()``, letters following an apostrophe are not
        upper-cased ("john's" -> "John's", not "John'S").
        """
        return self._WORD_RE.sub(lambda match: match.group(0).capitalize(), text)

    def extract_entities(self, text):
        """Run spaCy NER on ``text`` and return a list of ``Entity`` objects.

        Each entity carries the normalised entity text and the spaCy label.
        """
        doc = nlp(text)
        return [
            Entity(entity=self.normalize_text(ent.text), type=ent.label_)
            for ent in doc.ents
        ]

    def forward(self, text, speaker, current_date=None):
        """Extract events from ``text`` spoken by ``speaker``.

        Args:
            text: The utterance to analyse.
            speaker: The speaker of the utterance.
            current_date: Optional reference date as a ``'YYYY-MM-DD'`` string,
                used by the LM to resolve relative dates. Defaults to today's
                date, which is the original behaviour; pass it explicitly when
                processing a transcript on a later day than it was recorded.

        Returns:
            The predictor's result; its ``events`` attribute holds the
            extracted events.
        """
        entities = self.extract_entities(text)
        if current_date is None:
            current_date = datetime.now().strftime('%Y-%m-%d')
        events = self.cot2(text=text, speaker=speaker, entities=entities, current_date=current_date)
        return events

In [26]:
# Build the extraction pipeline once; the processing loop below calls it per utterance.
module = KnowledgeExtraction()

In [30]:
# Load utterances_info.json: a list of records, each with 'text' and 'speaker' keys.
# The encoding is given explicitly because JSON is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
with open('../utterances_info.json', 'r', encoding='utf-8') as f:
    utterances = json.load(f)

# Run event extraction on every utterance and collect the results in memory.
# Nothing is written to MongoDB in this cell.
all_events = []
for utterance in utterances:
    text = utterance['text']
    speaker = utterance['speaker']
    response = module(text=text, speaker=speaker)

    events = response.events
    print(events)
    all_events.extend(events)


[Event(action='students skipping school', type='Absence', date='2025-01-17', location='school', entities=[EventEntity(entity='Fridays', type='DATE', role='time')])]
[]
[]
[Event(action='students getting sick with symptoms like sniffling and coughing', type='illness_outbreak', date='2025-01-18', location="speaker's office", entities=[])]
[Event(action='missed school', type='absence', date='2024-11', location='school', entities=[EventEntity(entity='John Smith', type='PERSON', role='student')])]
[Event(action='John was stressed and dealing with helping his parents take care of his younger siblings', type='Personal', date='2025-01-18', location='Unknown', entities=[EventEntity(entity='John', type='PERSON', role='Person experiencing stress')])]
[Event(action='talk to John', type='Meeting', date='2025-01-18', location="B's office", entities=[EventEntity(entity='John', type='PERSON', role='attendee')])]
[Event(action='discussed ideas', type='meeting', date='2025-01-18', location='unknown', en

In [28]:
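# # Convert events to dictionaries and insert them into MongoDB.
# # Left commented out so that running the notebook does not write to the database;
# # uncomment to persist `all_events`.
# # (On Pydantic v2, `event.model_dump()` is the non-deprecated spelling of `event.dict()`.)
# events_dicts = [event.dict() for event in all_events]
# collection.insert_many(events_dicts)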

In [29]:
# Print recent LM interactions to check the prompt DSPy actually sent
# (the argument is presumably the number of most recent calls to show).
lm.inspect_history(10)





[34m[2025-01-18T21:09:05.825905][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str)
2. `speaker` (str): the speaker of the text
3. `entities` (list[Entity]): a list of entities and their metadata
4. `current_date` (str): the current date to convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates

Your output fields are:
1. `reasoning` (str)
2. `events` (list[Event]): a list of events being talked about, either happening during the meeting or being referenced to, should NOT include events to happen in the future, and their metadata with fields: action(What Happened), type, date (convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates), location, entities (fetched from input)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## speaker ## ]]
{speaker}

[[ ## entities ## ]]
{entities}

[[ ## current_date ## ]]
{current_date}

[[ ## reasoning ## ]]
