In [86]:
import dspy
import os
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import List
from datetime import datetime


# Run python-dotenv's loader first, then read the Gemini key from the process
# environment. The value is None when the variable is not set.
load_dotenv()
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')

In [87]:
# Build the language-model client once and hand it to dspy.configure so the
# dspy modules defined below use it. `lm` is also kept for inspect_history.
lm = dspy.LM(
    model='gemini/gemini-2.0-flash-exp',
    api_key=GEMINI_API_KEY,
)
dspy.configure(lm=lm)

In [88]:
# One entity extracted from an utterance. Used as the element type of
# ExtractEntities.entities (output) and ExtractEvents.entities (input).
# NOTE(review): documented with `#` comments on purpose -- the JSON schema of
# this model is embedded in the prompt, and a class docstring would presumably
# be emitted into that schema; confirm before converting to a docstring.
class Entity(BaseModel):
    entity: str  # presumably the entity as it is named in the text -- confirm
    type: str  # free-form category label chosen by the model (plain str, no enum)

# An entity as it participates in one specific event: the same two fields as
# Entity plus the role it plays in that event.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably end up in the JSON schema shown to the model; confirm.
class EventEntity(BaseModel):
    entity: str  # name of the participant, e.g. 'John Smith' in the sample output
    type: str  # free-form category, e.g. 'person', 'group', 'symptom'
    role: str  # role within the event, e.g. 'student', 'caregiver', 'absentee'

# One extracted event; the element type of ExtractEvents.events.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably end up in the JSON schema shown to the model; confirm.
class Event(BaseModel):
    action: str  # what happened, e.g. 'John Smith missed seven days of school'
    type: str  # free-form event category, e.g. 'absence', 'illness'
    # Plain string, not a date type: sample output contains values such as
    # '2025-01-18', '2024-11' and 'every Friday', so do not assume ISO format.
    date: str
    location: str  # free text; the model emits e.g. 'unspecified' when unknown
    entities: List[EventEntity]  # participants together with their roles

# Signature for the first pipeline stage: utterance text + speaker -> entities.
# NOTE: the class docstring is not only documentation. It is sent verbatim to
# the model as the task objective (it appears after "your objective is:" in
# the inspect_history output), so it must read as a clear instruction. The
# previous wording, "Extract structEntitiesured from text.", was garbled.
class ExtractEntities(dspy.Signature):
    """Extract structured entities from text."""

    # Inputs: the utterance and who said it.
    text: str = dspy.InputField()
    speaker: str = dspy.InputField(desc="the speaker of the text")

    # Output: parsed into a list of Entity models.
    entities: List[Entity] = dspy.OutputField(desc="a list of entities and their metadata")

# class ExtractEvents(dspy.Signature):
#     """Extract a list of events, each containing Event type, date, location and participating entities (if any, along with their role in the specific event) information from text, current date and given entities."""

#     text: str = dspy.InputField()
#     speaker: str = dspy.InputField(desc="the speaker of the text")
#     entities: List[Entity] = dspy.InputField(desc="a list of entities and their metadata")
#     current_date: str = dspy.InputField(desc="the current date to convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates")
    
#     events: List[Event] = dspy.OutputField(desc="a list of events and their metadata with fields: action(What Happened), type, date (convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates), location, entities (fetched from input)")

# Signature for the second pipeline stage: utterance text, speaker, the
# entities found by the first stage and today's date -> events.
# The docstring and every `desc` string are part of the prompt sent to the
# model; the long literals are only wrapped here, their text is unchanged.
class ExtractEvents(dspy.Signature):
    """Extract a list of relevant events, each containing Event type, date, location and participating entities (if any, along with their role in the specific event) information from text, current date and given entities."""

    text: str = dspy.InputField()
    speaker: str = dspy.InputField(desc="the speaker of the text")
    entities: List[Entity] = dspy.InputField(
        desc="a list of entities and their metadata"
    )
    current_date: str = dspy.InputField(
        desc=(
            "the current date to convert relative dates like "
            "'today', 'yesterday', 'tomorrow' to actual dates"
        )
    )

    events: List[Event] = dspy.OutputField(
        desc=(
            "a list of events being talked about, either happening during the meeting "
            "or being referenced to, should NOT include events to happen in the future, "
            "and their metadata with fields: action(What Happened), type, "
            "date (convert relative dates like 'today', 'yesterday', 'tomorrow' to actual dates), "
            "location, entities (fetched from input)"
        )
    )




class KnowledgeExtraction(dspy.Module):
    """Two-stage extraction pipeline: entities first, then events.

    Stage one runs ``ExtractEntities`` on the utterance; stage two runs
    ``ExtractEvents`` on the same utterance together with the entities found
    in stage one and today's date.
    """

    def __init__(self):
        # Let dspy.Module set up its own state before sub-modules are attached.
        super().__init__()
        self.cot1 = dspy.ChainOfThought(ExtractEntities)
        self.cot2 = dspy.ChainOfThought(ExtractEvents)

    def forward(self, text, speaker):
        """Extract events from a single utterance.

        Args:
            text: The utterance text.
            speaker: The speaker of the utterance.

        Returns:
            The prediction produced by the event-extraction stage; the
            extracted events are available on its ``events`` attribute.
        """
        entity_prediction = self.cot1(text=text, speaker=speaker)
        # Local wall-clock date, formatted as YYYY-MM-DD, so the model can
        # resolve relative dates such as 'today' or 'yesterday'.
        current_date = datetime.now().strftime('%Y-%m-%d')
        return self.cot2(
            text=text,
            speaker=speaker,
            # Pass only the extracted list. The prediction object also carries
            # the chain-of-thought `reasoning` field and is not a List[Entity],
            # which is what the ExtractEvents signature declares.
            entities=entity_prediction.entities,
            current_date=current_date,
        )
    
# Single pipeline instance; the processing loop further down calls it once per
# utterance as module(text=..., speaker=...).
module = KnowledgeExtraction()

# text = "Apple Inc. announced its latest iPhone 14 today." \
#     "The CEO, Tim Cook, highlighted its new features in a press release."
# response = module(text=text)
# events = response.events

In [89]:
from pymongo import MongoClient

# Connect with the URI taken from the environment (None when MONGODB_URI is
# unset) and select the 'events' collection of the 'renai' database.
client = MongoClient(os.environ.get('MONGODB_URI'))
db = client.get_database('renai')
collection = db.get_collection('events')

In [90]:
import json
# Load utterances_info.json. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default encoding.
with open('../utterances_info.json', 'r', encoding='utf-8') as f:
    utterances = json.load(f)

# Run the extraction pipeline on every utterance and collect the events in
# memory. Nothing is written to MongoDB in this cell.
# Each utterance is expected to provide 'text' and 'speaker' keys.
all_events = []
for utterance in utterances:
    text = utterance['text']
    speaker = utterance['speaker']
    response = module(text=text, speaker=speaker)

    events = response.events
    print(events)  # per-utterance progress output
    all_events.extend(events)


[Event(action='students skipping school', type='absence', date='every Friday', location='school', entities=[EventEntity(entity='chronically absent students', type='group', role='absentee'), EventEntity(entity='Fridays', type='time', role='time of absence')])]
[]
[]
[Event(action='students getting sick', type='illness', date='2025-01-18', location="D's office", entities=[EventEntity(entity='students', type='group of people', role='patient'), EventEntity(entity='sniffling', type='symptom', role='symptom'), EventEntity(entity='coughing', type='symptom', role='symptom')])]
[Event(action='John Smith missed seven days of school', type='absence', date='2024-11', location='school', entities=[EventEntity(entity='John Smith', type='person', role='student')])]
[Event(action='John is helping his parents take care of his younger siblings', type='family_responsibility', date='2025-01-18', location='unspecified', entities=[EventEntity(entity='John', type='person', role='caregiver'), EventEntity(entit

In [91]:
# Show every collected event on its own line. The guard keeps the output empty
# (no stray blank line) when nothing was collected.
if all_events:
    print(*all_events, sep='\n')

action='students skipping school' type='absence' date='every Friday' location='school' entities=[EventEntity(entity='chronically absent students', type='group', role='absentee'), EventEntity(entity='Fridays', type='time', role='time of absence')]
action='students getting sick' type='illness' date='2025-01-18' location="D's office" entities=[EventEntity(entity='students', type='group of people', role='patient'), EventEntity(entity='sniffling', type='symptom', role='symptom'), EventEntity(entity='coughing', type='symptom', role='symptom')]
action='John Smith missed seven days of school' type='absence' date='2024-11' location='school' entities=[EventEntity(entity='John Smith', type='person', role='student')]
action='John is helping his parents take care of his younger siblings' type='family_responsibility' date='2025-01-18' location='unspecified' entities=[EventEntity(entity='John', type='person', role='caregiver'), EventEntity(entity='his parents', type='person', role='care recipient'), 

In [92]:
# events_dicts = [event.dict() for event in events]
# collection.insert_many(events_dicts)

In [93]:
# # Convert events to dictionaries and insert into MongoDB
# events_dicts = [event.dict() for event in all_events]
# collection.insert_many(events_dicts)

In [94]:
# Print the 10 most recent prompt/response exchanges recorded by the LM client.
lm.inspect_history(n=10)





[34m[2025-01-18T18:21:52.617249][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str)
2. `speaker` (str): the speaker of the text

Your output fields are:
1. `reasoning` (str)
2. `entities` (list[Entity]): a list of entities and their metadata

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## speaker ## ]]
{speaker}

[[ ## reasoning ## ]]
{reasoning}

[[ ## entities ## ]]
{entities}        # note: the value you produce must be pareseable according to the following JSON schema: {"type": "array", "$defs": {"Entity": {"type": "object", "properties": {"type": {"type": "string", "title": "Type"}, "entity": {"type": "string", "title": "Entity"}}, "required": ["entity", "type"], "title": "Entity"}}, "items": {"$ref": "#/$defs/Entity"}}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Extract structEntitiesured from text.


[31mUser message:[0m

[[ ## text #