In [23]:
import os
from gatenlp import Document
from gatenlp.corpora import ListCorpus
import requests
import json
import os
import ipywidgets as widgets
from IPython.display import display, Markdown
from gatenlp.lib_spacy import AnnSpacy
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
import pandas as pd
from tqdm import tqdm
import ollama
from pydantic import BaseModel

from dotenv import load_dotenv


In [36]:
load_dotenv()

user_email = os.getenv("USEREMAIL")  # Enter your email here
password = os.getenv("PASSWORD")  # Enter your password here

# Fetch Access Token

# Define the URL for the authentication endpoint
auth_url = "http://localhost:8080/api/v1/auths/signin"

# Define the payload with user credentials
auth_payload = json.dumps({"email": user_email, "password": "admin"})

# Define the headers for the authentication request
auth_headers = {"accept": "application/json", "content-type": "application/json"}

# Make the POST request to fetch the access token
auth_response = requests.post(auth_url, data=auth_payload, headers=auth_headers)

# Extract the access token from the response
access_token = auth_response.json().get("token")

In [27]:
class Event(BaseModel):
  event: str
  event_who: str
  event_when: str
  event_what: str
  event_type: str

class EventList(BaseModel):
  events: list[Event]

In [43]:
def askChatbot(model, role, instruction, content):
    chat_url = "http://localhost:11434/api/chat"

    # Define the headers for the chat completion request, including the access token
    chat_headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {access_token}",
    }

    # Define the payload for the chat completion request
    chat_payload = json.dumps(
        {
            "stream": False,
            "model": model,
            "temperature": 0.0,
            "messages": [
                {"role": "system", "content": role},  # System role with additional context
                {"role": "user", "content": f"{instruction}\n\n{content}"},  # User message with instruction and text
            ],
        }
    )

    # Make the POST request to the chat completion endpoint
    chat_response = requests.post(chat_url, data=chat_payload, headers=chat_headers)
    #print(chat_response.json()["message"]["content"])
    structured_response = EventList.model_validate_json(chat_response.json())
    return chat_response

In [40]:
def askChatbotLocal(model, role, instruction, content):
    try:
        response = ollama.chat(
            model = model,
            options = {
                'temperature': 0
            }, 
            format = EventList.model_json_schema(),  # Use Pydantic to generate the schema or format=schema
            messages=
            [
                {"role": "system", "content": role},  # System role with additional context
                {"role": "user", "content": f"{instruction}\n\n{content}"},  # User message with instruction and text
            ]
        )

        chat_response = response['message']['content']
        structured_response = EventList.model_validate_json(response.message.content)
                    
    except Exception as e:
        print(f"Error with model {model}: {str(e)}")

    return chat_response

In [29]:
# Create a new corpus with an empty list
corpus = ListCorpus([])

# Define the base directory
base_dir = "input/annotated"

# Walk through the directory and load each XML file
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".xml"):
            file_path = os.path.join(root, file)
            doc = Document.load(file_path, fmt="gatexml")
            # Add the document to the corpus
            corpus.append(doc)
            print(f"Loaded {file_path} into corpus")            
                
print("All documents loaded into the corpus.")

Loaded input/annotated/train/CASE OF MURUZHEVA v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF O.C.I. AND OTHERS v. ROMANIA.xml into corpus
Loaded input/annotated/train/CASE OF EGILL EINARSSON v. ICELAND (No. 2).xml into corpus
Loaded input/annotated/train/CASE OF HOINESS v. NORWAY.xml into corpus
Loaded input/annotated/train/CASE OF MOSKALEV v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF RESIN v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF S.V. v. ITALY.xml into corpus
Loaded input/annotated/train/CASE OF YERMAKOVICH v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF S.N. v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF PAKHTUSOV v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF OTGON v. THE REPUBLIC OF MOLDOVA.xml into corpus
Loaded input/annotated/train/CASE OF SHVIDKIYE v. RUSSIA.xml into corpus
Loaded input/annotated/train/CASE OF SIDOROVA v. RUSSIA.xml into corpus
Loaded input/annotated/train/CA

In [None]:
len(corpus)

30

In [None]:
print(doc.features.get("gate.SourceURL").replace("file:/C:/Users/mnavas/", "").replace("%20", " "))

CASE OF CRISTIAN CATALIN UNGUREANU v. ROMANIA.docx


In [None]:
#nlp_spacy = English()
#nlp_spacy.add_pipe('sentencizer')
#tokenize = AnnSpacy(nlp_spacy, add_nounchunks=False, add_deps=False, add_entities=False)

#for doc in corpus:
#    doc = tokenize(doc)
#    doc

In [None]:
from gatenlp.visualization import CorpusViewer

viewer = CorpusViewer(corpus)
viewer.show()

HBox(children=(Button(icon='arrow-left', layout=Layout(width='5em'), style=ButtonStyle()), IntSlider(value=0, …

In [None]:
models = ["gemma3:12b",
          "GandalfBaum/llama3.1/claude3.7",
          "chevalblanc/claude-3-haiku:latest",
          "incept5/llama3.1-claude:latest",
          "llama3.3:latest",
          "deepseek-r1:8b",
          "mistral:latest"
]

event_definitions = """
You are an expert in legal text analysis. Here are the definitions of legal events:
- Event: Relates to the extent of text containing contextual event-related information. 
- Event_who: Corresponds to the subject of the event, which can either be a subject, but also an object (i.e., an application). 
    Examples: applicant, respondent, judge, witness
- Event_what: Corresponds to the main verb reflecting the baseline of all the paragraph. Additionally, we include thereto a complementing verb or object whenever the core verb is not self-explicit or requires an extension to attain a sufficient meaning.
    Examples: lodged an application, decided, ordered, dismissed
- Event_when: Refers to the date of the event, or to any temporal reference thereto.
- Event_circumstance: Meaning that the event correspond to the facts under judgment.
- Event_procedure: The events belongs to the procedural dimension of the case.

Events contain the annotations event_who, event_what and event_when. Events can be of type event_circumstance and event_procedure.
"""

instruction = "Analyze the provided text and extract the legal events. Provide the results in a structured format. Obviously, Event_who, Event_what and Event_when can only appear within an Event. If you find an event, also classify it into an event_circumstance or event_procedure. Do not invent additional information."


In [None]:
viaWeb = False
results = []
# Iterate over documents and models
for doc in tqdm(corpus, desc="Processing documents"):
    doc_dict = {"Document": doc.features.get("gate.SourceURL")}
    print(f"Processing document: {doc.features.get("gate.SourceURL")}")
    
    # Combine all procedure texts for the document
    procedure_texts = []
    annotations = doc.annset("Section")
    procedure_annotations = annotations.with_type("Procedure")
    for ann in procedure_annotations:
        procedure_text = doc.text[ann.start:ann.end]
        procedure_texts.append(procedure_text)
    combined_procedure_text = " ".join(procedure_texts)
    #print(f"Combined procedure text: {combined_procedure_text}")
    
    # Iterate over models
    for model in models:
        try:
            print(f"Using model: {model}")
            
            # Call the chatbot with role, instruction, and content
            if viaWeb == True:
                # via WebUI
                chat_response = askChatbot(model, event_definitions, instruction, combined_procedure_text)
                # Extract and store the response
                response_content = chat_response.json().get("message", {}).get("content", "No response content")
            else:
                # without WebUI
                chat_response = askChatbotLocal(model, event_definitions, instruction, combined_procedure_text)
                response_content = chat_response
            
            print(f"Response from {model}:\n{response_content}")
            doc_dict[model] = response_content
            
        except Exception as e:
            with open("error.txt", "a") as file:
                file.write(f"Error with model {model}: {str(e)}")
            file.close()
    
    # Append the document dictionary to the results list
    results.append(doc_dict)

# Convert results to a DataFrame and save as CSV
df = pd.DataFrame(results)
#df.to_csv("chat_responses_with_instructions.csv", index=False)
df.to_excel("chat_responses_with_instructions.xlsx", index=False)

Processing documents:   0%|          | 0/30 [00:00<?, ?it/s]

Processing document: file:/C:/Users/mnavas/CASE%20OF%20MURUZHEVA%20v.%20RUSSIA.docx
Using model: gemma3:12b


2025-05-16 20:50:13,182|INFO|httpx|HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-16 20:50:13,189|INFO|httpx|HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 400 Bad Request"
Processing documents:   0%|          | 0/30 [00:11<?, ?it/s]

Response from gemma3:12b:
{"events": [
    {
        "event": "The case originated in an application (no. 62526/15) against the Russian Federation lodged with the Court under Article 34 of the Convention for the Protection of Human Rights and Fundamental Freedoms (\"the Convention\") by a Russian national, Ms Leyla Khamarzovna Muruzheva (\"the applicant\")",
        "event_who": "Leyla Khamarzovna Muruzheva (the applicant)",
        "event_when": "11 December 2015",
        "event_what": "lodged an application",
        "event_type": "event_procedure"
    },
    {
        "event": "The applicant was represented by Ms V. Kogan and Mr E. Wesselink from the Stichting Russian Justice Initiative, an NGO based in Moscow. The Russian Government (\"the Government\") were initially represented by Mr G. Matyushkin, Representative of the Russian Federation to the European Court of Human Rights, and then by his successor in that office, Mr M. Galperin.",
        "event_who": "applicant, Russian Go




UnboundLocalError: cannot access local variable 'chat_response' where it is not associated with a value