In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# dataset stored on Drive can be opened like a local file.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install langchain-openai
!pip install timeout-decorator

import json
import numpy as np

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import timeout_decorator
import time

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

Collecting langchain-openai
  Downloading langchain_openai-0.3.33-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.76 (from langchain-openai)
  Downloading langchain_core-0.3.76-py3-none-any.whl.metadata (3.7 kB)
Downloading langchain_openai-0.3.33-py3-none-any.whl (74 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-0.3.76-py3-none-any.whl (447 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m447.5/447.5 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-core, langchain-openai
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.75
    Uninstalling langchain-core-0.3.75:
      Successfully uninstalled langchain-core-0.3.75
Successfully installed langchain-core-0.3.76 langchain-openai-0.3.33
Collecting timeout-decorator
  Downloading timeout-decorator-0.5.0.tar.

Initialize the model

In [None]:
import os

# Prefer the ISDM_API_KEY environment variable so the secret does not have to be
# written into (and saved with) the notebook; the inline placeholder is only a
# fallback for quick local edits.
ISDM_API_KEY = os.environ.get("ISDM_API_KEY", "...") # Your API key here

# Connection settings for the OpenAI-compatible chat endpoint
OPENAI_API_KEY = ISDM_API_KEY
OPENAI_CHAT_MODEL = "solidrust/Codestral-22B-v0.1-hf-AWQ"
OPENAI_CHAT_API_URL = "https://isdm-chat.crocc.meso.umontpellier.fr/openai"
model = OPENAI_CHAT_MODEL

# Chat client pointed at the custom endpoint instead of the default OpenAI URL
llm = ChatOpenAI(
    model=OPENAI_CHAT_MODEL,
    openai_api_key=OPENAI_API_KEY,
    openai_api_base=OPENAI_CHAT_API_URL,
)

from langchain_core.output_parsers import StrOutputParser
# Parser used as the last stage of the chain
parser = StrOutputParser()

Open the data

In [None]:
# Parallel lists: index i of each list describes the same training example.
text = []
acronyms = []
options = []

# The path is relative, so it resolves to the mounted Drive only when the
# working directory is /content (the parent of the mount point).
# `with` closes the file even if a record raises; the explicit encoding avoids
# depending on the platform default when decoding the JSON lines.
with open("drive/MyDrive/data/defi-text-mine-egc-2026/train_v2.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        try:
            data = json.loads(line)
        except ValueError:
            # json.JSONDecodeError is a ValueError: report the bad line and go on
            print('Invalid input:',line)
            continue

        # Read every field before appending anything, so a record with a
        # missing key cannot leave the three lists with different lengths.
        sample_text = data["text"]
        sample_acronym = data["acronym"]
        sample_options = data["options"]

        text.append(sample_text)
        acronyms.append(sample_acronym)
        options.append(sample_options)

# An LLM based approach

Query the LLM of UM (an instance of Codestral-22B)

Define the template

In [None]:
# Prompt text with three placeholders that are filled in for every example:
# {ACRONYM}, {TEXT} and {OPTIONS}. The model is instructed to answer with a
# boolean list only (presumably one entry per option, in the order given —
# the prompt itself does not state this explicitly).
template = '''Find what is "{ACRONYM}" in this phrase: "{TEXT}". Evaluate the following options by giving the boolean list as an answer: '{OPTIONS}'. Output only the boolean list. Do not output any explanation nor other text'''

# Build a PromptTemplate from the raw string so the chain can fill the placeholders.
prompt = PromptTemplate.from_template(template)

Define the pipeline

In [None]:
# Compose the pipeline: fill the prompt, send it to the LLM, then pass the
# model output through `parser`.
chain = prompt | llm | parser

@timeout_decorator.timeout(10, use_signals=False)  # give up on the call after 10 seconds
def invoke_with_timeout(ACRONYM,TEXT,OPTIONS):
    """Run `chain` once for a single example, bounded by the 10-second timeout.

    The three arguments are substituted for the placeholders of the same name
    in the prompt template; the chain's output is returned unchanged.
    """
    template_values = dict(ACRONYM=ACRONYM, TEXT=TEXT, OPTIONS=OPTIONS)
    return chain.invoke(template_values)

def prediction(ACRONYM,TEXT,OPTIONS):
    """Query the LLM for one example, retrying once if the first call times out.

    Args:
        ACRONYM: value substituted for {ACRONYM} in the prompt.
        TEXT: value substituted for {TEXT} in the prompt.
        OPTIONS: value substituted for {OPTIONS} in the prompt.

    Returns:
        Whatever `invoke_with_timeout` returns, or the sentinel string 'error'
        when the retry fails as well.

    Raises:
        Any exception other than a timeout raised by the FIRST attempt is not
        handled here and propagates to the caller.
    """
    try:
        return invoke_with_timeout(ACRONYM,TEXT,OPTIONS)  # Try to invoke within the timeout
    except timeout_decorator.timeout_decorator.TimeoutError:
        print(f"Timeout occurred for ACRONYM: {ACRONYM}. Retrying...")
        time.sleep(2)  # Short pause before retrying

    try:
        return invoke_with_timeout(ACRONYM,TEXT,OPTIONS)  # Retry the operation
    except Exception as exc:
        # `Exception` (not a bare except) so that Ctrl-C / kernel interrupt is
        # not swallowed and can still stop a long-running loop.
        print(f"Retry failed for ACRONYM: {ACRONYM} ({exc!r})")
        return 'error'

Function to post-process the results

In [None]:
import re
import ast

def extract_bool_list(text):
    """Return the first list of booleans found in `text`, or None if there is none.

    Quotes are stripped and `true`/`false` are accepted in any letter case, so
    answers such as '["true", "False"]' are understood. Bracketed fragments
    that are not lists of booleans (for example '[ASSISTANT]') are ignored.

    Example: ' [ASSISTANT] [False, True]' -> [False, True]

    Args:
        text: raw model output. Anything that is not a string yields None.

    Returns:
        A list of bool (possibly empty, for '[]'), or None when no bracketed
        fragment parses as a list made only of booleans.
    """
    if not isinstance(text, str):
        return None

    text = text.replace('"','').replace("'","") # get rid of all quotes

    # Match innermost bracket pairs only. Unlike r'\[.*?\]' this also finds
    # lists that span several lines and lists wrapped in an extra bracket.
    candidates = re.findall(r'\[[^\[\]]*\]', text)

    for cand in candidates:
        # normalize true/false in any case to Python's True/False
        cand_norm = re.sub(r'\btrue\b', 'True', cand, flags=re.IGNORECASE)
        cand_norm = re.sub(r'\bfalse\b', 'False', cand_norm, flags=re.IGNORECASE)

        try:
            val = ast.literal_eval(cand_norm)
        except Exception:
            # not a Python literal (e.g. [ASSISTANT]) -> skip
            continue

        # keep only lists made entirely of booleans; the first one wins
        if isinstance(val, list) and all(isinstance(x, bool) for x in val):
            return val

    return None

Perform prediction

In [None]:
# Upper bound on how many times one example is sent to the LLM. Without a cap,
# an example whose answer never parses is re-queued forever.
MAX_ATTEMPTS = 10

responses = []
result_dic = {}     # example index -> predicted boolean list
failed_items = []   # indices that hit MAX_ATTEMPTS and received the fallback
attempts = {}       # example index -> number of unusable answers so far
examples_to_process = list(range(len(text)))

count = 0
while examples_to_process:
  item = examples_to_process.pop(0)
  if count % 10 == 0:
    print(len(examples_to_process))  # progress: examples still queued
  count+=1

  # Query the LLM:
  option_names = list(options[item].keys())
  result = prediction(acronyms[item],text[item],str(option_names))

  # Post-process result: accept it only if it is a boolean list with exactly
  # one entry per option.
  labels_pred = extract_bool_list(result)
  if labels_pred is not None and len(labels_pred) == len(option_names):
    result_dic[item] = labels_pred
    continue

  # Unusable answer: put the example back at the end of the queue, unless it
  # has already used up its attempts.
  attempts[item] = attempts.get(item, 0) + 1
  if attempts[item] < MAX_ATTEMPTS:
    examples_to_process.append(item)
  else:
    print(f"Giving up on example {item} after {MAX_ATTEMPTS} attempts; predicting all False")
    failed_items.append(item)
    # Fallback keeps result_dic complete (one entry per example) for evaluation.
    result_dic[item] = [False] * len(option_names)

491
486
479
472
466
458
453
449
442
437
430
Timeout occurred for ACRONYM: EM. Retrying...
425
418
412
407
397
390
384
378
372
366
360
354
349
345
339
331
323
318
313
304
296
289
284
275
268
262
255
250
244
238
232
227
218
210
202
193
187
181
172
166
161
156
152
Timeout occurred for ACRONYM: SE. Retrying...
149
Timeout occurred for ACRONYM: EF. Retrying...
145
139
133
126
122
117
112
105
Timeout occurred for ACRONYM: TT. Retrying...
103
96
91
85
81
79
73
69
66
60
55
52
49
47
45
42
39
36
36
33
32
30
29
28
26
22
20
20
19
18
17
15
Timeout occurred for ACRONYM: EP. Retrying...
14
13
12
11
11
10
10
10
9
8
7
6
6
Timeout occurred for ACRONYM: EP. Retrying...
5
5
4
4
4
3
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
Timeout occurred for ACRONYM: EP. Retrying...
Timeout occurred for ACRONYM: EP. Retrying...
1
1
1
Timeout occurred for ACRONYM: EP. Retrying...
1
1
1
1
1
1
1
1
1
1
1
Timeout occurred for ACRONYM: EP. Retrying...
1
Timeout occurred for ACRONYM: EP. Retrying...
1
1
1
1
Timeout occurred for ACRONYM: E

Predicted labels

In [None]:
result_dic

{0: [False, True, False, False],
 1: [True, False, False, False, False, False, True],
 2: [False, False, False, True, False, False],
 3: [False, False, False, False, False, False, False, False, True, False],
 7: [True, False],
 11: [True, False, False, False],
 12: [True, False],
 13: [False, False, False, False, False, True],
 14: [True, False],
 15: [False, False, False, True],
 16: [False, False, False, False],
 17: [True, False, False],
 20: [False, False, False, False, False, True, False, False, False, False],
 21: [False, True, False],
 22: [False, True, False, False, False, False],
 23: [False, True],
 24: [False, True, False, False, False, False],
 27: [False, True, False, False, False, False],
 28: [False, False, False],
 30: [False, False, False, False, True, False],
 32: [False, False, False, False, False, False, True, False, False],
 33: [False, False, True],
 34: [True, False, False],
 36: [True, False],
 38: [False, False, False, False, False, False, False, True],
 41: [T

# Evaluation

In [None]:
from sklearn.metrics import f1_score

# Fail early with a readable message if the prediction cell did not finish,
# instead of a bare KeyError in the middle of the loop.
missing = [i for i in range(len(acronyms)) if i not in result_dic]
if missing:
  raise KeyError(f"No prediction for {len(missing)} example(s) (first missing index: {missing[0]}); re-run the prediction cell")

# One binary F1 per example, computed over that example's options.
f1_list = []

for i in range(len(acronyms)):
  y_pred = result_dic[i]
  y_true = list(options[i].values())
  # zero_division=0 gives the same value as the default ("warn" scores an
  # undefined F1 as 0) but without emitting a warning for every such example.
  f1_list.append(f1_score(y_true,y_pred,average='binary',zero_division=0))

# Final score: mean of the per-example F1 values.
print(np.mean(f1_list))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0.47459349593495936


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


My run resulted in a mean per-example F1 of 0.47459349593495936 (≈ 0.475), which is lower than a baseline method based on semantic similarity.