In [1]:
from langchain.llms import Ollama
import json
import re
from bs4 import BeautifulSoup
from elqm.data import get_raw_data

In [2]:
# Create an instance of the Ollama class
ollama = Ollama(
    base_url="http://localhost:11434",
    model="dolphin2.2-mistral",
    verbose=True,
    stop=["<|im_end|>"]
)


## Test the Ollama class

In [3]:
# Stream tokens from the model
for token in ollama.stream("Say something nice!"):
    print(token, end="")

 You are amazing and I'm so glad to be here to support you. Remember that you are capable of great things, and don't hesitate to ask for help when needed. Stay positive and keep shining!

## Ask question with single context

In [4]:
data = get_raw_data()
len(data)

508

In [5]:
html = list(data.values())[0]['html']

# Remove the HTML tags and only keep the text
soup = BeautifulSoup(html, "html.parser")
text = soup.get_text()

# Reduce multiple linebreaks to a single linebreak
text = re.sub(r"\n+", "\n", text)

In [6]:
print(text)


EUR-Lex - 32001Y0123(02) - EN
Avis juridique important
|
32001Y0123(02)
Commission opinion of 20 December 2000 concerning the plan for the disposal of radioactive waste from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS) located on the Dounreay nuclear site in Scotland (United Kingdom), in accordance with Article 37 of the Euratom Treaty  
Official Journal C 020 , 23/01/2001 P. 0004 - 0004 
Commission opinionof 20 December 2000concerning the plan for the disposal of radioactive waste from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS) located on the Dounreay nuclear site in Scotland (United Kingdom), in accordance with Article 37 of the Euratom Treaty(2001/C 20/03)(Only the English text is authentic)On 8 June 2000 the European Commission received from the United Kingdom Government, in accordanc

In [7]:
question = "What does the commission think aboutthe disposal plans of radioactive waste?"

context = text

In [8]:
prompt = f"Question: {question}\nContext: {context}\nAnswer:"

In [9]:
# Stream tokens from the model
for token in ollama.stream(prompt):
    print(token, end="")

 The European Commission, in accordance with Article 37 of the Euratom Treaty, assessed the plan for the disposal of radioactive waste from the commissioning of the liquid metal disposal plant (LMDP) and the waste receipt, assay, characterisation and supercompaction facility (WRACS) located on the Dounreay nuclear site in Scotland (United Kingdom).

The Commission concluded that:
1. The distance between the plant and the nearest point of another Member State, Denmark (Faeroe Islands), is approximately 370 km.
2. Under normal operating conditions, the discharges of liquid and gaseous effluents will not cause an exposure of the population in other Member States that is significant from the point of view of health.
3. Solid low and intermediate level radioactive waste arising from the operations of the LMDP and WRACS will be stored on-site, with off-site movement of waste not currently envisaged.
4. In the event of unplanned discharges of radioactive waste, which may follow an accident on

## Ask question with retrieved documents

In [10]:
import numpy as np
import json

from yake import KeywordExtractor
from transformers.pipelines import AggregationStrategy
from transformers import (
    TokenClassificationPipeline,
    AutoModelForTokenClassification,
    AutoTokenizer,
)

from bs4 import BeautifulSoup

from elqm.retrieval import retrieve
from elqm.backend.utils import get_es_connection

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Define keyphrase extraction pipeline
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
    def __init__(self, model, *args, **kwargs):
        super().__init__(
            model=AutoModelForTokenClassification.from_pretrained(model),
            tokenizer=AutoTokenizer.from_pretrained(model),
            *args,
            **kwargs
        )

    def postprocess(self, all_outputs):
        results = super().postprocess(
            all_outputs=all_outputs,
            aggregation_strategy=AggregationStrategy.SIMPLE,
        )
        return np.unique([result.get("word").strip() for result in results])


In [12]:
# Load pipeline
model_name = "ml6team/keyphrase-extraction-kbir-inspec"
extractor = KeyphraseExtractionPipeline(model=model_name)

In [13]:
es = get_es_connection()

In [14]:
question = "What does the commission think aboutthe disposal plans of radioactive waste?" 

In [15]:
hits = retrieve(question, extractor, KeywordExtractor(n=2, top=5), es)
# Print the scores
for hit in hits:
    print(hit["_score"])

16.260082
15.650679
15.620362
15.422218
15.364843
15.275118
15.10446
14.259675
13.244931
13.128133


In [16]:
processed_texts = []
for hit in hits:
    bs = BeautifulSoup(hit["_source"]["text"], "html.parser")
    text = bs.get_text()
    text = re.sub(r"\n+", "\n", text)
    processed_texts.append(text)

In [17]:
# Combine the texts into a single string
context = "\n\n".join(processed_texts)

In [18]:
prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

In [19]:
# Stream tokens from the model
for token in ollama.stream(prompt):
    print(token, end="")

 The European Commission's opinion on the disposal plan for radioactive waste arising from the dismantling of the Trawsfynydd Power Station in the United Kingdom, in accordance with Article 37 of the Euratom Treaty, states that:

1. Under normal operating conditions, the discharges of liquid and gaseous effluents will not cause significant exposure to the population in other Member States from a health perspective.
2. Solid low and intermediate level radioactive waste will be stored on-site, with off-site movement of waste not currently envisaged.
3. In case of unplanned discharges following an accident, the doses likely to be received by the population in other Member States would not be significant from a health perspective.

Consequently, the Commission is of the opinion that the implementation of the plan for the disposal of radioactive waste is not liable to result in radioactive contamination, significant from the point of view of health, of the water, soil or airspace of another