# v2 Evaluation

1. Quantitative TOGLIENDOLI DAI RETRIEVED
    1. "Quale è la definizione di \"{term}\" nella Legislazione Italiana?".format(term=item['term']) x 4000+ di **normattiva** dal vectorstore. 
    2. "What's the definition of \"{term}\" in the European Legislation?".format(term=item['term']) x Lexdrafter 1330 escludendo. 

2. Qualitative
    1. "Quale è la definizione di \"{term}\" nella Legislazione Italiana?".format(term=item['term']) x quanti termini tradotti da salvo
    2. "What's the definition of \"{term}\" in the European Legislation?".format(term=item['term']) x

    se ci sono in entrambi i dataset li si esclude da entrambi

---

## Quantitative

In [None]:
import polars as pl

In [2]:
defs = pl.read_csv("../../data/definitions_corpus/definitions.csv")

definendums_list = (defs.select(
    pl.col('label')
    .str.replace('#', '')
    .str.replace(r'([a-zà-ÿ])([A-Z])', r'${1} ${2}', n=-1)
    .str.to_lowercase()
)
)['label'].to_list()

print(definendums_list)



### LexDrafter

In [3]:
lexdrafter_df = pl.read_json('../datasets/lexdrafter/corpus.json', schema={"celex_id": pl.String, "term": pl.String, "term_definition": pl.String})

lexdrafter_df

celex_id,term,term_definition
str,str,str
"""32019R2014""","""mains""","""'mains' means the electricity …"
"""32019R2014""","""electric mains""","""'electric mains' means the ele…"
"""32019R2014""","""automatic washing machine""","""'automatic washing machine' me…"
"""32019R2014""","""household washing machine""","""'household washing machine' me…"
"""32019R2014""","""household washer-dryer""","""'household washer-dryer' means…"
…,…,…
"""31998D0181""","""GATT""","""'GATT' means ‘GATT 1947’ or ‘G…"
"""31998D0181""","""Intellectual property""","""'Intellectual property' includ…"
"""31998D0181""","""Energy Charter Protocol""","""'Energy Charter Protocol' mean…"
"""31998D0181""","""Protocol""","""'Protocol' means a treaty, the…"


In [6]:
from rapidfuzz import process, fuzz

lexdrafter_retrieval = []
lexdrafter_generation = []

for entry in lexdrafter_df.rename({'celex_id': 'CELEX_ID', 'term': 'Term', 'term_definition': 'original_definition'}).to_dicts():
    x = process.extract(entry['Term'].lower(), definendums_list, scorer=fuzz.ratio, limit=1, score_cutoff=95)
    if x: 
        lexdrafter_retrieval.append(entry)
    else:
        lexdrafter_generation.append(entry)

print(len(lexdrafter_retrieval), len(lexdrafter_generation))

707 623


In [9]:
from LegalDefAgent.src.agents import definitions_agent_eval
import uuid
from pathlib import Path
import json
import ast
import time
import logging

async def get_definition(question, model):
    inputs = {"messages": [("user", question)]}
    configurable = {"configurable": {"user_id": "1", "thread_id": uuid.uuid4().hex, "model": model}}

    async for msg in definitions_agent_eval.astream(inputs, configurable, stream_mode="values"):
        chat = msg

    return chat

  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 21:24:55,542 - INFO - Logging configured
2025-05-06 21:24:56,146 - INFO - PyTorch version 2.6.0 available.
2025-05-06 21:24:56,150 - INFO - Polars version 1.21.0 available.


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 23041.41it/s]


2025-05-06 21:25:04,030 - INFO - loading existing colbert_linear and sparse_linear---------


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 33707.24it/s]


2025-05-06 21:25:09,335 - INFO - loading existing colbert_linear and sparse_linear---------
2025-05-06 21:25:10,226 - INFO - Logging configured


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 14592.27it/s]


2025-05-06 21:25:14,921 - INFO - loading existing colbert_linear and sparse_linear---------


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 48136.62it/s]


2025-05-06 21:25:19,641 - INFO - loading existing colbert_linear and sparse_linear---------
2025-05-06 21:25:19,656 - INFO - Logging configured


In [10]:
model = 'together-llama-3.3-70B-Instruct-Turbo'
dataset_name = 'quantitative_lexdrafter'
task = 'full'

Path('logs').mkdir(exist_ok=True)

logging.basicConfig(
    filename=f'logs/evaluation_log_{model}_{dataset_name}_{task}.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)


last = 0

output_file = f'results/{model}_{dataset_name}_{task}.json'

if Path(output_file).exists():
    with open(output_file, 'r') as f:
        results = ast.literal_eval(f.read())
        last = results[-1]['#']
    logging.info(f"Loaded {len(results)} existing results")
else:
    results = []
    logging.info("Starting new results file")

dataset = lexdrafter_generation + lexdrafter_retrieval

for i, item in enumerate(dataset[last:], last+1):
    result = {"entry": item, "#": i}
    term = item['Term']
    question = "What's the definition of \"{term}\" in the European Legislation?".format(term=term)
    try:
        logging.info(f"Processing term {i}/{len(dataset)}: {term}")
        res = await get_definition(question, model)
        response_json = json.loads(res['messages'][-1].content)
        result['response'] = response_json
        logging.info(f"Received response for term {i}: {response_json}")
        results.append(result)

        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f'{i}/{len(dataset)}', term)
        time.sleep(2)
    
    except Exception as e:
        logging.error(f"Error processing term {i} ({term}): {str(e)}")
        item['response'] = {"error": str(e)}
        print(f'{i}/{len(dataset)} Error', term)
        results.append(item)
    
    logging.info(f"Processing complete. Processed {len(results)} terms.")


logging.info(f"Processing complete. Processed {len(results)} terms.")

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


253/1330 fault
254/1330 transmission system element
255/1330 dynamic stability
256/1330 single day-ahead coupling
257/1330 single intraday coupling
258/1330 price coupling algorithm
259/1330 dynamic stability assessment
260/1330 frequency stability
261/1330 voltage stability
262/1330 system state
263/1330 emergency state
264/1330 restoration state
265/1330 exceptional contingency
266/1330 frequency deviation
267/1330 system frequency
268/1330 frequency restoration process
269/1330 FRP
270/1330 frequency restoration control error
271/1330 FRCE
272/1330 schedule
273/1330 K-factor of an LFC area or LFC block
274/1330 local state
275/1330 maximum steady-state frequency deviation
276/1330 observability area
277/1330 neighbouring TSOs
278/1330 operational security analysis
279/1330 operational security indicators
280/1330 operational security ranking
281/1330 operational tests
282/1330 ordinary contingency
283/1330 out-of-range contingency
284/1330 system defence plan
285/1330 ramping rate
2

CancelledError: 

### Italian

In [1]:
import polars as pl

In [2]:
italian_df = pl.read_csv('../../data/definitions_corpus/definitions.csv').filter(pl.col('dataset') != 'EurLex')

In [3]:
italian_df = italian_df.with_columns(pl.col('label').str.replace('#', '').str.replace(r'([a-zà-ÿ])([A-Z])', r'${1} ${2}', n=-1).str.to_lowercase())
italian_dict = italian_df.select(["label", "definition_text"]).rename({"label": "term", "definition_text": "term_definition"}).to_dicts()
italian_dict, len(italian_dict)

([{'term': 'accordi internazionali',
   'term_definition': "accordi internazionali: gli accordi vigenti in materia di cooperazione o di reciproca assistenza tecnico - militare nel settore della difesa, conclusi dall'Italia con uno o piu' Stati esteri;"},
  {'term': 'intese internazionali',
   'term_definition': "intese internazionali: protocolli, memorandum, intese, o altri documenti comunque denominati, posti in essere dal Ministero della difesa con uno o piu' Stati esteri, discendenti dagli accordi di cui alla lettera a);"},
  {'term': 'attivita di supporto tecnico-amministrativo',
   'term_definition': "attivita' di supporto tecnico-amministrativo: qualsiasi attivita' di assistenza tecnica, ingegneristica, logistica, manutentiva, addestrativa, formativa, amministrativa, legale, nonche' di coordinamento della contrattualistica e degli aspetti connessi alla gestione finanziaria, anche nella fase di pianificazione e definizione dell'esigenza e del relativo impatto sui costi, discendent

In [4]:
from LegalDefAgent.src.agents import definitions_agent_eval
import uuid
from pathlib import Path
import json
import ast
import time
import random
import logging

async def get_definition(question, model):
    inputs = {"messages": [("user", question)]}
    configurable = {"configurable": {"user_id": "1", "thread_id": uuid.uuid4().hex, "model": model}}

    async for msg in definitions_agent_eval.astream(inputs, configurable, stream_mode="values"):
        chat = msg

    return chat

  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 22:46:26,385 - INFO - Logging configured
2025-05-06 22:46:26,979 - INFO - PyTorch version 2.6.0 available.
2025-05-06 22:46:26,983 - INFO - Polars version 1.21.0 available.


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 103477.89it/s]


2025-05-06 22:46:33,995 - INFO - loading existing colbert_linear and sparse_linear---------


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 9089.07it/s]


2025-05-06 22:46:38,987 - INFO - loading existing colbert_linear and sparse_linear---------
2025-05-06 22:46:39,847 - INFO - Logging configured


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 18674.55it/s]


2025-05-06 22:46:44,243 - INFO - loading existing colbert_linear and sparse_linear---------


Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 37946.06it/s]


2025-05-06 22:46:47,076 - INFO - loading existing colbert_linear and sparse_linear---------
2025-05-06 22:46:47,162 - INFO - Logging configured


In [5]:
random.shuffle(italian_dict)

In [None]:
model = 'together-llama-3.3-70B-Instruct-Turbo'
dataset_name = 'quantitative_italian'
task = 'full'

Path('logs').mkdir(exist_ok=True)

logging.basicConfig(
    filename=f'logs/evaluation_log_{model}_{dataset_name}_{task}.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

last = 0

output_file = f'results/{model}_{dataset_name}_{task}.json'

if Path(output_file).exists():
    with open(output_file, 'r') as f:
        results = ast.literal_eval(f.read())
        last = results[-1]['#']
    logging.info(f"Loaded {len(results)} existing results")
else:
    results = []
    logging.info("Starting new results file")

dataset = italian_dict

for i, item in enumerate(dataset[last:], last+1):
    result = {"entry": item, "#": i}
    term = item['term']
    question = "Quale è la definizione di \"{term}\" nella legislazione italiana?".format(term=term)
    try:
        logging.info(f"Processing term {i}/{len(dataset)}: {term}")
        res = await get_definition(question, model)
        response_json = json.loads(res['messages'][-1].content)
        result['response'] = response_json
        logging.info(f"Received response for term {i}: {response_json}")
        results.append(result)

        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f'{i}/{len(dataset)}', term)
        time.sleep(2)
    
    except Exception as e:
        logging.error(f"Error processing term {i} ({term}): {str(e)}")
        item['response'] = {"error": str(e)}
        print(f'{i}/{len(dataset)} Error', term)
        results.append(item)
    
    logging.info(f"Processing complete. Processed {len(results)} terms.")


logging.info(f"Processing complete. Processed {len(results)} terms.")

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


1/4985 biglietti elettronici
2/4985 personale del dipartimento della protezione civile
3/4985 punto di interscambio internet(ixp)


## Qualitative

In [5]:
import polars as pl

In [6]:
evs = pl.read_csv('./Definition-Definizione.csv')
evs

Definition,Definizione
str,str
"""Non-personal Data""","""Dato non personale"""
"""Connected device""","""Dispositivo connesso"""
"""IoT device""","""Dispositivo IoT"""
"""Open Data""","""Dato aperto"""
"""Satellite Data""","""Dato satelittare"""
…,…
"""Traffic Data""","""Dati relativi al Traffico """
"""GPS Data""","""Dati GPS"""
"""International Cooperation Poli…","""Politica di Cooperazione Inter…"
"""National Monitoring Body""","""Ente nazionale di controllo"""


In [26]:
from rapidfuzz import process, fuzz

en_list = []
it_list = []

for entry in evs.iter_rows():
    x = process.extract(entry[0].lower(), definendums_list, scorer=fuzz.token_sort_ratio, limit=1, score_cutoff=95)
    y = process.extract(entry[1].lower(), definendums_list, scorer=fuzz.token_sort_ratio, limit=1, score_cutoff=95)
    if x:
        print('found in eu: ', entry[0])
        pass
    elif y:
        print('found in it: ', entry[1])
        pass
    else:
        en_list.append(entry[0])
        it_list.append(entry[1])


print(len(en_list), len(it_list))

found in eu:  National Competition Authority
found in eu:  Organic Product
found in it:  Analisi del rischio
found in eu:  Electric Vehicle
found in eu:  Hybrid Vehicle
found in eu:  Traffic Data
73 73


### English

In [8]:
en_list

['Non-personal Data',
 'Connected device',
 'IoT device',
 'Open Data',
 'Satellite Data',
 'Cookie',
 'Synthetic Data',
 'International Data Transfer',
 'Data Sharing activity',
 'Data Buyer',
 'Dangerous Product',
 'Dangerous Product Importer',
 'Intermediary Platform',
 'Internet Platform',
 'Content Creator',
 'Digital Internal Market',
 'Data Monetization',
 'Smart Contract',
 'Gender-based Crime',
 'Victim-blaming',
 'Hate Speech',
 'Stalking',
 'Ransomware',
 'International Terrorism',
 'Cooperation Duties',
 'Law Enforcement Policy',
 'Fair compensation',
 'Law Enforcement Officer ',
 'Cryptocurrency',
 'Decentralised Finance',
 'Autonomous Trading Agent',
 'Automated trading',
 'Multi-year Agreement',
 'Restrictive Practice',
 'Confidential Information',
 'Non-animal origin ',
 'Imported Good',
 'Air Pollution',
 'Non-organic Origin',
 'Animal Welfare',
 'National Health Authority ',
 'Plant-based Product',
 'Risk Assessment',
 'Risk Evaluation',
 'Autonomous Vehicle',
 'Air q

In [None]:
from LegalDefAgent.src.agents import definitions_agent_eval
import uuid
from pathlib import Path
import json
import ast
import time
import logging

async def get_definition(question, model):
    inputs = {"messages": [("user", question)]}
    configurable = {"configurable": {"user_id": "1", "thread_id": uuid.uuid4().hex, "model": model}}

    async for msg in definitions_agent_eval.astream(inputs, configurable, stream_mode="values"):
        chat = msg

    return chat

  from .autonotebook import tqdm as notebook_tqdm


2025-05-06 19:40:30,338 - INFO - Logging configured
2025-05-06 19:40:30,982 - INFO - PyTorch version 2.6.0 available.
2025-05-06 19:40:30,986 - INFO - Polars version 1.21.0 available.


In [11]:
model = 'together-llama-3.3-70B-Instruct-Turbo'
dataset_name = 'qualitative_en'
task = 'generation'

Path('logs').mkdir(exist_ok=True)

logging.basicConfig(
    filename=f'logs/evaluation_log_{model}_{dataset_name}_{task}.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)


last = 0

output_file = f'results/{model}_{dataset_name}_{task}.json'

if Path(output_file).exists():
    with open(output_file, 'r') as f:
        results = ast.literal_eval(f.read())
        last = results[-1]['#']
    logging.info(f"Loaded {len(results)} existing results")
else:
    results = []
    logging.info("Starting new results file")

dataset = en_list

for i, term in enumerate(dataset[last:], last+1):
    result = {"term": term, "#": i}
    question = "What's the definition of \"{term}\" in the European Legislation?".format(term=term)
    try:
        logging.info(f"Processing term {i}/{len(dataset)}: {term}")
        res = await get_definition(question, model)
        response_json = json.loads(res['messages'][-1].content)
        result['response'] = response_json
        logging.info(f"Received response for term {i}: {response_json}")
        results.append(result)

        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f'{i}/{len(dataset)}', term)
        time.sleep(2)
    
    except Exception as e:
        logging.error(f"Error processing term {i} ({term}): {str(e)}")
        term['response'] = {"error": str(e)}
        print(f'{i}/{len(dataset)} Error', term)
        results.append(term)
    
    logging.info(f"Processing complete. Processed {len(results)} terms.")


logging.info(f"Processing complete. Processed {len(results)} terms.")

56/74 Place of Production
57/74 International Distance Selling 
58/74 Lead Consumer Authority 
59/74 Financial Stability 
60/74 Credit Risk Assessment
61/74 Credit Risk Evaluation
62/74 International Investment
63/74 Dominant Position
64/74 Automated Creditworthiness Decision
65/74 National Bond Authority 
66/74 Market Disruption Crisis
67/74 Videosurveillance
68/74 Trojan
69/74 EU Criminal Record 
70/74 Human Trafficking 
71/74 GPS Data
72/74 International Cooperation Policy
73/74 National Monitoring Body
74/74 Law Enforcement Data Sharing


### Italian

In [25]:
it_list

['Dato non personale',
 'Dispositivo connesso',
 'Dispositivo IoT',
 'Dato aperto',
 'Dato satelittare',
 'Cookie',
 'Dato Sintetico',
 'Trasferimento internazionale di dati',
 'Attività di Condivisione di Dati',
 'Acquirente di dati',
 'Prodotto pericoloso',
 'Importatore di un prodotto pericolo',
 'Piattaforma di Intermediazione',
 'Piattaforma su Internet',
 'Creatore di Contenuti',
 'Mercato Digitale Interno',
 'Monetizzazione di Dati',
 'Contratto Intelligente',
 'Crimine di Genere',
 'Colpevolizzazione della Vittima',
 "Discorso d'odio",
 'Stalking',
 'Ransomware',
 'Terrorismo internazionale',
 'Obblighi di Cooperazione',
 'Politica delle Attività di Contrasto',
 'Equa Compensazione',
 "Ufficiale delle Forze dell'Ordine",
 'Criptovaluta',
 'Finanza Decentralizzata',
 'Agente autonomo di trading ',
 'Trading automatizzato',
 'Contratto Pluriennale',
 'Pratica Restrittiva',
 'Informazione Confidenziale',
 'Origine Non Animale',
 'Bene Importato',
 "Inquinamento dell'Aria",
 'Origi

In [13]:
model = 'together-llama-3.3-70B-Instruct-Turbo'
dataset_name = 'qualitative_it'
task = 'generation'

Path('logs').mkdir(exist_ok=True)

logging.basicConfig(
    filename=f'logs/evaluation_log_{model}_{dataset_name}_{task}.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)


last = 0

output_file = f'results/{model}_{dataset_name}_{task}.json'

if Path(output_file).exists():
    with open(output_file, 'r') as f:
        results = ast.literal_eval(f.read())
        last = results[-1]['#']
    logging.info(f"Loaded {len(results)} existing results")
else:
    results = []
    logging.info("Starting new results file")

dataset = it_list

for i, term in enumerate(dataset[last:], last+1):
    result = {"term": term, "#": i}
    question = "Quale è la definizione di \"{term}\" nella legislazione italiana?".format(term=term)
    try:
        logging.info(f"Processing term {i}/{len(dataset)}: {term}")
        res = await get_definition(question, model)
        response_json = json.loads(res['messages'][-1].content)
        result['response'] = response_json
        logging.info(f"Received response for term {i}: {response_json}")
        results.append(result)

        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        print(f'{i}/{len(dataset)}', term)
        time.sleep(2)
    
    except Exception as e:
        logging.error(f"Error processing term {i} ({term}): {str(e)}")
        term['response'] = {"error": str(e)}
        print(f'{i}/{len(dataset)} Error', term)
        results.append(term)
    
    logging.info(f"Processing complete. Processed {len(results)} terms.")


logging.info(f"Processing complete. Processed {len(results)} terms.")

2025-05-06 12:34:15,296 - INFO - Logging configured
65/74 Autorità Nazionale per i Titoli di Stato
66/74 Crisi da Instabilità del Mercato
67/74 Videosorveglianza
68/74 Trojan
69/74 Casellario Giudiziale Europeo
70/74 Traffico di Esseri Umani
71/74 Dati GPS
72/74 Politica di Cooperazione Internazionale
73/74 Ente nazionale di controllo
74/74 Condivisione di dati per le attività di contrasto


In [53]:
# to excel

import json 


# v2
def parse_definitions_to_df(data) -> pl.DataFrame:
    """
    Parse JSON data containing legal definitions into a Polars DataFrame.
    
    Args:
        data: List of dictionaries containing legal definitions
        
    Returns:
        Polars DataFrame with columns: celex_id, term, number, response_type, definition, sources
    """
    parsed_data = []
    
    for item in data:
        print(item)
        try:
            row = {
                'term': item.get('term'),
                #'number': item.get('#'),
                'generated definition': item['response']['generated_definition'].get('generated_definition') if 'generated_definition' in item['response'] else None,
                'sources': item['response']['generated_definition'].get('sources') if 'generated_definition' in item['response'] else None,
            }
        except Exception as e:
            row = {
                'term': item.get('term'),
                #'number': item.get('#'),
                'generated definition': item['response']['generated_definition'],
                'sources': item['response']['sources']
            }

        
        parsed_data.append(row)
    
    df = pl.DataFrame(parsed_data)
    
    df = df.with_columns([
        #pl.col('number').cast(pl.Int64),
        pl.col('term').cast(pl.Utf8),
        pl.col('generated definition').cast(pl.Utf8),
        pl.col('sources')
    ])
    
    return df


with open('./results/together-llama-3.3-70B-Instruct-Turbo_qualitative_it_generation.json') as file:
    results = json.load(file)

df = parse_definitions_to_df(results).to_pandas().to_excel(f'together-llama-3.3-70B-Instruct-Turbo_qualitative_it_generation.xlsx', index=True)
df

{'term': 'Dato non personale', '#': 1, 'response': {'generated_definition': {'generated_definition': "Dato non personale: qualsiasi informazione che non riguarda una persona fisica identificata o identificabile, ai sensi dell'articolo 4 del Regolamento (UE) 2016/679 del Parlamento europeo e del Consiglio del 27 aprile 2016.", 'sources': ['dato personale: qualunque informazione relativa a persona fisica, persona giuridica, ente od associazione, identificati o identificabili, anche indirettamente, mediante riferimento a qualsiasi altra informazione, ivi compreso un numero di identificazione personale;', 'dati personali: qualsiasi informazione riguardante una persona fisica identificata o identificabile («interessato»);', "dati personali: qualsiasi informazione riguardante una persona fisica identificata o identificabile in relazione a nome, numero di identificazione, dati relativi all'ubicazione, identificativo online, uno o piu' elementi caratteristici della sua identita' fisica, fisiol

In [41]:
parsed_data

NameError: name 'parsed_data' is not defined

In [37]:
print(df)

None
