# LLMSemanticAnnotator: Advanced Semantic Annotation for Plant Biology Research

## Installation de la bibliothèque

In [None]:
#!pip uninstall llm_semantic_annotator
!pip install git+https://github.com/p2m2/encoder-ontology-match-abstract


## Création du fichier de configuration pour l'alignement des ontologies

In [None]:
import json

def _obo_ontology(short_name):
    """Build the settings for one OBO ontology from its lowercase short name.

    The ``url`` and ``prefix`` values are derived from the short name; every
    other setting is the same for all ontologies used in this notebook.
    """
    obo_root = "http://purl.obolibrary.org/obo/"
    return {
        "url": obo_root + short_name + ".owl",
        "prefix": obo_root + short_name.upper() + "_",
        "format": "xml",
        "label": "rdfs:label",
        # NOTE(review): the "obo:" prefix used here is not declared in the
        # "prefix" map of this configuration — confirm the tool resolves it.
        "properties": ["obo:IAO_0000115", "rdfs:comment", "owl:annotatedTarget"],
    }


# Full configuration consumed by the `llm-semantic-annotator` command-line tool.
data = {
    # Global settings: encoder model name, similarity thresholds, batch size.
    "encodeur": "sentence-transformers/all-MiniLM-L6-v2",
    "threshold_similarity_tag_chunk": 0.70,
    "threshold_similarity_tag": 0.80,
    "batch_size": 32,

    # Ontology sources: four ontologies grouped under "planteome_link".
    "populate_owl_tag_embeddings": {
        "prefix": {
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "owl": "http://www.w3.org/2002/07/owl#",
        },
        "ontologies": {
            "planteome_link": {
                short_name: _obo_ontology(short_name)
                for short_name in ("peco", "po", "pso", "to")
            },
        },
    },

    # Abstract retrieval settings.
    "populate_abstract_embeddings": {
        "abstracts_per_file": 500,
        "from_ncbi_api": {
            "ncbi_api_chunk_size": 200,
            "debug_nb_ncbi_request": -1,
            "retmax": 2000,
            # URL-encoded form of: Crops, Agricultural/metabolism[MeSH]
            "selected_term": [
                "Crops%2C+Agricultural%2Fmetabolism%5BMeSH%5D",
            ],
        },
    },
}


# Persist the configuration as pretty-printed JSON in the current directory.
with open('config.json', 'w') as config_file:
    config_file.write(json.dumps(data, indent=4))


## Populate OWL tag embeddings

In [None]:
# Run step 2 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: populate the OWL tag embeddings).
!llm-semantic-annotator config.json 2

## Populate abstract embeddings

In [None]:
# Run step 3 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: populate the abstract embeddings).
!llm-semantic-annotator config.json 3

## Compute similarities between tags and abstract chunks
------

In [None]:
# Run step 4 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: compute tag / abstract-chunk similarities).
!llm-semantic-annotator config.json 4

## Display similarities information

In [None]:
# Run step 5 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: display similarity information).
!llm-semantic-annotator config.json 5

## Build turtle knowledge graph

In [None]:
# Run step 6 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: build the Turtle knowledge graph).
!llm-semantic-annotator config.json 6

## Build dataset abstracts annotations CSV file

In [None]:
# Run step 7 of the llm-semantic-annotator CLI against config.json
# (per this section's heading: build the abstracts-annotations CSV dataset).
!llm-semantic-annotator config.json 7

In [None]:
import os
import tarfile
from google.colab import files

def compress_and_download(directory_name):
    """Pack a directory into a ``.tar.gz`` archive and hand it to Colab for download.

    The archive is created next to the directory, named ``<directory>.tar.gz``,
    and contains the directory under its own base name.

    Args:
        directory_name: Path of the directory to archive. A trailing path
            separator (e.g. ``"results/"``) is accepted.

    Raises:
        FileNotFoundError: If ``directory_name`` does not exist. The check runs
            before the archive is opened, so no empty archive is left behind.
    """
    if not os.path.exists(directory_name):
        raise FileNotFoundError(
            f"Erreur : Le répertoire {directory_name} n'existe pas."
        )

    # Normalise the path: with a trailing separator the archive would otherwise
    # be written *inside* the directory as ".tar.gz" and its top-level entry
    # name (the basename) would be empty.
    directory_path = os.path.normpath(directory_name)

    # Archive name
    archive_name = f"{directory_path}.tar.gz"

    # Compress the directory
    with tarfile.open(archive_name, "w:gz") as tar:
        tar.add(directory_path, arcname=os.path.basename(directory_path))

    print(f"Compression terminée : {archive_name}")

    # Leaving the `with` block without an exception means the archive has been
    # written and closed, so no separate existence check is needed here.
    files.download(archive_name)
    print(f"Le fichier {archive_name} a été téléchargé.")

# Archive the "config_workdir" directory and trigger its download.
# NOTE(review): presumably this is the working directory the annotator creates
# for config.json — confirm the name before running.
directory_to_compress = "config_workdir"
compress_and_download(directory_to_compress)
