In [29]:
import rdflib
import csv
import pandas as pd

# Path to the miRCancer data file that is later read with csv.DictReader
# (relative to the notebook's working directory).
ruta_csv = "../bbdd/datos_mircancer"

# Start from the existing graph stored as Turtle so that the triples created
# further down are added to it rather than to an empty graph.
ncrna2phen = rdflib.Graph()
ncrna2phen.parse(source="ncrna2phengraphT.txt", format="turtle")

<Graph identifier=N15b876b5e1dd4f24a499f7a64e8ac5ac (<class 'rdflib.graph.Graph'>)>

In [30]:
# Prefix -> namespace IRI table used when serializing the graph.
# Each prefix appears exactly once: binding the same prefix to a second
# namespace makes rdflib invent a numbered prefix (e.g. "ncrnabg1"), and can
# take the namespace away from the prefix it was originally bound to.
#
# NOTE(review): "ncrna2phenbg" points at the ".../ncrna2gene/9606/" namespace;
# this looks copied from an ncRNA-to-gene script - confirm the intended IRI.
# NOTE(review): "wx3id.org" in the biolink namespace looks like a typo for
# "w3id.org"; it is left untouched here because the same IRI is used when the
# triples are created - confirm and fix both places together.
namespace_prefixes = {
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "owl": "http://www.w3.org/2002/07/owl#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "dcterms": "http://purl.org/dc/terms/",
    "obo": "http://purl.obolibrary.org/obo/",
    "sio": "http://semanticscience.org/resource/",
    "faldo": "http://biohackathon.org/resource/faldo#",
    "ncrna2phenbg": "http://rdf.biogateway.eu/ncrna2gene/9606/",
    "genebg": "http://rdf.biogateway.eu/gene/9606/",
    "ncrnabg": "http://rdf.biogateway.eu/ncrna/9606/",
    "assembly": "https://www.ncbi.nlm.nih.gov/assembly/",
    "schema": "http://schema.org/",
    "bao": "http://www.bioassayontology.org/bao#",
    "ncbi": "https://www.ncbi.nlm.nih.gov/",
    "id": "http://identifiers.org/",
    "biolink": "https://wx3id.org/biolink/vocab/",
    "omim": "http://purl.bioontology.org/ontology/OMIM/",
}

# Register the prefixes in the same order as they are listed above.
for prefix, namespace_iri in namespace_prefixes.items():
    ncrna2phen.bind(prefix, rdflib.URIRef(namespace_iri))


# Source-database name -> IRI recorded as the origin of an association.
# Names that are not listed here are replaced by a generic fallback IRI at
# lookup time (see the database_ids.get(...) call where triples are built).
database_ids = {
    # Entries identified by an OBO term IRI.
    "RefSeq": "http://purl.obolibrary.org/obo/NCIT_C45335",
    "Ensembl": "http://purl.obolibrary.org/obo/NCIT_C45763",
    "miRBase": "http://purl.obolibrary.org/obo/MI_2358",
    # Entries identified by the resource's own web address.
    "miRCancer": "http://mircancer.ecu.edu/",
    "LNCipedia": "https://lncipedia.org/download",
}

# Every row of this CSV comes from the same source database. The name is set
# before the loop so that the output file name below is defined even when the
# CSV contains no data rows.
data_source = "miRCancer"

# IRI identifying the evidence source; a name missing from database_ids falls
# back to a generic IRI.
database_uri = rdflib.URIRef(
    database_ids.get(data_source, "http://purl.obolibrary.org/obo/NCIT_C15426")
)

# Terms that are identical for every row, built once instead of per row.
skos = rdflib.namespace.SKOS
disease_root = rdflib.URIRef("http://purl.obolibrary.org/obo/DOID_4")
statement_class = rdflib.URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement")
ro_0002331 = rdflib.URIRef("http://purl.obolibrary.org/obo/RO_0002331")
evidence_origin = rdflib.URIRef("http://schema.org/evidenceOrigin")
sio_000253 = rdflib.URIRef("http://semanticscience.org/resource/SIO_000253")
# NOTE(review): "wx3id.org" looks like a typo for "w3id.org" (the same IRI is
# used for the "biolink" prefix binding); left unchanged so the emitted IRIs
# stay consistent with existing data - confirm and fix both places together.
biolink_category = rdflib.URIRef("https://wx3id.org/biolink/vocab/category")
biolink_disease = rdflib.URIRef("https://wx3id.org/biolink/vocab/Disease")

with open(ruta_csv, newline='', encoding='utf-8') as csvfile:
    for row in csv.DictReader(csvfile):
        # DictReader fills columns that are missing from a short row with
        # None, so normalise to "" before measuring the length.
        omim_id = row.get('OMIM_ID') or ''
        if len(omim_id) <= 3:
            # Rows whose OMIM_ID has three characters or fewer produce no
            # triples (presumably empty/placeholder values - confirm).
            continue

        # Keep only the last path segment when the ID is given as a URI.
        transcript_id = row['ncRNA_ID'].split('/')[-1]
        disease_name = row['Disease']

        omim_uri = rdflib.URIRef(f"http://purl.bioontology.org/ontology/OMIM/{omim_id}")
        ncrna_uri = rdflib.URIRef(f"http://rdf.biogateway.eu/ncrna/9606/{transcript_id}")
        # The association IRI is keyed by the OMIM ID, matching its prefLabel
        # below. Keying it by the gene ID would merge all diseases of one
        # ncRNA/gene pair into a single statement node.
        # NOTE(review): the path segment is "ncrna2gene" although this is an
        # ncRNA-to-phenotype association - confirm the intended namespace.
        ncrna2phenbg = rdflib.URIRef(
            f"http://rdf.biogateway.eu/ncrna2gene/bgw!{transcript_id}--omim!{omim_id}#{data_source}"
        )

        # Disease class.
        ncrna2phen.add((omim_uri, rdflib.RDF.type, rdflib.OWL.Class))
        ncrna2phen.add((omim_uri, rdflib.RDFS.subClassOf, disease_root))
        ncrna2phen.add((omim_uri, skos.prefLabel, rdflib.Literal(f"Disease {disease_name}")))
        ncrna2phen.add((omim_uri, biolink_category, biolink_disease))

        # Reified statement: ncRNA --RO_0002331--> disease.
        ncrna2phen.add((ncrna2phenbg, skos.definition, rdflib.Literal(f"Association between non-coding RNA {transcript_id} and disease omim:{omim_id} according to {data_source}")))
        ncrna2phen.add((ncrna2phenbg, rdflib.RDF.type, rdflib.OWL.Class))
        ncrna2phen.add((ncrna2phenbg, rdflib.RDFS.subClassOf, statement_class))
        ncrna2phen.add((ncrna2phenbg, rdflib.RDF.predicate, ro_0002331))
        ncrna2phen.add((ncrna2phenbg, skos.prefLabel, rdflib.Literal(f"bgw!{transcript_id}--omim!{omim_id}")))
        ncrna2phen.add((ncrna2phenbg, rdflib.RDF.object, omim_uri))
        ncrna2phen.add((ncrna2phenbg, rdflib.RDF.subject, ncrna_uri))

        # Provenance of the association (each triple is added once; a graph
        # is a set, so repeating the same add has no effect).
        ncrna2phen.add((ncrna2phenbg, evidence_origin, database_uri))
        ncrna2phen.add((ncrna2phenbg, sio_000253, database_uri))

# NOTE(review): the file gets an ".rdf" extension but its content is Turtle.
output_name = f'ncrna2phen_{data_source.lower()}.rdf'
ncrna2phen.serialize(destination=output_name, format='turtle')

<Graph identifier=N15b876b5e1dd4f24a499f7a64e8ac5ac (<class 'rdflib.graph.Graph'>)>