# Example bot to replicate the statement added to Wikidata

## Load the libraries

In [41]:
import os

import numpy as np
import pandas as pd
import requests
from wikidataintegrator import wdi_core, wdi_login
from wikidataintegrator.ref_handlers import update_retrieved_if_new_multiple_refs

## Source:  https://search.clinicalgenome.org/kb/gene-validity.csv

In [7]:
# Download the ClinGen gene-validity export straight from the source URL.
# skiprows=6 drops the first six lines of the file (presumably the export's
# preamble and column-title rows - confirm against the raw CSV), and
# header=None makes pandas number the columns 0..7 instead of naming them.
GENE_VALIDITY_CSV_URL = 'https://search.clinicalgenome.org/kb/gene-validity.csv'
df = pd.read_csv(GENE_VALIDITY_CSV_URL, header=None, skiprows=6)
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,A2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-07T14:37:47.175Z
1,A2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-07T14:31:03.696Z
2,A2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-07T14:34:05.324Z
3,A2ML1,HGNC:23336,Noonan syndrome,MONDO_0018997,SOP5,Disputed,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-07T14:23:53.157Z
4,A2ML1,HGNC:23336,Noonan syndrome-like disorder with loose anage...,MONDO_0011899,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-07T14:40:11.599Z
5,AARS,HGNC:20,undetermined early-onset epileptic encephalopathy,MONDO_0018614,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-vali...,2018-11-20T17:00:00.000Z
6,ABCC9,HGNC:60,hypertrichotic osteochondrodysplasia Cantu type,MONDO_0009406,SOP4,Definitive,https://search.clinicalgenome.org/kb/gene-vali...,2017-09-27T00:00:00
7,ABCD1,HGNC:61,X-linked cerebral adrenoleukodystrophy,MONDO_0010247,SOP4,Definitive,https://search.clinicalgenome.org/kb/gene-vali...,2018-02-07T14:00:00
8,ABHD12,HGNC:15868,PHARC syndrome,MONDO_0012984,SOP5,Definitive,https://search.clinicalgenome.org/kb/gene-vali...,2018-06-28T16:45:15.791Z
9,ACAD8,HGNC:87,isobutyryl-CoA dehydrogenase deficiency,MONDO_0012648,SOP6,Definitive,https://search.clinicalgenome.org/kb/gene-vali...,2019-04-26T16:00:00.000Z


## Login section

In [35]:
# Log in to Wikidata. The credentials are read from the WDUSER and WDPASS
# environment variables so that no username or password is stored in the
# notebook; the cell fails fast with a clear message when either is missing.
print("Logging in...")
if "WDUSER" in os.environ and "WDPASS" in os.environ:
    WDUSER = os.environ["WDUSER"]
    WDPASS = os.environ["WDPASS"]
else:
    raise ValueError("WDUSER and WDPASS must be specified as environment variables")
login = wdi_login.WDLogin(WDUSER, WDPASS)

Logging in...
https://www.wikidata.org/w/api.php
Successfully logged in as Andrawaag


## Set variables

In [6]:
# Retrieve the QID of the Wikidata item whose P353 value (HGNC gene symbol)
# equals the gene symbol below.
HGNC = "A2ML1"

sparqlQuery = 'SELECT * WHERE {?gene wdt:P353 "' + HGNC + '"}'
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)

bindings = result["results"]["bindings"]
if not bindings:
    # Report which symbol was not found instead of failing with a bare
    # IndexError on bindings[0].
    raise LookupError("No Wikidata item found with P353 (HGNC gene symbol) {!r}".format(HGNC))

# The query returns the full entity URI; strip the prefix to keep only the QID.
HGNC_qid = bindings[0]["gene"]["value"].replace("http://www.wikidata.org/entity/", "")
HGNC_qid

'Q18051234'

In [43]:
# Retrieve the QID of the Wikidata item whose P5270 value (MONDO ID) equals the
# disease identifier below. The CSV writes identifiers as "MONDO_0009026",
# while the query needs the colon form "MONDO:0009026", hence the replace().
MONDO = "MONDO_0009026".replace("_", ":")
sparqlQuery = 'SELECT * WHERE {?disease wdt:P5270 "' + MONDO + '"}'
result = wdi_core.WDItemEngine.execute_sparql_query(sparqlQuery)

bindings = result["results"]["bindings"]
if not bindings:
    # Report which identifier was not found instead of failing with a bare
    # IndexError on bindings[0].
    raise LookupError("No Wikidata item found with P5270 (MONDO ID) {!r}".format(MONDO))

# The query returns the full entity URI; strip the prefix to keep only the QID.
MONDO_qid = bindings[0]["disease"]["value"].replace("http://www.wikidata.org/entity/", "")
MONDO_qid

'Q1136492'

In [44]:
from datetime import datetime, timezone
import copy

# Curation record used as the reference URL when the caller does not pass one.
# NOTE(review): the timestamp in this URL (2018-06-07T14:23:53) matches the
# Noonan syndrome row of the CSV preview, not the MONDO_0009026 (Costello
# syndrome) record used for the statement in this notebook - confirm and pass
# the matching URL via ref_url.
DEFAULT_REFERENCE_URL = "https://search.clinicalgenome.org/kb/gene-validity/d910a9d8-516e-443d-acba-8d61f7574792--2018-06-07T14:23:53"


def create_reference(ref_url=DEFAULT_REFERENCE_URL):
    """Build the reference attached to a statement written to Wikidata.

    Args:
        ref_url: URL of the curation record backing the statement. Defaults to
            the single record this notebook was originally written against.

    Returns:
        A list of three reference snaks, in this order: stated in (P248),
        retrieved (P813) and reference URL (P854).
    """
    # Q64403342 is the source item cited as "stated in" (presumably the ClinGen
    # gene-validity resource - confirm on Wikidata).
    refStatedIn = wdi_core.WDItemID(value="Q64403342", prop_nr="P248", is_reference=True)
    # The format string ends in "Z" (UTC), so the date must be taken in UTC too;
    # a local-time date can be off by one day around midnight.
    timeStringNow = datetime.now(timezone.utc).strftime("+%Y-%m-%dT00:00:00Z")
    refRetrieved = wdi_core.WDTime(timeStringNow, prop_nr="P813", is_reference=True)
    refURL = wdi_core.WDUrl(value=ref_url, prop_nr="P854", is_reference=True)

    return [refStatedIn, refRetrieved, refURL]

In [38]:
# Build the statement to add to the gene item: a P2293 claim whose value is the
# disease item found above, carrying one reference. The reference list is
# deep-copied so the statement owns its own copy of the reference objects.
reference = create_reference()
statement = [
    wdi_core.WDItemID(
        value=MONDO_qid,
        prop_nr="P2293",
        references=[copy.deepcopy(reference)],
    )
]

In [45]:
# Load the gene item by its QID and display its JSON representation.
# Variant that also passes the new statement to the engine (kept disabled):
#   wikidata_item = wdi_core.WDItemEngine(wd_item_id=HGNC_qid, data=statement, append_value=["P2293"])
wikidata_item = wdi_core.WDItemEngine(
    wd_item_id=HGNC_qid,
)
item_json = wikidata_item.get_wd_json_representation()
item_json

{'labels': {'en': {'language': 'en', 'value': 'A2ML1'},
  'fr': {'language': 'fr', 'value': 'A2ML1'},
  'nl': {'language': 'nl', 'value': 'A2ML1'},
  'srn': {'language': 'srn', 'value': 'A2ML1'},
  'de': {'language': 'de', 'value': 'A2ML1'},
  'it': {'language': 'it', 'value': 'A2ML1'},
  'pt': {'language': 'pt', 'value': 'A2ML1'},
  'es': {'language': 'es', 'value': 'A2ML1'},
  'sv': {'language': 'sv', 'value': 'A2ML1'},
  'uk': {'language': 'uk', 'value': 'A2ML1'},
  'ar': {'language': 'ar', 'value': 'A2ML1'}},
 'descriptions': {'en': {'language': 'en',
   'value': 'protein-coding gene in the species Homo sapiens'},
  'fr': {'language': 'fr', 'value': "gène de l'espèce Homo sapiens"},
  'nl': {'language': 'nl', 'value': 'gen van de soort Homo sapiens'},
  'it': {'language': 'it', 'value': 'gene umano'},
  'de': {'language': 'de', 'value': 'Gen der Spezies Homo sapiens'},
  'pt': {'language': 'pt', 'value': 'gene da espécie Homo sapiens'},
  'es': {'language': 'es', 'value': 'gen de l

In [36]:
# wikidata_item.write(login)

'Q18051234'

In [None]:
# Andra's script above