# Install dependencies

In [6]:
import sys
def installModule(projectName:str, moduleName:str=None):
    '''Installs and loads the given module if not already installed'''
    if moduleName is None:
        moduleName=projectName
    if moduleName not in sys.modules:
        !python -m pip install --no-input $projectName
        print(f'{projectName} installed')
    else:
        print(f'{projectName} found')
    %reload_ext $moduleName

installModule('jupyter-xml')
installModule('jupyter-rdfify')
installModule('SPARQLWrapper')
installModule('tabulate')
installModule("pylodstorage", "lodstorage")

jupyter-xml found


jupyter-rdfify found


SPARQLWrapper found
tabulate found
pylodstorage found


# RDFify tool documentation

In [3]:
%rdf turtle -h

usage: %rdf turtle [-h] [--serialize SERIALIZE]
                   [--display {graph,table,raw,none}] [--label LABEL]
                   [--prefix] [--entail {none,rdfs,owl,rdfs+owl}]

optional arguments:
  -h, --help            show this help message and exit
  --serialize SERIALIZE, -s SERIALIZE
                        Format for serializing when display is set to raw.
  --display {graph,table,raw,none}, -d {graph,table,raw,none}
                        How output is displayed
  --label LABEL, -l LABEL
                        Store graph locally with this label
  --prefix, -p          Define a prefix which gets prepend to every query.
                        Useful for PREFIX declarations
  --entail {none,rdfs,owl,rdfs+owl}, -e {none,rdfs,owl,rdfs+owl}
                        Uses a brute force implementation of the finite
                        version of RDFS semantics or OWL 2 RL. Uses owlrl
                        python package.


In [None]:
%rdf sparql -h

In [None]:
%rdf --help

# RDFify

<div class="alert alert-block alert-info">
<b>Note:</b> For remote SPARQL endpoints the different display formats such as table or graph are no working. Only the display format raw functions as expected.
</div>

In [17]:
%%rdf sparql --endpoint https://query.wikidata.org/sparql -d raw -f xml -s substances
SELECT ?substance ?substanceLabel ?formula ?structure ?CAS
WHERE { 
  ?substance wdt:P31 wd:Q11173.
  ?substance wdt:P231 ?CAS.
  ?substance wdt:P274 ?formula.
  ?substance wdt:P117  ?structure.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 15


SPARQL endpoint set to 'https://query.wikidata.org/sparql'.
SPARQL format set to 'xml'.
SPARQL display format set to 'table'.
Error during query:
'str' object has no attribute 'print'


In [15]:
store = %rdf -r
g = store["rdfresults"]


Error: unrecognized arguments: -r


<div class="alert alert-block alert-info">
<b>Note:</b> To get the query result the magic word `%rdf -r` must be used. The command is currently not working neither in the local install of RDFify or in the DBIS profile of the RWTH Jupyter Hub
</div>

# SPARQLWrapper

In [19]:
from SPARQLWrapper import SPARQLWrapper, JSON, CSV
from tabulate import tabulate
from IPython.display import Image, SVG

q = """
SELECT ?substance ?substanceLabel ?formula ?structure ?CAS
WHERE { 
  ?substance wdt:P31 wd:Q11173.
  ?substance wdt:P231 ?CAS.
  ?substance wdt:P274 ?formula.
  ?substance wdt:P117  ?structure.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 15

"""

sparql = SPARQLWrapper("http://query.wikidata.org/sparql")
sparql.setQuery(q)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
table = [[result[column]["value"] for column in result] for result in results["results"]["bindings"]]
print(tabulate(table))
# uncomment to display images
#for imageName in [row[1] for row in table]:
#        display(Image(url=imageName, width = 200, height = 200))



--------------------------------------  ------------------------------------------------------------------------------------------------------  -----------  ----------------  ------------------------------
http://www.wikidata.org/entity/Q402607  http://commons.wikimedia.org/wiki/Special:FilePath/Tyrosol.png                                          501-94-0     C₈H₁₀O₂           tyrosol
http://www.wikidata.org/entity/Q408646  http://commons.wikimedia.org/wiki/Special:FilePath/Triclosan.svg                                        3380-34-5    C₁₂H₇Cl₃O₂        triclosan
http://www.wikidata.org/entity/Q408762  http://commons.wikimedia.org/wiki/Special:FilePath/Tetramethylphenylendiamine.svg                       100-22-1     C₁₀H₁₆N₂          tetramethyl-p-phenylenediamine
http://www.wikidata.org/entity/Q409028  http://commons.wikimedia.org/wiki/Special:FilePath/Sulfur%20dichloride.svg                              10545-99-0   Cl₂S              sulfur dichloride
http://www.wikidata.org/ent

# pyLODStorage

In [8]:
from IPython.display import display, Markdown
from lodstorage.query import QueryManager, Query
from lodstorage.sparql import SPARQL
import copy
show=True
queryRecord={
    "endpoint":"https://query.wikidata.org/sparql",
    "prefixes": ["http://www.wikidata.org/entity/","http://commons.wikimedia.org/wiki/Special:FilePath/"],
    "lang": "sparql",
    "name": "CAS15",
    "title": "15 Random substances with CAS number",
    "description": "Wikidata SPARQL query showing the 15 random chemical substances with their CAS Number",
    "query": """# List of 15 random chemical components with CAS-Number, formula and structure
# see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46
# WF 2021-08-23
SELECT ?substance ?substanceLabel ?formula ?structure ?CAS
WHERE { 
?substance wdt:P31 wd:Q11173.
?substance wdt:P231 ?CAS.
?substance wdt:P274 ?formula.
?substance wdt:P117  ?structure.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 15
"""
    }
endpointUrl=queryRecord.pop("endpoint")
prefixes=queryRecord.pop("prefixes")
endpoint=SPARQL(endpointUrl)
query=Query(**queryRecord)
try:
    qlod=endpoint.queryAsListOfDicts(query.query)
    lod=copy.deepcopy(qlod)
    tablefmt="github"
    for prefix in prefixes:
        query.prefixToLink(lod,prefix,tablefmt)
    tryItUrl=query.getTryItUrl(endpointUrl.replace("/sparql",""))
    doc=query.documentQueryResult(lod, tablefmt=tablefmt,floatfmt=".0f",tryItUrl=tryItUrl)
    display(Markdown(str(doc)))
except Exception as ex:
            print(f"{query.title} at {endpointUrl} failed: {ex}")

## 15 Random substances with CAS number
Wikidata SPARQL query showing the 15 random chemical substances with their CAS Number
### query
```sql
# List of 15 random chemical components with CAS-Number, formula and structure
# see also https://github.com/WolfgangFahl/pyLoDStorage/issues/46
# WF 2021-08-23
SELECT ?substance ?substanceLabel ?formula ?structure ?CAS
WHERE { 
?substance wdt:P31 wd:Q11173.
?substance wdt:P231 ?CAS.
?substance wdt:P274 ?formula.
?substance wdt:P117  ?structure.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 15

```
[try it!](https://query.wikidata.org/#%23%20List%20of%2015%20random%20chemical%20components%20with%20CAS-Number%2C%20formula%20and%20structure%0A%23%20see%20also%20https%3A//github.com/WolfgangFahl/pyLoDStorage/issues/46%0A%23%20WF%202021-08-23%0ASELECT%20%3Fsubstance%20%3FsubstanceLabel%20%3Fformula%20%3Fstructure%20%3FCAS%0AWHERE%20%7B%20%0A%3Fsubstance%20wdt%3AP31%20wd%3AQ11173.%0A%3Fsubstance%20wdt%3AP231%20%3FCAS.%0A%3Fsubstance%20wdt%3AP274%20%3Fformula.%0A%3Fsubstance%20wdt%3AP117%20%20%3Fstructure.%0ASERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22.%20%7D%0A%7D%0ALIMIT%2015%0A)
## result
| substance                                         | substanceLabel    | formula        | structure                                                                                                                                             | CAS         |
|---------------------------------------------------|-------------------|----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|-------------|
| [Q407258](http://www.wikidata.org/entity/Q407258) | calcium sulfate   | CaO₄S          | [Calcium sulfate.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Calcium%20sulfate.svg)                                                       | 7778-18-9   |
| [Q407258](http://www.wikidata.org/entity/Q407258) | calcium sulfate   | CaSO₄          | [Calcium sulfate.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Calcium%20sulfate.svg)                                                       | 7778-18-9   |
| [Q408050](http://www.wikidata.org/entity/Q408050) | dimethyl ether    | C₂H₆O          | [Dimethyl ether Structural Formulae.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Dimethyl%20ether%20Structural%20Formulae.svg)             | 115-10-6    |
| [Q408099](http://www.wikidata.org/entity/Q408099) | brivaracetam      | C₁₁H₂₀N₂O₂     | [Brivaracetam.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Brivaracetam.svg)                                                               | 357336-20-0 |
| [Q408370](http://www.wikidata.org/entity/Q408370) | dihydralazine     | C₈H₁₀N₆        | [Dihydralazine structure.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Dihydralazine%20structure.svg)                                       | 484-23-1    |
| [Q408400](http://www.wikidata.org/entity/Q408400) | florfenicol       | C₁₂H₁₄Cl₂FNO₄S | [Florfenicol.png](http://commons.wikimedia.org/wiki/Special:FilePath/Florfenicol.png)                                                                 | 73231-34-2  |
| [Q408529](http://www.wikidata.org/entity/Q408529) | amikacin          | C₂₂H₄₃N₅O₁₃    | [Amikacin.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Amikacin.svg)                                                                       | 37517-28-5  |
| [Q408718](http://www.wikidata.org/entity/Q408718) | cyclopropanone    | C₃H₄O          | [Cyclopropanone.png](http://commons.wikimedia.org/wiki/Special:FilePath/Cyclopropanone.png)                                                           | 5009-27-8   |
| [Q408718](http://www.wikidata.org/entity/Q408718) | cyclopropanone    | C₃H₄O          | [Cyclopropanone Structural Formula V1.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Cyclopropanone%20Structural%20Formula%20V1.svg)         | 5009-27-8   |
| [Q409057](http://www.wikidata.org/entity/Q409057) | aluminum fluoride | AlF₃           | [Aluminium-trifluoride-monomer-2D-dimensions.png](http://commons.wikimedia.org/wiki/Special:FilePath/Aluminium-trifluoride-monomer-2D-dimensions.png) | 7784-18-1   |
| [Q409133](http://www.wikidata.org/entity/Q409133) | chloroethane      | C₂H₅Cl         | [Chloroethane-2D.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Chloroethane-2D.svg)                                                         | 75-00-3     |
| [Q409133](http://www.wikidata.org/entity/Q409133) | chloroethane      | C₂H₅Cl         | [Chloroethane-skeletal.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Chloroethane-skeletal.svg)                                             | 75-00-3     |
| [Q409133](http://www.wikidata.org/entity/Q409133) | chloroethane      | C₂H₅Cl         | [Chloroethane FormulaV1.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Chloroethane%20FormulaV1.svg)                                         | 75-00-3     |
| [Q409375](http://www.wikidata.org/entity/Q409375) | monochloramine    | ClH₂N          | [Chloramine-2D.png](http://commons.wikimedia.org/wiki/Special:FilePath/Chloramine-2D.png)                                                             | 10599-90-3  |
| [Q409618](http://www.wikidata.org/entity/Q409618) | dihydroxyacetone  | C₃H₆O₃         | [Dihydroxyacetone acsv.svg](http://commons.wikimedia.org/wiki/Special:FilePath/Dihydroxyacetone%20acsv.svg)                                           | 96-26-4     |