# Getting Started
This notebook uses a few utility helpers (`spark_session_builder` and `PWBdata`) to examine the data in the workbench.

In [15]:
#Imports
import spark_session_builder
from PWBdata import PWBdata
import ipywidgets as widgets

In [2]:
# Build the Spark session and its context with the project helper.
spark_handles = spark_session_builder.session()
sparkSession, sc = spark_handles

# Echo the context so the cell output shows it was created.
sc

In [3]:
# HDFS location of the data; handed to PWBdata when the wrapper is created.
hdfs_path = 'hdfs://demo-full-load-hdfs-namenode:8020'

### The `PWBdata` class wraps the tables in RWB

In [4]:
# Create the table wrapper for the data stored under hdfs_path.
pwb = PWBdata(hdfs_path)

# List the wrapper's attributes, leaving out names that start with a double underscore.
list(filter(lambda attr: not attr.startswith('__'), dir(pwb)))

['get_dataframe',
 'get_table_list',
 'hdfs_path',
 'pp_data_path',
 'pp_files',
 'resnet_data_path',
 'resnet_files',
 'rmc_data_path',
 'rmc_files']

## Pharmapendium data

![Pharmapendium schema](Pharmapendium.PNG)

In [20]:
# Fetch the 'pp' table entries and offer their names in a dropdown.
pp_tbls = pwb.get_table_list('pp')
pp_table_names = [tbl['name'] for tbl in pp_tbls]
widgets.Dropdown(
    description='Tables:',
    options=pp_table_names,
    disabled=False,
)

Dropdown(description='Tables:', options=('Activitytarget', 'Activitytargetextended', 'Activitytargetproperty',…

In [14]:
# Print the name of every 'pp' table, one per line.
for pp_table in pp_tbls:
    print(pp_table['name'])

Activitytarget
Activitytargetextended
Activitytargetproperty
Concomitant
Document
Dosingregimen
Drug
Drugclass
Drugindication
Drugreference
Drugtarget
Effect
Endpoint
Faersadministrativeinformation
Faersdrugreport
Indication
Measure
Medatatype
Meenzymetransporter
Meyler
Pathogen
Pkparameter
Reference
Result
Source
Species
Study
Studydesign
Studydesigntype
Subject
Synonym
Target
Word


In [6]:
# Load the 'Effect' table from the 'pp' data set.
pp_effect_df = pwb.get_dataframe(sparkSession, 'pp', 'Effect')

# Cap the preview at five rows before converting to pandas for display.
pp_effect_preview = pp_effect_df.limit(5)
pp_effect_preview.toPandas()

Unnamed: 0,hdfsIngestTime,offset,Effect,core_relatedMatch,core_closeMatch,core_changeNote,core_prefLabel,core_scopeNote,core_broadMatch,hasPriority,...,core_narrower,core_topConceptOf,skos_xl_hiddenLabel,skos_xl_altLabel,core_related,core_editorialNote,core_example,core_definition,core_note,provenanceJob
0,2020-07-21T09:14:06.753589,394,https://data.elsevier.com/lifescience/taxonomy...,,,,Conjunctival melanoma,,,,...,,,,,,,,,,b7524128-d53f-4569-b139-3062eb1f07a2
1,2020-07-21T09:13:49.254668,433,https://data.elsevier.com/lifescience/taxonomy...,,,,Picornavirus infection,,,,...,,,,,,,,,,b7524128-d53f-4569-b139-3062eb1f07a2
2,2020-07-21T09:14:23.737472,416,https://data.elsevier.com/lifescience/taxonomy...,,,,Afferent loop syndrome,,,,...,,,,,,,,,,b7524128-d53f-4569-b139-3062eb1f07a2
3,2020-07-21T09:13:55.971939,438,https://data.elsevier.com/lifescience/taxonomy...,,,,Mitochondrial neurogastrointestinal encephalop...,,,,...,,,,,,,,,,b7524128-d53f-4569-b139-3062eb1f07a2
4,2020-07-21T09:14:01.265585,396,https://data.elsevier.com/lifescience/taxonomy...,,,,Psychological factor affecting medical condition,,,,...,,,,,,,,,,b7524128-d53f-4569-b139-3062eb1f07a2


## RMC data

![RMC Schema](RMC.PNG)

In [7]:
# Fetch the 'rmc' table entries and print each table name on its own line.
rmc_tbls = pwb.get_table_list('rmc')
for rmc_table in rmc_tbls:
    print(rmc_table['name'])

Activitysite
Administrationroute
Administrationtype
Affiliationorganisation
Agentconfiguration
Aminoacid
Analyticaltechnique
Assaycategory
Author
Bindingexperimenttype
Bioassay
Bioassaygraft
Biologicalactivity
Biologicalmaterial
Biologicalmaterialtype
Biologicalphenomenon
Cellculturetype
Cellcultureuse
Celltype
Cellularorganelle
Cellularphenomenon
Chemicalcompound
Chemicalcompoundeffect
Chemicalcompoundname
Chemicalcompoundstructure
Chemicalcompoundtype
Citationaffiliation
Citationauthor
Citationpatentbibliography
Citationsourcetype
Citationtype
Clamptype
Clinicalstate
Clinicalstatetype
Controldatapointtype
Countrycode
Datasource
Developmentstage
Document
Documentdetails
Documenttype
Druglikeness
Electrophysiologyconfiguration
Elementlocation
Elementnature
Experimentalmodel
Feeding
Feedingmethod
Feedingtype
Genotype
Genotypetype
Highestclinicalphase
Internationalchemicalidentifier
Isolatedorgantype
Journal
Journaltitle
Language
Location
Measuredobject
Measuredparameter
Measurelocation


In [8]:
# Load the 'Target' table from the 'rmc' data set.
rmc_target_df = pwb.get_dataframe(sparkSession, 'rmc', 'Target')

# Cap the preview at five rows before converting to pandas for display.
rmc_target_preview = rmc_target_df.limit(5)
rmc_target_preview.toPandas()

Unnamed: 0,hdfsIngestTime,offset,Target,hasOtherInfo,hasCreationDate,hasStatus,hasLabel,hasUpdateDate,hasSynapticaID,provenanceJob
0,2020-07-13T13:06:44.936282,112,https://data.elsevier.com/lifescience/entity/r...,,2014-01-23 13:51:57,V,S-ribosylhomocysteine lyase (gene luxS),2018-04-10 14:13:14,,
1,2020-07-13T13:08:15.870647,1077,https://data.elsevier.com/lifescience/entity/r...,,2014-01-23 13:51:57,V,Na+/H+ exchanger,2017-01-11 12:26:03,,
2,2020-07-13T13:07:12.260025,1426,https://data.elsevier.com/lifescience/entity/r...,,2014-01-23 13:51:57,O,Deacetoxycephalosporin C Synthetase,,,
3,2020-07-13T14:52:01.431957,3623,https://data.elsevier.com/lifescience/entity/r...,,2017-02-16 11:47:37,N,Retinoblastoma Binding Protein 7,,,
4,2020-07-13T13:08:29.569775,37,https://data.elsevier.com/lifescience/entity/r...,,2014-01-23 13:51:57,O,Vivapain-2,,,


## Resnet data

![Resnet Schema](Resnet.PNG)

In [9]:
# Fetch the 'resnet' table entries and print each table name on its own line.
resnet_tbls = pwb.get_table_list('resnet')
for resnet_table in resnet_tbls:
    print(resnet_table['name'])

Annotation
Binding
Biomarker
Cellexpression
Cellobject
Cellprocess
Celltype
Chemicalreaction
Clinicalparameter
Clinicaltrial
Complex
Dictvaluetype
Directregulation
Disease
Expression
Functionalassociation
Functionalclass
Geneticchange
Geneticvariant
Group
Mirnaeffect
Molsynthesis
Moltransport
Organ
Organism
Other
Pathway
Promoterbinding
Propertytype
Protein
Protmodification
Quantitativechange
Regulation
Semanticconcept
Smallmol
Statechange
Submission
Tissue
Treatment
Virus


In [10]:
# Load the 'Smallmol' table from the 'resnet' data set.
resnet_Smallmol_df = pwb.get_dataframe(sparkSession, 'resnet', 'Smallmol')

# Cap the preview at five rows before converting to pandas for display.
resnet_Smallmol_preview = resnet_Smallmol_df.limit(5)
resnet_Smallmol_preview.toPandas()

Unnamed: 0,hdfsIngestTime,offset,SmallMol,dateModified,isRegulatorOf,isMemberOf,isPartnerOf,hasName,hasUrn,dateCreated,isTargetOf,hasProperty,isPublic,provenanceJob
0,2020-07-15T22:55:06.854697,29232,https://data.elsevier.com/lifescience/entity/r...,2018-11-26T17:52:37.864Z,,,,(Z)-7-dodecen-1-ol,urn:agi-cas:20056-92-2,2018-11-26T17:52:37.863Z,,https://data.elsevier.com/lifescience/entity/r...,True,
1,2020-07-15T22:52:56.061199,29179,https://data.elsevier.com/lifescience/entity/r...,2018-11-26T17:52:47.206Z,,,,F 1541,urn:agi-cas:68654-60-4,2018-11-26T17:52:47.206Z,,https://data.elsevier.com/lifescience/entity/r...,True,
2,2020-07-15T22:52:56.06128,29180,https://data.elsevier.com/lifescience/entity/r...,2018-11-26T17:53:32.600Z,,,,2-methyl-1 2-butanediol,urn:agi-cas:41051-72-3,2018-11-26T17:53:32.600Z,,https://data.elsevier.com/lifescience/entity/r...,True,
3,2020-07-15T22:53:02.416912,28845,https://data.elsevier.com/lifescience/entity/r...,2018-11-26T17:54:24.238Z,https://data.elsevier.com/lifescience/entity/r...,https://data.elsevier.com/lifescience/entity/r...,https://data.elsevier.com/lifescience/entity/r...,furans,urn:agi-smol:furan%20derivative,2018-11-26T17:54:24.238Z,https://data.elsevier.com/lifescience/entity/r...,https://data.elsevier.com/lifescience/entity/r...,True,
4,2020-07-15T22:53:00.664626,28980,https://data.elsevier.com/lifescience/entity/r...,2018-11-26T17:54:25.566Z,,,,GYKI-13380,urn:agi-cas:75614-09-4,2018-11-26T17:54:25.565Z,,https://data.elsevier.com/lifescience/entity/r...,True,
