In [None]:
### NUMBER OF ACTIVE GENETIC DISORDERS


# Run the config file, which (per the original author's note) installs the required R libraries
source("config.R")

# INPUTS: "product1_en_cross_jdbor_evo.xml"; "product3_156_en_jdbor_evo.xml".


# OUTPUTS: "number_of_active_genetic_disorders"; "number_of_non_active_genetic_disorders";
# "percentage_of_active_genetic_disorders"; "percentage_of_non_active_genetic_disorders".

In [None]:
# We launch the libraries
library(tidyverse)
library(rlist)
library(data.table)
library(dplyr)
library(XML)
library(methods)

In [None]:
### The total number of disorders is needed later for the percentages

# Parse "product1_en_cross_jdbor_evo.xml" and keep only its DisorderList node,
# converted to a nested R list.
product1 <- xmlToList(xmlParse("product1_en_cross_jdbor_evo.xml"))$DisorderList

# Drop the last element of the list, which is the numeric count of all elements
# and not a disorder. head(x, -1) also behaves sensibly on an empty list,
# unlike x[-length(x)] followed by a 1:length(x) loop.
product1 <- head(product1, -1)

### Working list: the same disorders, in the same order, without element names.
# unname() replaces the element-by-element copy loop, which failed with a
# subscript error when the list was empty (1:0 iterates over c(1, 0)).
product <- unname(product1)
# Each element in the list is a disorder

In [None]:
# Keep only the active clinical entities, i.e. those whose flag value is one of:
# On-line (flag=1), Head of classification (flag=128), Historical entities (flag=512)
#
# The test is vectorised with vapply so that:
#   * an empty `product` list yields an empty result instead of a subscript
#     error (the former 1:length(product) loop iterated over c(1, 0));
#   * an entity without a flag value is simply skipped instead of aborting with
#     "argument is of length zero" (any() of an empty logical is FALSE).
# NOTE(review): `$DisorderFlag` only reads the first DisorderFlag of an entity,
# exactly as before - confirm entities never carry several flags.
has_active_flag <- vapply(
  product,
  function(entity) {
    any(entity$DisorderFlagList$DisorderFlag$Value %in% c('1', '128', '512'))
  },
  logical(1)
)
all_active_clinical_entities <- product[has_active_flag]

In [None]:
### Number of disorders
# Among the active clinical entities, keep those whose group name is 'Disorder'.
# isTRUE() makes an entity with a missing group name count as "not a disorder"
# instead of aborting the cell, and vapply handles an empty input list (the
# former 1:length(...) loop failed on it with a subscript error).
is_disorder_group <- vapply(
  all_active_clinical_entities,
  function(entity) isTRUE(entity$DisorderGroup$Name$text == 'Disorder'),
  logical(1)
)
disorders <- all_active_clinical_entities[is_disorder_group]

# Total used as the denominator of the percentages computed further down
number_disorders <- length(disorders)
number_disorders

In [None]:
# We can now work on the genetic data

# Locate the xml "product3_156_en_jdbor_evo.xml" that contains the genetic disorders.
# The pattern is an unanchored regular expression, so it may match no file or
# several files in the working directory; fail early with a clear message
# rather than handing an ambiguous vector to xmlParse.
xmlfiles <- list.files(pattern = 'product3_156')
if (length(xmlfiles) != 1) {
  stop(
    "Expected exactly one file matching 'product3_156' in the working directory, found ",
    length(xmlfiles),
    if (length(xmlfiles) > 0) paste0(": ", paste(xmlfiles, collapse = ", ")) else ""
  )
}
xmldoc <- xmlParse(file = xmlfiles)

# Convert every "Disorder" node of the document into one row of a data frame
# (xmlToDataFrame already returns a data frame, so no further conversion is needed)
xml_156 <- xmlToDataFrame(nodes = getNodeSet(xmldoc, "//Disorder"))

# Remove duplicated rows; unique() keeps the first occurrence of each row and
# never collapses the result to a vector
xml_156 <- unique(xml_156)

In [None]:
# Restrict the table to the entity types that are counted as disorders
xml_156 <- filter(
  xml_156,
  DisorderType %in% c(
    'Malformation syndrome',
    'Disease',
    'Morphological anomaly',
    'Clinical syndrome',
    'Particular clinical situation in a disease or syndrome',
    'Biological anomaly'
  )
)

In [None]:
# The number of genetic disorders is the number of rows left in xml_156
number_of_active_genetic_disorders <- nrow(xml_156)
number_of_active_genetic_disorders

In [None]:
# Number of non genetic disorders: total disorders minus the genetic ones
number_of_non_active_genetic_disorders <- (number_disorders
                                           - number_of_active_genetic_disorders)
number_of_non_active_genetic_disorders

In [None]:
# Share of genetic disorders among all disorders, expressed as a percentage
percentage_of_active_genetic_disorders <- (number_of_active_genetic_disorders / number_disorders) * 100
percentage_of_active_genetic_disorders

In [None]:
# Share of non genetic disorders among all disorders, expressed as a percentage
percentage_of_non_active_genetic_disorders <- (number_of_non_active_genetic_disorders / number_disorders) * 100
percentage_of_non_active_genetic_disorders