# Experiment IV: Generalization of the hypothesis configuration

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


## Prepare graphs

In [3]:
from hypotest.ontologies import molecular_subgraph, full_graph
from grontocrawler.graph import produce_graph
from hypotest.graph_mutation import normalize_hypothgraph

# Ontology objects exposed by the two hypotest ontology modules.
sub_onto, full_onto = molecular_subgraph.g, full_graph.g

# Raw (un-normalized) graphs, both produced with the 'existential-arcs' option.
sub_unnorm = produce_graph.produce_graph(
    sub_onto, options=['existential-arcs'])
full_unnorm = produce_graph.produce_graph(
    full_onto, options=['existential-arcs'])

# normalize_hypothgraph returns a pair; only its first element (the graph used
# in the rest of the notebook) is kept, the second is discarded into `_`.
sub_norm, _ = normalize_hypothgraph.normalize_hypothgraph(sub_unnorm)
full_norm, _ = normalize_hypothgraph.normalize_hypothgraph(full_unnorm)

INFO:rdflib:RDFLib Version: 4.2.1


### Configuration

In [69]:
from hypotest.confidence import compute_confidence
from hypotest.graph_generation import hypoth_conf
# Short local alias for the hypothesis-configuration class.
Hypoth_Conf = hypoth_conf.Hypoth_Conf
# The source/target endpoints are generated from the molecular subgraph only
# and are later reused when scoring the full graph as well.
conf_source, conf_target = hypoth_conf.generate_max_endpoints(sub_norm)
# NOTE(review): the recorded output is a tuple repr, which suggests this cell
# was run under Python 2 (print statement applied to a tuple) - confirm before
# porting, since Python 3 prints the two values separated by a space.
print(conf_source, conf_target)

('http://plumdeq.xyz/ontologies/hypothesis/Synovial_inflammation', 'http://plumdeq.xyz/ontologies/hypothesis/Cartilage_degeneration')


## See what else you can prove with the same evidence set

* Synovial inflammation                               0.142857
* Positive regulation of TNF alpha overproduction     0.142857
* Cartilage degeneration                              0.142857
* Diminution of load bearing capacity of cartilage    0.107143
* Biochemical imbalance                               0.107143

In [12]:
# Namespace prefix that is prepended to every evidence label to form the node
# identifier used in the graphs.
ns = "http://plumdeq.xyz/ontologies/hypothesis/"
# Human-readable labels of the evidenced nodes (spaces are converted to
# underscores when the identifiers are built).
# NOTE(review): the markdown list above this cell has five entries; 'Diminution
# of load bearing capacity of cartilage' is not included here - presumably
# because every evidence node must also exist in the subgraph; confirm.
evidences = [
    'Synovial inflammation',
    'Positive regulation of TNF alpha overproduction',
    'Cartilage degeneration',
    'Biochemical imbalance'
]

In [13]:
# Turn each human-readable evidence label into a node identifier: spaces become
# underscores and the ontology namespace is prepended.
evidence_set = [ns + label.replace(' ', '_') for label in evidences]

# Fail fast if an evidence node is missing from either graph. Both assertions
# carry the offending identifier as their message so a failure is diagnosable
# regardless of which graph lacks the node.
for uri in evidence_set:
    assert uri in full_norm.node, uri
    assert uri in sub_norm.node, uri

## Initial confidences

In [91]:
# Baseline configuration: the generated source/target endpoints together with
# the fixed evidence set.
init_conf = Hypoth_Conf(conf_source, conf_target, evidence_set)
# The same configuration is scored once against the subgraph and once against
# the full graph.
init_sub = compute_confidence.normalized_confidence(sub_norm, init_conf)
init_full = compute_confidence.normalized_confidence(full_norm, init_conf)

In [92]:
init_sub

0.6666666666666666

In [93]:
init_full

0.5357142857142857

## Try all possible hypothesis configurations

In [14]:
import networkx as nx
import itertools as it

In [22]:
# Result accumulators keyed by (source label, target label): one mapping for
# the subgraph scores, one for the full-graph scores. Two distinct dicts.
confs_sub, confs_full = {}, {}

In [25]:
# Score every node pair of the full graph for which a path s -> t exists.
# NOTE(review): it.combinations yields each unordered pair exactly once, so
# only one direction is tested per pair and which one depends on the order of
# full_norm.nodes(). If the graph is directed and both directions matter,
# it.permutations would be required - confirm intent.
for s, t in it.combinations(full_norm.nodes(), 2):
    if not nx.has_path(full_norm, s, t):
        continue

    # Results are keyed by the human-readable labels of the two endpoints.
    # NOTE(review): presumably labels are unique per node; two nodes sharing a
    # label would silently overwrite each other's entry - verify.
    s_label = full_norm.node[s]['label']
    t_label = full_norm.node[t]['label']
    pair_key = (s_label, t_label)
    new_conf = Hypoth_Conf(s, t, evidence_set)

    # Subgraph score: computed only when both endpoints exist in the subgraph,
    # otherwise recorded as 0.
    both_in_sub = s in sub_norm.node and t in sub_norm.node
    confs_sub[pair_key] = (
        compute_confidence.normalized_confidence(sub_norm, new_conf)
        if both_in_sub else 0
    )

    # Full-graph score is always computed.
    confs_full[pair_key] = compute_confidence.normalized_confidence(full_norm, new_conf)

## Study these confidences as dataframes

In [27]:
import pandas as pd

In [30]:
# Wrap the score dictionaries in pandas Series; the (source label, target
# label) tuple keys show up as a two-level index in the outputs below.
# Note that despite the `df_` prefix these are Series, not DataFrames.
df_sub = pd.Series(confs_sub)
df_full = pd.Series(confs_full)

In [34]:
df_sub.describe()

count    174.000000
mean       0.165396
std        0.278585
min        0.000000
25%        0.000000
50%        0.000000
75%        0.433333
max        1.000000
dtype: float64

In [35]:
df_full.describe()

count    174.000000
mean       0.416871
std        0.168516
min        0.000000
25%        0.343388
50%        0.428571
75%        0.500000
max        1.000000
dtype: float64

In [51]:
df_sub[df_sub > 0.6]

Cartilage degeneration                                  Biochemical imbalance                                     0.666667
                                                        Negative regulation of Collagen production                0.750000
                                                        Positive regulation of TNF alpha overproduction           1.000000
Loss of collagen                                        Cartilage degeneration                                    0.666667
                                                        Negative regulation of Collagen production                0.666667
                                                        Positive regulation of TNF alpha overproduction           0.800000
Loss of proteoglycan                                    Cartilage degeneration                                    0.666667
                                                        Negative regulation of Chondrocytes anabolic activity     0.666667
                

In [47]:
df_full[df_full > 0.6]

Cartilage degeneration                            Biochemical imbalance                                     0.666667
                                                  Negative regulation of Collagen production                0.750000
                                                  Positive regulation of TNF alpha overproduction           1.000000
Diminution of load bearing capacity of cartilage  Positive regulation of TNF alpha overproduction           0.750000
                                                  Synovial inflammation                                     0.666667
Loss of collagen                                  Positive regulation of TNF alpha overproduction           0.625000
Loss of proteoglycan                              Positive regulation of TNF alpha overproduction           0.625000
Meniscal tear                                     Positive regulation of TNF alpha overproduction           0.750000
Synovial inflammation                             Negative regul

## Study them together

In [53]:
# Put both score series side by side, one column per graph; both series are
# built from the same (source label, target label) keys.
dataframe = pd.DataFrame({'sub': df_sub, 'full': df_full})

In [54]:
dataframe.describe()

Unnamed: 0,full,sub
count,174.0,174.0
mean,0.416871,0.165396
std,0.168516,0.278585
min,0.0,0.0
25%,0.343388,0.0
50%,0.428571,0.0
75%,0.5,0.433333
max,1.0,1.0


### Both big confidence

In [81]:
# Pairs that score above 0.6 on the subgraph AND on the full graph.
high_in_sub = dataframe['sub'] > 0.6
high_in_full = dataframe['full'] > 0.6
both_big_confidence = dataframe[high_in_sub & high_in_full]

In [82]:
both_big_confidence

Unnamed: 0,Unnamed: 1,full,sub
Cartilage degeneration,Biochemical imbalance,0.666667,0.666667
Cartilage degeneration,Negative regulation of Collagen production,0.75,0.75
Cartilage degeneration,Positive regulation of TNF alpha overproduction,1.0,1.0
Loss of collagen,Positive regulation of TNF alpha overproduction,0.625,0.8
Loss of proteoglycan,Positive regulation of TNF alpha overproduction,0.625,0.8
Synovial inflammation,Negative regulation of Chondrocytes anabolic activity,0.666667,0.666667
Synovial inflammation,Negative regulation of Collagen production,0.666667,0.666667
Synovial inflammation,Negative regulation of Proteoglycan production,0.666667,0.666667
Synovial inflammation,Positive regulation of Chondrocytes catabolic activity,0.666667,0.666667
Synovial inflammation,Positive regulation of TNF alpha overproduction,1.0,1.0


In [95]:
print(both_big_confidence.to_latex())

\begin{tabular}{llrr}
\toprule
                      &                       &      full &       sub \\
\midrule
Cartilage degeneration & Biochemical imbalance &  0.666667 &  0.666667 \\
                      & Negative regulation of Collagen production &  0.750000 &  0.750000 \\
                      & Positive regulation of TNF alpha overproduction &  1.000000 &  1.000000 \\
Loss of collagen &                       &  0.625000 &  0.800000 \\
Loss of proteoglycan &                       &  0.625000 &  0.800000 \\
Synovial inflammation & Negative regulation of Chondrocytes anabolic activity &  0.666667 &  0.666667 \\
                      & Negative regulation of Collagen production &  0.666667 &  0.666667 \\
                      & Negative regulation of Proteoglycan production &  0.666667 &  0.666667 \\
                      & Positive regulation of Chondrocytes catabolic activity &  0.666667 &  0.666667 \\
                      & Positive regulation of TNF alpha overproduction &  1.

### Quite a big gap between the two confidences

In [89]:
# Pairs where one graph is confident (> 0.6) while the other is not (< 0.3).
# The two cases are named separately so the grouping of `&` and `|` is explicit.
sub_high_full_low = (dataframe['sub'] > 0.6) & (dataframe['full'] < 0.3)
full_high_sub_low = (dataframe['full'] > 0.6) & (dataframe['sub'] < 0.3)
gap_confidence = dataframe[sub_high_full_low | full_high_sub_low]

In [90]:
gap_confidence

Unnamed: 0,Unnamed: 1,full,sub
Diminution of load bearing capacity of cartilage,Positive regulation of TNF alpha overproduction,0.75,0.0
Diminution of load bearing capacity of cartilage,Synovial inflammation,0.666667,0.0
Meniscal tear,Positive regulation of TNF alpha overproduction,0.75,0.0


### Zero confidence in the subgraph, decent confidence in the full graph

In [65]:
# Pairs with a subgraph score of exactly 0 whose full-graph score exceeds 0.5.
# NOTE(review): "quiet" in the variable name is a typo for "quite"; it is left
# unchanged here because later cells reference the name as spelled.
sub_is_zero_full_is_quiet_good = dataframe[(dataframe['sub'] == 0) & (dataframe['full'] > 0.5)]

In [94]:
sub_is_zero_full_is_quiet_good

Unnamed: 0,Unnamed: 1,full,sub
Cartilage calcification,Positive regulation of TNF alpha overproduction,0.6,0.0
Diminution of load bearing capacity of cartilage,Biochemical imbalance,0.571429,0.0
Diminution of load bearing capacity of cartilage,Negative regulation of Chondrocytes anabolic activity,0.6,0.0
Diminution of load bearing capacity of cartilage,Negative regulation of Collagen production,0.6,0.0
Diminution of load bearing capacity of cartilage,Negative regulation of Proteoglycan production,0.6,0.0
Diminution of load bearing capacity of cartilage,Positive regulation of Chondrocytes catabolic activity,0.6,0.0
Diminution of load bearing capacity of cartilage,Positive regulation of TNF alpha overproduction,0.75,0.0
Diminution of load bearing capacity of cartilage,Synovial inflammation,0.666667,0.0
Meniscal tear,Biochemical imbalance,0.571429,0.0
Meniscal tear,Negative regulation of Collagen production,0.6,0.0


In [68]:
print(dataframe.to_latex())

\begin{tabular}{llrr}
\toprule
                      &                       &      full &       sub \\
\midrule
Biochemical imbalance & Decrease of cartilage elasticity &  0.500000 &  0.000000 \\
                      & Knee pain &  0.500000 &  0.000000 \\
Bone erosion & Joint deformation &  0.000000 &  0.000000 \\
Cartilage calcification & Biochemical imbalance &  0.500000 &  0.000000 \\
                      & Cartilage degeneration &  0.333333 &  0.000000 \\
                      & Decrease of cartilage elasticity &  0.375000 &  0.000000 \\
                      & Joint deformation &  0.222222 &  0.000000 \\
                      & Knee pain &  0.285714 &  0.000000 \\
                      & Loss of collagen &  0.428571 &  0.000000 \\
                      & Meniscal tear &  0.250000 &  0.000000 \\
                      & Negative regulation of Chondrocytes anabolic activity &  0.500000 &  0.000000 \\
                      & Negative regulation of Collagen production &  0.500000 & 

In [67]:
print(sub_is_zero_full_is_quiet_good.to_latex())

\begin{tabular}{llrr}
\toprule
                      &                       &      full &  sub \\
\midrule
Cartilage calcification & Positive regulation of TNF alpha overproduction &  0.600000 &  0.0 \\
Diminution of load bearing capacity of cartilage & Biochemical imbalance &  0.571429 &  0.0 \\
                      & Negative regulation of Chondrocytes anabolic activity &  0.600000 &  0.0 \\
                      & Negative regulation of Collagen production &  0.600000 &  0.0 \\
                      & Negative regulation of Proteoglycan production &  0.600000 &  0.0 \\
                      & Positive regulation of Chondrocytes catabolic activity &  0.600000 &  0.0 \\
                      & Positive regulation of TNF alpha overproduction &  0.750000 &  0.0 \\
                      & Synovial inflammation &  0.666667 &  0.0 \\
Meniscal tear & Biochemical imbalance &  0.571429 &  0.0 \\
                      & Negative regulation of Collagen production &  0.600000 &  0.0 \\
       

### Small confidence in the full graph, good confidence in the subgraph

In [100]:
# Pairs that look convincing on the subgraph (> 0.5) but fall below 0.5 once
# the full graph is taken into account.
good_in_sub = dataframe['sub'] > 0.5
small_in_full = dataframe['full'] < 0.5
small_full_good_sub = dataframe[good_in_sub & small_in_full]
small_full_good_sub

Unnamed: 0,Unnamed: 1,full,sub
Loss of collagen,Cartilage degeneration,0.4375,0.666667
Loss of proteoglycan,Cartilage degeneration,0.4375,0.666667
Loss of proteoglycan,Loss of collagen,0.46875,0.571429
Loss of proteoglycan,Positive regulation of MMP13 production,0.46875,0.571429
Positive regulation of Aggrecanases production,Negative regulation of Chondrocytes anabolic activity,0.46875,0.571429
Positive regulation of Aggrecanases production,Negative regulation of Collagen production,0.46875,0.571429
Positive regulation of Aggrecanases production,Negative regulation of Proteoglycan production,0.46875,0.571429
Positive regulation of Aggrecanases production,Positive regulation of Chondrocytes catabolic activity,0.46875,0.571429
Positive regulation of Aggrecanases production,Synovial inflammation,0.458333,0.6
Positive regulation of Chondrocytes catabolic activity,Negative regulation of Collagen production,0.441176,0.533333
