<a href="https://colab.research.google.com/github/jrbobes/jrbobes-hitl-ml/blob/master/PAAD_RuleEngine_POC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **PAAD - RuleEngine - PoC**

author: jose.bobes at udc.es

# Importing required libraries

In [2]:
# Libraries used throughout the notebook (standard library first, then third-party)
import sys

import matplotlib
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import scipy

# Report the interpreter and library versions so a run can be reproduced later.
print('Python: {}'.format(sys.version))
print('scipy: {}'.format(scipy.__version__))
print('numpy: {}'.format(numpy.__version__))
print('matplotlib: {}'.format(matplotlib.__version__))
print('pandas: {}'.format(pd.__version__))





Python: 3.8.16 (default, Dec  7 2022, 01:12:13) 
[GCC 7.5.0]
scipy: 1.7.3
numpy: 1.21.6
matplotlib: 3.2.2
pandas: 1.3.5


# Preparing the Pancreas data

In [3]:
# Load the clinical records from the semicolon-separated CSV hosted on GitHub
url = "https://raw.githubusercontent.com/jrbobes/jrbobes-hitl-ml/master/Libro1_Adapted.csv"

# Only these columns are read from the file; everything else is ignored.
columns = [
    'ns10:file_uuid',
    'ns2:gender',
    'ns5:race',
    'ns5:age_at_initial_pathologic_diagnosis',
    'ns3:pathologic_T',
    'ns3:pathologic_N',
    'ns3:pathologic_M',
    'ns3:pathologic_stage',
    'ns5:vital_status',
    'ns5:days_to_death',
    'therapy_type',
]

dataset = pd.read_csv(url, sep=';', encoding="ISO-8859-1", usecols=columns)

# Sanity check: dimensions first, then the leading rows.
print(dataset.shape, dataset.head(), sep='\n')

(205, 11)
                         ns10:file_uuid ns2:gender                   ns5:race  \
0  1AF64746-0F41-408F-9CB3-D567BEBA1217     FEMALE                      WHITE   
1  0D7B23AC-0988-4E95-9AF3-82180A1DAB0A     FEMALE                      WHITE   
2  290AC731-F653-432D-8C35-8E6C178BD2C6     FEMALE                      WHITE   
3  E2FC1DD6-0B4D-409D-9D1C-0F8E0ADB6113     FEMALE  BLACK OR AFRICAN AMERICAN   
4  40A76730-988D-4FF1-A17A-91AA85DD7C76       MALE                      WHITE   

   ns5:age_at_initial_pathologic_diagnosis ns3:pathologic_stage  \
0                                       53            Stage IIB   
1                                       54            Stage IIA   
2                                       58             Stage IV   
3                                       66            Stage IIA   
4                                       64            Stage IIA   

  ns3:pathologic_T ns3:pathologic_N ns3:pathologic_M ns5:vital_status  \
0               T3         

In [4]:
# Feature selection: separate the predictor columns from the target variable
# (REF: https://www.datacamp.com/tutorial/decision-tree-classification-python)
feature_cols = [
    'ns2:gender',
    'ns5:race',
    'ns5:age_at_initial_pathologic_diagnosis',
    'ns3:pathologic_T',
    'ns3:pathologic_N',
    'ns3:pathologic_M',
    'ns3:pathologic_stage',
]

# Predictors, plus a one-hot encoded copy (get_dummies expands the string-valued columns).
X = dataset[feature_cols]
X_encoded = pd.get_dummies(X)

# Target: the therapy type recorded for each patient.
y = dataset['therapy_type']

# Installing the rules engine

In [5]:
!pip install rule_engine

import rule_engine

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rule_engine
  Downloading rule-engine-3.5.0.tar.gz (40 kB)
[K     |████████████████████████████████| 40 kB 3.1 MB/s 
[?25hCollecting ply>=3.9
  Downloading ply-3.11-py2.py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.0 MB/s 
Building wheels for collected packages: rule-engine
  Building wheel for rule-engine (setup.py) ... [?25l[?25hdone
  Created wheel for rule-engine: filename=rule_engine-3.5.0-py3-none-any.whl size=44366 sha256=1f53d9338fab7e009784d1656cac7fffb32930b2b8e18b1d333502677c669919
  Stored in directory: /root/.cache/pip/wheels/d6/25/45/26c5f3f9fe4c0094cd95b32ebc40bd315cf803c8c1aaaecbc1
Successfully built rule-engine
Installing collected packages: ply, rule-engine
Successfully installed ply-3.11 rule-engine-3.5.0


# Exploring the data

In [6]:
# Print column dtypes and non-null counts to spot columns with missing values.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 11 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   ns10:file_uuid                           205 non-null    object 
 1   ns2:gender                               205 non-null    object 
 2   ns5:race                                 200 non-null    object 
 3   ns5:age_at_initial_pathologic_diagnosis  205 non-null    int64  
 4   ns3:pathologic_stage                     202 non-null    object 
 5   ns3:pathologic_T                         205 non-null    object 
 6   ns3:pathologic_N                         205 non-null    object 
 7   ns3:pathologic_M                         205 non-null    object 
 8   ns5:vital_status                         205 non-null    object 
 9   ns5:days_to_death                        67 non-null     float64
 10  therapy_type                             205 non-n

In [7]:
# Days-to-death column; it holds NaN for many of the rows.
print(dataset.loc[:, 'ns5:days_to_death'])

# Average age at the initial pathologic diagnosis across all records.
print("Age at initial diagnosis MEAN: ", dataset['ns5:age_at_initial_pathologic_diagnosis'].mean(), sep="")

0       486.0
1         NaN
2         NaN
3      1332.0
4       117.0
        ...  
200     394.0
201       NaN
202     110.0
203     741.0
204     123.0
Name: ns5:days_to_death, Length: 205, dtype: float64
Age at initial diagnosis MEAN: 64.8


# Creating the Pancreatic Adenocarcinoma (PAAD) NCCN Guidelines rules

In [37]:
# Context setup (currently disabled)
#paadRulesContext = rule_engine.Context(default_value=None)

# Rule declarations: one rule per therapy type. Each expression compares the
# record's `therapy_type` field against a fixed label.
rulePan1 = rule_engine.Rule('therapy_type == "Chemotherapy"')
rulePan2 = rule_engine.Rule('therapy_type == "Other"')

In [38]:
# Apply the rules to every record

# Rule.matches() is called on one record at a time, so the DataFrame is turned
# into a list of per-row dictionaries first.
dictionaryset = dataset.to_dict(orient='records')
#print(dictionaryset)

# Alternative kept for reference: iterate over the rows accepted by a rule's filter.
#print('Chemotherapy results: ')
#for row in rulePan1.filter(dictionaryset):
#  print(row)

# Each rule is paired with the tag printed next to the row index when it matches.
tagged_rules = ((rulePan1, " -> PAN1"), (rulePan2, " -> PAN2"))

for idx, record in enumerate(dictionaryset):
  for rule, tag in tagged_rules:
    if rule.matches(record):
      print(str(idx) + tag)

0 -> PAN2
1 -> PAN2
2 -> PAN2
3 -> PAN1
4 -> PAN2
5 -> PAN1
6 -> PAN1
7 -> PAN1
8 -> PAN1
9 -> PAN1
10 -> PAN1
11 -> PAN2
12 -> PAN1
13 -> PAN1
14 -> PAN1
15 -> PAN1
16 -> PAN1
17 -> PAN1
18 -> PAN1
19 -> PAN1
20 -> PAN1
21 -> PAN2
22 -> PAN2
23 -> PAN2
24 -> PAN2
25 -> PAN1
26 -> PAN1
27 -> PAN2
28 -> PAN2
29 -> PAN1
30 -> PAN2
31 -> PAN2
32 -> PAN1
33 -> PAN1
34 -> PAN1
35 -> PAN2
36 -> PAN2
37 -> PAN2
38 -> PAN1
39 -> PAN2
40 -> PAN1
41 -> PAN1
42 -> PAN1
43 -> PAN2
44 -> PAN1
45 -> PAN2
46 -> PAN1
47 -> PAN2
49 -> PAN2
50 -> PAN2
51 -> PAN1
52 -> PAN1
53 -> PAN1
54 -> PAN1
55 -> PAN1
56 -> PAN1
57 -> PAN1
58 -> PAN1
59 -> PAN2
60 -> PAN1
61 -> PAN1
62 -> PAN2
63 -> PAN1
64 -> PAN1
65 -> PAN2
66 -> PAN1
67 -> PAN2
68 -> PAN1
69 -> PAN1
70 -> PAN1
71 -> PAN2
72 -> PAN2
73 -> PAN1
74 -> PAN1
75 -> PAN1
76 -> PAN1
77 -> PAN1
78 -> PAN1
79 -> PAN2
80 -> PAN1
81 -> PAN2
82 -> PAN1
83 -> PAN1
84 -> PAN1
85 -> PAN2
86 -> PAN1
87 -> PAN1
88 -> PAN1
89 -> PAN2
90 -> PAN2
91 -> PAN1
92 -> PAN