# Importing Required Libraries

In [1]:
import pandas as pd

In [2]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Colab-only step: mount Google Drive at /content/drive so files stored there can be read
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading raw data and checking data types and null values

In [4]:
# Location of the raw CSV on the mounted Google Drive
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/Co-Occurance_of_procedures/data.csv'
raw_data = pd.read_csv(DATA_PATH)

In [5]:
# Preview the first rows of the loaded data
raw_data.head()

Unnamed: 0,PatientID,TestName
0,1,Blood test
1,1,X-ray
2,1,ECG
3,1,Allergy test
4,1,Stool sample analysis


In [6]:
# Summarise column dtypes and non-null counts
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139 entries, 0 to 138
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   PatientID  139 non-null    int64 
 1   TestName   139 non-null    object
dtypes: int64(1), object(1)
memory usage: 2.3+ KB


In [7]:
# Count missing values in each column
raw_data.isna().sum()

PatientID    0
TestName     0
dtype: int64

# Pre-Processing the data for the association algorithm

In [8]:
# Gather every patient's tests into a single list, one entry per PatientID
tests_by_patient = raw_data.groupby('PatientID')['TestName']
grouped_df = tests_by_patient.apply(list)

In [9]:
# Turn the per-patient groups into a plain list of transactions (one list of tests each)
procedures = list(grouped_df)

print(procedures)

[['Blood test', 'X-ray', 'ECG', 'Allergy test', 'Stool sample analysis'], ['Urine test', 'MRI scan', 'Biopsy'], ['CT scan', 'ECG', 'Colonoscopy', 'Pap smear', 'Bone density test', 'Stool sample analysis', 'Blood test'], ['Mammogram', 'HIV test'], ['X-ray', 'Pulmonary function test', 'Biopsy', 'Urine test'], ['HIV test'], ['ECG', 'Blood test', 'CT scan', 'X-ray', 'Colonoscopy', 'Mammogram'], ['Stool sample analysis', 'Bone density test', 'Urine test', 'ECG', 'MRI scan', 'Pap smear', 'Allergy test', 'Blood test', 'Colonoscopy'], ['Pulmonary function test', 'Biopsy', 'Mammogram'], ['Bone density test', 'MRI scan', 'Stool sample analysis', 'Allergy test', 'X-ray'], ['ECG', 'Blood test'], ['CT scan', 'Urine test', 'Colonoscopy', 'Bone density test', 'HIV test', 'Stool sample analysis', 'Pap smear', 'Mammogram'], ['MRI scan', 'Urine test', 'Allergy test', 'X-ray'], ['ECG', 'Blood test', 'CT scan', 'Colonoscopy', 'Mammogram', 'Pap smear'], ['Pulmonary function test', 'Biopsy', 'HIV test'], ['

In [10]:
# Encode the transactions as an array whose columns are the item names in te.columns_
te = TransactionEncoder()
te_array = te.fit(procedures).transform(procedures)
df = pd.DataFrame(data=te_array, columns=te.columns_)

# Implementing the algorithm

In [11]:
# Run Apriori on the encoded transactions, keeping itemsets that meet MIN_SUPPORT
MIN_SUPPORT = 0.2
frequent_itemsets = apriori(df, use_colnames=True, min_support=MIN_SUPPORT)

In [12]:
# Build association rules from the frequent itemsets, thresholded on confidence
MIN_CONFIDENCE = 0.5
rules = association_rules(
    frequent_itemsets,
    min_threshold=MIN_CONFIDENCE,
    metric='confidence',
)

In [13]:
# Show the mined itemsets under a heading
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

Frequent Itemsets:
     support                        itemsets
0   0.290323                  (Allergy test)
1   0.225806                        (Biopsy)
2   0.354839                    (Blood test)
3   0.290323             (Bone density test)
4   0.322581                       (CT scan)
5   0.354839                   (Colonoscopy)
6   0.354839                           (ECG)
7   0.225806                      (HIV test)
8   0.258065                      (MRI scan)
9   0.387097                     (Mammogram)
10  0.290323                     (Pap smear)
11  0.290323         (Stool sample analysis)
12  0.322581                    (Urine test)
13  0.322581                         (X-ray)
14  0.258065           (X-ray, Allergy test)
15  0.225806           (Blood test, CT scan)
16  0.258065       (Colonoscopy, Blood test)
17  0.322581               (Blood test, ECG)
18  0.225806          (Colonoscopy, CT scan)
19  0.258065            (Mammogram, CT scan)
20  0.225806            (CT scan, Pa

In [14]:
# Show the generated rules under a heading (preceded by a blank line)
print("\nAssociation Rules:", rules, sep="\n")


Association Rules:
                  antecedents                consequents  antecedent support  \
0                     (X-ray)             (Allergy test)            0.322581   
1              (Allergy test)                    (X-ray)            0.290323   
2                (Blood test)                  (CT scan)            0.354839   
3                   (CT scan)               (Blood test)            0.322581   
4               (Colonoscopy)               (Blood test)            0.354839   
5                (Blood test)              (Colonoscopy)            0.354839   
6                (Blood test)                      (ECG)            0.354839   
7                       (ECG)               (Blood test)            0.354839   
8               (Colonoscopy)                  (CT scan)            0.354839   
9                   (CT scan)              (Colonoscopy)            0.322581   
10                (Mammogram)                  (CT scan)            0.387097   
11                  

# Output

In [15]:
# Keep only the columns the inference helpers read
output_df = rules.loc[:, ['antecedents', 'consequents', 'conviction']]

In [16]:
# Display up to the first 27 rules — presumably the whole rule table; TODO confirm the rule count
output_df.head(27)

Unnamed: 0,antecedents,consequents,conviction
0,(X-ray),(Allergy test),3.548387
1,(Allergy test),(X-ray),6.096774
2,(Blood test),(CT scan),1.862903
3,(CT scan),(Blood test),2.150538
4,(Colonoscopy),(Blood test),2.365591
5,(Blood test),(Colonoscopy),2.365591
6,(Blood test),(ECG),7.096774
7,(ECG),(Blood test),7.096774
8,(Colonoscopy),(CT scan),1.862903
9,(CT scan),(Colonoscopy),2.150538


# Inference

In [17]:
def tests_after(test_string):
  after_tests=''
  for i in output_df['antecedents']:
    if test_string==next(iter(i)):
      rows=output_df[output_df['antecedents']==i]
      break
    
  max_conviction =rows['conviction'].max()

  after_frozenset=rows['consequents'].loc[rows['conviction']==max_conviction].values
  after_list=list(after_frozenset[0])
  for tests in after_list:
    after_tests+=f' {tests}'
    
    return (f"The patient takes {after_tests} test after {test_string}")

In [18]:
def tests_before(test_string):
  before_tests=''
  for i in output_df['consequents']:
    if test_string==next(iter(i)):
      rows=output_df[output_df['consequents']==i]
      break
    
  max_conviction =rows['conviction'].max()

  before_frozenset=rows['antecedents'].loc[rows['conviction']==max_conviction].values
  before_list=list(before_frozenset[0])
  for tests in before_list:
    before_tests+=f' {tests}'

  return (f"The patient takes {before_tests} test before {test_string}")

In [26]:
def procedures_predict(test_name):
  """Print the `tests_before` and `tests_after` sentences for `test_name`, joined by ' and '."""
  before_sentence = tests_before(test_name)
  after_sentence = tests_after(test_name)
  print(f'{before_sentence} and {after_sentence}')


# Example: print the tests most associated before and after a colonoscopy
procedures_predict('Colonoscopy')

The patient takes  Blood test ECG test before Colonoscopy and The patient takes  Blood test test after Colonoscopy
