# Symptom 데이터 연관성 분석
데이터 출처: <https://www.kaggle.com/edgarjames/disease-predictor>

**Disease Number Description**

**0: Heart Disease**

**1: Viral Fever or Cold**

**2: Jaundice**

**3: Food Poisoning**

**4: Patient is normal**

## 1. 데이터 불러오기 및 전처리

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the symptom CSV from its local path into a DataFrame and display it.
csv_path = r"C:\Users\Desktop\data\symptom.csv"
data = pd.read_csv(csv_path)
data

Unnamed: 0,Disease,Temperature,Pulse Rate,L.A Pain,U.A Pain,Vomiting Feeling,Yellowish Urine,Indigestion
0,0,10,13.8,0,10,0,0,0
1,1,11,13.6,10,10,0,0,0
2,2,13,14.0,10,0,0,10,0
3,3,11,12.8,0,0,10,0,10
4,0,14,15.0,0,10,0,0,0
...,...,...,...,...,...,...,...,...
1036,1,14,14.0,10,10,0,0,0
1037,2,11,13.0,10,0,0,10,0
1038,3,10,10.0,10,0,10,0,10
1039,4,10,10.0,0,0,0,0,0


In [3]:
# Symptom columns to analyse (the displayed rows show them coded as 0 or 10).
use_cols = [
    'L.A Pain',
    'U.A Pain',
    'Vomiting Feeling',
    'Yellowish Urine',
    'Indigestion',
]
# Divide by 10 so the 0/10 coding becomes 0/1.
data1 = data.loc[:, use_cols].div(10)
data1

Unnamed: 0,L.A Pain,U.A Pain,Vomiting Feeling,Yellowish Urine,Indigestion
0,0.0,1.0,0.0,0.0,0.0
1,1.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,1.0
4,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...
1036,1.0,1.0,0.0,0.0,0.0
1037,1.0,0.0,0.0,1.0,0.0
1038,1.0,0.0,1.0,0.0,1.0
1039,0.0,0.0,0.0,0.0,0.0


In [4]:
# Add one 0/1 indicator column per disease code found in data['Disease'].
disease_columns = {
    0: 'Heart',
    1: 'Cold',
    2: 'Jaundice',
    3: 'Poisoning',
    4: 'Normal',
}
for code, column in disease_columns.items():
    # 1 where the row's disease code matches, 0 otherwise.
    data1[column] = np.where(data['Disease'] == code, 1, 0)

In [5]:
# Display data1, which now holds the symptom columns plus the five
# disease indicator columns (Heart, Cold, Jaundice, Poisoning, Normal).
data1

Unnamed: 0,L.A Pain,U.A Pain,Vomiting Feeling,Yellowish Urine,Indigestion,Heart,Cold,Jaundice,Poisoning,Normal
0,0.0,1.0,0.0,0.0,0.0,1,0,0,0,0
1,1.0,1.0,0.0,0.0,0.0,0,1,0,0,0
2,1.0,0.0,0.0,1.0,0.0,0,0,1,0,0
3,0.0,0.0,1.0,0.0,1.0,0,0,0,1,0
4,0.0,1.0,0.0,0.0,0.0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
1036,1.0,1.0,0.0,0.0,0.0,0,1,0,0,0
1037,1.0,0.0,0.0,1.0,0.0,0,0,1,0,0
1038,1.0,0.0,1.0,0.0,1.0,0,0,0,1,0
1039,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1


## 2. 연관성 분석

In [6]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

### 지지도

In [7]:
# Mine frequent itemsets with a minimum support of 0.05, labelling items by
# column name instead of column index.
#
# The frame is cast to bool first: mlxtend emits a DeprecationWarning and takes
# a slower validation path for non-bool input. This assumes every value in
# data1 is 0 or 1 (as in the displayed rows), in which case the supports are
# unchanged; any other non-zero value would be treated as "present".
frequent_itemsets = apriori(
    data1.astype(bool),
    min_support=0.05,
    use_colnames=True,
)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.597502,(L.A Pain)
1,0.317003,(U.A Pain)
2,0.198847,(Vomiting Feeling)
3,0.195965,(Yellowish Urine)
4,0.197887,(Indigestion)
5,0.108549,(Heart)
6,0.208453,(Cold)
7,0.194044,(Jaundice)
8,0.197887,(Poisoning)
9,0.291066,(Normal)


### 신뢰도, 향상도

In [8]:
# Derive association rules from the frequent itemsets using lift as the
# filtering metric (threshold 3), then order them from highest to lowest lift.
rules = (
    association_rules(frequent_itemsets, metric="lift", min_threshold=3)
    .sort_values("lift", ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
27,(Jaundice),"(L.A Pain, Yellowish Urine)",0.194044,0.194044,0.193084,0.99505,5.127953,0.15543,162.803074,0.998802
24,"(L.A Pain, Yellowish Urine)",(Jaundice),0.194044,0.194044,0.193084,0.99505,5.127953,0.15543,162.803074,0.998802
25,"(L.A Pain, Jaundice)",(Yellowish Urine),0.193084,0.195965,0.193084,1.0,5.102941,0.155246,inf,0.996429
8,(Yellowish Urine),(Jaundice),0.195965,0.194044,0.194044,0.990196,5.102941,0.156018,82.207493,1.0
9,(Jaundice),(Yellowish Urine),0.194044,0.195965,0.194044,1.0,5.102941,0.156018,inf,0.997616
26,(Yellowish Urine),"(L.A Pain, Jaundice)",0.195965,0.193084,0.193084,0.985294,5.102941,0.155246,54.870317,1.0
43,"(Poisoning, Indigestion)","(Vomiting Feeling, L.A Pain)",0.197887,0.195965,0.195965,0.990291,5.053398,0.157186,82.815562,1.0
34,"(Vomiting Feeling, Indigestion)",(Poisoning),0.197887,0.197887,0.197887,1.0,5.053398,0.158728,inf,1.0
47,(Poisoning),"(Vomiting Feeling, L.A Pain, Indigestion)",0.197887,0.195965,0.195965,0.990291,5.053398,0.157186,82.815562,1.0
28,"(Poisoning, L.A Pain)",(Indigestion),0.195965,0.197887,0.195965,1.0,5.053398,0.157186,inf,0.997611


### 질병종류별 연관성

In [9]:
# Subset of the rules: keep only those whose consequent is exactly {'Heart'}.
heart_mask = rules['consequents'] == {'Heart'}
rules.loc[heart_mask]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1,(U.A Pain),(Heart),0.317003,0.108549,0.108549,0.342424,3.154545,0.074139,1.355662,1.0


In [10]:
# Keep only the rules whose consequent is exactly {'Cold'}.
cold_mask = rules['consequents'] == {'Cold'}
rules.loc[cold_mask]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
12,"(L.A Pain, U.A Pain)",(Cold),0.208453,0.208453,0.208453,1.0,4.797235,0.165001,inf,1.0
2,(U.A Pain),(Cold),0.317003,0.208453,0.208453,0.657576,3.154545,0.142373,2.311596,1.0


In [11]:
# Keep only the rules whose consequent is exactly {'Jaundice'}.
jaundice_mask = rules['consequents'] == {'Jaundice'}
rules.loc[jaundice_mask]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
24,"(L.A Pain, Yellowish Urine)",(Jaundice),0.194044,0.194044,0.193084,0.99505,5.127953,0.15543,162.803074,0.998802
8,(Yellowish Urine),(Jaundice),0.195965,0.194044,0.194044,0.990196,5.102941,0.156018,82.207493,1.0


In [12]:
# Keep only the rules whose consequent is exactly {'Poisoning'}.
poisoning_mask = rules['consequents'] == {'Poisoning'}
rules.loc[poisoning_mask]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
34,"(Vomiting Feeling, Indigestion)",(Poisoning),0.197887,0.197887,0.197887,1.0,5.053398,0.158728,inf,1.0
29,"(L.A Pain, Indigestion)",(Poisoning),0.195965,0.197887,0.195965,1.0,5.053398,0.157186,inf,0.997611
40,"(Vomiting Feeling, L.A Pain, Indigestion)",(Poisoning),0.195965,0.197887,0.195965,1.0,5.053398,0.157186,inf,0.997611
21,"(Vomiting Feeling, L.A Pain)",(Poisoning),0.195965,0.197887,0.195965,1.0,5.053398,0.157186,inf,0.997611
11,(Indigestion),(Poisoning),0.197887,0.197887,0.197887,1.0,5.053398,0.158728,inf,1.0
7,(Vomiting Feeling),(Poisoning),0.198847,0.197887,0.197887,0.995169,5.028986,0.158537,166.037464,1.0


In [None]:
# Keep only the rules whose consequent is exactly {'Normal'}.
normal_mask = rules['consequents'] == {'Normal'}
rules.loc[normal_mask]