# 6번 문제

### 0. DB 커넥션
+ 아래 접속정보 수정 필요
+ host, dbname, user, password, port

In [152]:
!pip install psycopg



In [153]:
import psycopg2
import pandas as pd

# Connection settings for the PostgreSQL database.
# Every blank value must be filled in before this cell is run.
connection_params = {
    "host": "",  # server endpoint
    "dbname": "",  # database name
    "user": "",  # account
    "password": "",  # password
    "port": 5432,  # port
}

conn = psycopg2.connect(**connection_params)

# Cursor used by the later cells to execute queries.
cur = conn.cursor()

### 1. 제 2형 당뇨병을 진단받은 환자의 의약품 처방 내역 추출
+ output : df(타입 : 데이터프레임)
+ df 컬럼 리스트
    + person_id : 환자 id
    + drug_exposure_start_date : 처방일자
    + drug_names : 처방일자에 처방받은 drug 리스트
        + ex) digoxin, (simvastatin, clopidogrel), naproxen 등

In [154]:
# Read the SQL text for step 1 from disk.
# The `with` block closes the file even if `read()` raises.
with open("./query/6_1.sql", "r", encoding="utf-8") as f:
    query = f.read()

# Echo the query so the notebook records exactly what was executed.
print(query)

/*
 * 6번 문제 1차 쿼리
 * 제 2형 당뇨병을 진단받은 환자의 의약품 처방 내역 추출
*/ 

WITH 
t2d AS (
	-- 조건 a : 제 2형 당뇨병 (Type 2 diabetes, T2D)을 진단 받은 경우 추출
	SELECT 
			person_id
		,	condition_start_date
	FROM de.condition_occurrence
	WHERE condition_concept_id IN (3191208, 36684827, 3194332, 3193274, 43531010
									, 4130162, 45766052, 45757474, 4099651, 4129519
									, 4063043, 4230254, 4193704, 4304377, 201826
									, 3194082, 3192767)
)
, drug_exposure_list AS (
	-- 제 2형 당뇨병 환자들이 digoxin, smvastatin, clopidogrel, naproxen을 처방받은 경우 추출
	SELECT 
			person_id
		,	drug_concept_id
		,	CASE WHEN drug_concept_id = 19018935 THEN 'digoxin'
				WHEN drug_concept_id IN (1539411, 1539463) THEN 'simvastatin'
				WHEN drug_concept_id = 19075601 THEN 'clopidogrel'
				WHEN drug_concept_id = 1115171 THEN 'naproxen'
			ELSE NULL END AS drug_name
		,	drug_exposure_start_date	
	FROM de.drug_exposure
	WHERE person_id IN (SELECT person_id FROM t2d)  -- 제 2형 당뇨병 필터
	AND drug_concept_id IN (19018935, 1539411, 1539463, 19075

In [155]:
# Execute the query, then build a DataFrame from the fetched rows.
cur.execute(query)

# The first field of each cursor description entry is the column name.
col_nm = [column[0] for column in cur.description]
rows = cur.fetchall()

df = pd.DataFrame(rows, columns=col_nm)

### 2. 의약품 내역이 변경된 패턴 추출
+ 의약품이 변경되지 않은 경우는 추출하지 않음

In [156]:
def _label_prescription(drug_names):
    """Return the label used in a pattern for one row's ``drug_names`` value.

    A value containing a comma (two or more drugs listed together) is
    wrapped in parentheses so it reads as a single step of the pattern.
    """
    if "," in drug_names:
        return "({drug_names})".format(drug_names=drug_names)
    return drug_names


def _collapse_repeats(labels):
    """Return ``labels`` with runs of consecutive equal items reduced to one."""
    collapsed = []
    for label in labels:
        if not collapsed or collapsed[-1] != label:
            collapsed.append(label)
    return collapsed


# Maps a change pattern ("a -> b -> ...") to the number of patients with it.
res = dict()

# A single groupby pass replaces filtering the whole frame once per patient.
# sort=False visits patients in order of first appearance, and rows inside a
# group keep their original order.
# NOTE(review): this presumably relies on `df` already being ordered by
# prescription date within each patient -- confirm against the SQL query.
for _, patient_drug_names in df.groupby("person_id", sort=False)["drug_names"]:
    tmp_pattern = _collapse_repeats(
        _label_prescription(drug_names) for drug_names in patient_drug_names
    )

    # Fewer than two distinct consecutive steps means the prescription never
    # changed; such patients are not counted.
    if len(tmp_pattern) < 2:
        continue

    pattern_join = " -> ".join(tmp_pattern)
    res[pattern_join] = res.get(pattern_join, 0) + 1

### 3. 결과

In [157]:
# Build the result table from (pattern, count) pairs with explicit column
# names. Unlike from_dict(...).reset_index() followed by a column rename,
# this also works when `res` is empty: it yields an empty two-column frame
# instead of raising a length-mismatch ValueError.
df_res = pd.DataFrame(list(res.items()), columns=['pattern', 'person_count'])

# Most frequent patterns first; ties are broken by pattern text, descending.
df_res = df_res.sort_values(by=['person_count', 'pattern'], ascending=False)

# Remove the column-width limit so long patterns are displayed in full.
pd.set_option('display.max_colwidth', None)

df_res

Unnamed: 0,pattern,person_count
0,simvastatin -> naproxen -> simvastatin,4
4,simvastatin -> clopidogrel,2
2,"simvastatin -> (clopidogrel, simvastatin)",2
7,simvastatin -> naproxen,1
9,"simvastatin -> clopidogrel -> (clopidogrel, simvastatin) -> digoxin -> (clopidogrel, digoxin, simvastatin) -> clopidogrel -> (clopidogrel, digoxin, simvastatin) -> clopidogrel -> (clopidogrel, digoxin, simvastatin)",1
3,naproxen -> digoxin -> simvastatin,1
6,"naproxen -> digoxin -> (digoxin, simvastatin)",1
1,"naproxen -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin)",1
5,digoxin -> naproxen -> digoxin,1
8,"(clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin) -> simvastatin -> (clopidogrel, simvastatin)",1
