# Pharmacy SVD Recommender

In this example we use SQL to calculate dot products for a drug recommendation.

In [1]:
import pandas as pd
import os

# Enable the autoreload extension in mode 1: only modules registered with
# %aimport are reloaded before each cell runs.
%load_ext autoreload
%autoreload 1

### Get actual purchases

In [2]:
%aimport gpudb_df

_cnxn = gpudb_df.get_odbc()

PATIENT_ID = 2839568

_sql = """
select pm.patient_id,
    d.TID as TID,
    d.GENERIC_NAME as GENERIC_NAME,
    pm.RX_COUNT
from PHARM_MATRIX pm
join DRUG_TID as d
    ON pm.TID = d.TID
where pm.patient_id = {}
order by pm.RX_COUNT desc
""".format(PATIENT_ID)

_actual_df = pd.read_sql(_sql, _cnxn)
_cnxn.close()

_actual_df = _actual_df.set_index('TID')
_actual_df

Connected to GPUdb ODBC Server (6.2.0.9.20180622232941)


DatabaseError: Execution failed on sql '
select pm.patient_id,
    d.TID as TID,
    d.GENERIC_NAME as GENERIC_NAME,
    pm.RX_COUNT
from PHARM_MATRIX pm
join DRUG_TID as d
    ON pm.TID = d.TID
where pm.patient_id = 2839568
order by pm.RX_COUNT desc
': ('42S02', '[42S02] [Kinetica][SQLEngine] (31740) Table or view not found: KINETICA..PHARM_MATRIX (31740) (SQLExecDirectW)')

### Get approximated purchases

We get approximated purchases with:

$
\mathbf{\tilde{a}}_i = \mathbf{u}_i \Sigma \mathbf{V}^T
$

This reduces to a set of dot products: one per drug, between the patient's vector and that drug's vector.

In [3]:
%aimport gpudb_df

_cnxn = gpudb_df.get_odbc()

_sql = """
select top 10 
    pv.patient_id,
    d.TID as TID,
    d.generic_name,
    (pv.v0 * dv.v0)
    + (pv.v1 * dv.v1) 
    + (pv.v2 * dv.v2) 
    + (pv.v3 * dv.v3) 
    + (pv.v4 * dv.v4) 
    + (pv.v5 * dv.v5) 
    + (pv.v6 * dv.v6) 
    + (pv.v7 * dv.v7) 
    + (pv.v8 * dv.v8) 
    + (pv.v9 * dv.v9) 
    as DRUG_RATING
from PATIENT_U_VEC as pv, DRUG_V_VEC as dv
join DRUG_TID as d
    ON dv.TID = d.TID
where pv.patient_id = {}
order by drug_rating desc
""".format(PATIENT_ID)

_approx_df = pd.read_sql(_sql, _cnxn)
_cnxn.close()

_approx_df = _approx_df.set_index('TID')
_approx_df

Connected to GPUdb ODBC Server (6.2.0.9.20180622232941)


Unnamed: 0_level_0,PATIENT_ID,GENERIC_NAME,DRUG_RATING
TID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3510,2839568,GLUCOSE BLOOD TEST STRIP,2.484941
1157,2839568,SYRINGE/NEEDLE (DISP) 1 ML,1.651278
1158,2839568,BLOOD GLUCOSE MONITORING KIT,1.401927
2578,2839568,TOLBUTAMIDE TAB 500 MG,1.378701
6092,2839568,INSULIN NPH (HUMAN) (ISOPHAN,1.319132
156,2839568,METFORMIN HCL TAB SR 24HR MO,1.165763
225,2839568,VALSARTAN TAB 80 MG,0.734418
6090,2839568,INSULIN REGULAR (HUMAN) SOLN,0.508133
2530,2839568,NEBIVOLOL HCL TAB 5 MG (BASE,0.422035
6081,2839568,LEVOTHYROXINE SODIUM TAB 88,0.409597


### Get recommended purchases

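The recommendation consists of the drugs that appear in the approximated purchases but not in the patient's actual purchases: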

$
\mathbf{\tilde{r}}_i = \mathbf{\tilde{a}}_i - \mathbf{a}_i
$

In [4]:
# Keep only the approximated drugs (by TID index) that are absent from the
# patient's actual purchases. A boolean mask is used instead of a Python set:
# pandas no longer accepts a set as a .loc indexer, and the mask also keeps
# the row order of _approx_df deterministic.
_not_yet_purchased = ~_approx_df.index.isin(_actual_df.index)
_recommended_df = _approx_df.loc[_not_yet_purchased]

# Show the strongest recommendations first.
_recommended_df.sort_values('DRUG_RATING', ascending=False)

Unnamed: 0_level_0,PATIENT_ID,GENERIC_NAME,DRUG_RATING
TID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1157,2839568,SYRINGE/NEEDLE (DISP) 1 ML,1.651278
2578,2839568,TOLBUTAMIDE TAB 500 MG,1.378701
6092,2839568,INSULIN NPH (HUMAN) (ISOPHAN,1.319132
156,2839568,METFORMIN HCL TAB SR 24HR MO,1.165763
225,2839568,VALSARTAN TAB 80 MG,0.734418
6090,2839568,INSULIN REGULAR (HUMAN) SOLN,0.508133
2530,2839568,NEBIVOLOL HCL TAB 5 MG (BASE,0.422035
6081,2839568,LEVOTHYROXINE SODIUM TAB 88,0.409597
