[View in Colaboratory](https://colab.research.google.com/github/robmoore/diabetes-prediction/blob/master/Create_RxNorm_SQLite_Database.ipynb)

In [16]:
!apt install sqlite3 -q

Reading package lists...
Building dependency tree...
Reading state information...
sqlite3 is already the newest version (3.19.3-3).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [4]:
import getpass
import re
import shlex
import sqlite3
from contextlib import closing

import numpy as np
import pandas as pd

# Mount Google Drive at /content/gdrive; the finished database is copied there by the last cell.
# Assumes a folder named 'Diabetes-Prediction' already exists in your Google Drive account.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code
Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Fetch NLM's terminology download helper once. If the zip is already present nothing happens;
# otherwise it is downloaded, only curl-uts-download.sh and uts.nlm.nih.gov.crt are extracted,
# and the script is made executable.
![ -f "terminology_download_script.zip" ] || (wget -q -N https://download.nlm.nih.gov/rxnorm/terminology_download_script.zip && unzip -o terminology_download_script.zip curl-uts-download.sh uts.nlm.nih.gov.crt && chmod +x curl-uts-download.sh)

In order to download the RxNorm data required below, you'll need a [UTS account](https://uts.nlm.nih.gov/license.html). Please enter your credentials below.

In [6]:
uts_username = getpass.getpass(prompt='Enter your UTS username: ')
uts_password = getpass.getpass(prompt='Enter your UTS password: ')


def _patch_download_script(path, username, password):
  """Insert the UTS credentials into the download script and fix its variable references.

  The script is edited in Python rather than through a shell `sed` command so that
  credentials containing characters such as `/`, `&`, `"`, `$`, backticks or
  backslashes are written verbatim instead of corrupting the command or being
  expanded by the shell.

  Args:
    path: Path of the shell script to rewrite in place.
    username: Value to assign to UTS_USERNAME.
    password: Value to assign to UTS_PASSWORD.
  """
  # newline='' keeps the script's line endings exactly as they were downloaded.
  with open(path, encoding='utf-8', newline='') as script_file:
    script = script_file.read()

  # Fix issue with Windows variable references (%VAR% -> $VAR) so logout occurs.
  # This runs before the credentials are inserted so any '%' in them is left alone.
  script = re.sub(r'%([^%\n]*)%', r'$\1', script)

  for name, value in (('UTS_USERNAME', username), ('UTS_PASSWORD', password)):
    # shlex.quote yields a single shell word whatever characters the value holds.
    assignment = '{}={}'.format(name, shlex.quote(value))
    # A callable replacement stops re.sub from interpreting backslashes in the value.
    script = re.sub(name + r'=.*$', lambda _match, text=assignment: text, script, flags=re.MULTILINE)

  with open(path, 'w', encoding='utf-8', newline='') as script_file:
    script_file.write(script)


_patch_download_script('curl-uts-download.sh', uts_username, uts_password)

Enter your UTS username: ··········
Enter your UTS password: ··········


In [7]:
# Name of the RxNorm release archive; the cleanup cell at the end removes it by this name.
rxnorm_fn='RxNorm_full_current.zip'
# Skip the download when the archive is already present. Otherwise fetch it with the UTS helper
# script (its stdout is discarded) and, if that succeeds, test the archive with `zip -T`.
![ -f {rxnorm_fn} ] || (./curl-uts-download.sh https://download.nlm.nih.gov/umls/kss/rxnorm/{rxnorm_fn} > /dev/null && zip -T {rxnorm_fn})

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 10284    0 10284    0     0  10284      0 --:--:-- --:--:-- --:--:-- 67215
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   313  100   313    0     0    313      0  0:00:01 --:--:--  0:00:01  2301
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   252  100   252    0     0    252      0  0:00:01 --:--:--  0:00:01   252
100  237M  100  237M    0     0  4512k      0  0:00:54  0:00:54 --:--:-- 4514k
test of RxNorm_full_current.zip OK


In [8]:
!rm rxnorm.db

rm: cannot remove 'rxnorm.db': No such file or directory


In [13]:
%%bash
# Build rxnorm.db from the RxNorm release archive.
# Approach largely borrowed from https://github.com/chb/py-umls/blob/master/databases/rxnorm.sh

# Extract only the three RRF files and the MySQL table/index scripts that are needed.
extract_release_files() {
  echo "Unzipping RxNorm file"
  unzip -o RxNorm_full_current.zip rrf/RXNCONSO.RRF rrf/RXNREL.RRF rrf/RXNSAT.RRF scripts/mysql/*.sql
}

# Pre-process one RRF file and load it into the table named after the file.
import_rrf() {
  local rrf_path="$1"
  local prepped_path="${rrf_path}.prep"
  local table_name

  echo "-> Pre-processing ${rrf_path}"
  # Drop the trailing pipe to avoid import warnings, double any embedded quotes,
  # then wrap every field in quotes.
  sed 's/|$//;s/"/""/g;s/[^|]*/"&"/g' "$rrf_path" > "$prepped_path"

  table_name=$(basename "$rrf_path" .RRF)
  echo "-> Importing $table_name"
  sqlite3 rxnorm.db ".import '$prepped_path' '$table_name'"
}

# Create the schema, import every RRF file, add the indexes and drop the tables that are not used.
build_database() {
  local rrf_path

  echo "Creating tables"
  sqlite3 rxnorm.db < scripts/mysql/Table_scripts_mysql_rxn.sql

  for rrf_path in rrf/*.RRF; do
    import_rrf "$rrf_path"
  done

  echo "Creating indicies"
  sqlite3 rxnorm.db < scripts/mysql/Indexes_mysql_rxn.sql

  echo "Dropping unused tables"
  # Reuse the DROP statements from the table script, keeping only the three imported tables.
  grep DROP scripts/mysql/Table_scripts_mysql_rxn.sql | grep -v 'RXNREL\|RXNSAT\|RXNCONSO' | sqlite3 rxnorm.db
}

# Each step is skipped when its result already exists.
[ -d "rrf" ] || extract_release_files
[ -e "rxnorm.db" ] || build_database

Creating tables
-> Pre-processing rrf/RXNCONSO.RRF
-> Importing RXNCONSO
-> Pre-processing rrf/RXNREL.RRF
-> Importing RXNREL
-> Pre-processing rrf/RXNSAT.RRF
-> Importing RXNSAT
Creating indicies
Dropping unused tables


In [0]:
def processNdcs(ndcs):
  """Look up ingredient RXCUIs for the concepts that carry the given NDC codes.

  The query finds the RXNSAT rows whose 'NDC' attribute value is one of `ndcs`,
  follows their 'consists_of' relations to concepts with TTY 'SCDC', and from
  those follows the 'has_ingredient' relations.

  The NDC values are bound as query parameters instead of being formatted into
  the SQL text. Previously each value was wrapped in double quotes, which SQLite
  resolves as a column name when one matches (e.g. "ATV"), and a value containing
  a quote character broke the statement.

  Args:
    ndcs: Iterable of NDC codes; each is converted with str() before binding.
      The number of codes is bounded by SQLite's limit on bound parameters.

  Returns:
    A pandas DataFrame with columns RXCUI and INGREDIENT, ordered by RXCUI.
    It is empty when no code matches or `ndcs` is empty.
  """
  # Bind as text so codes keep their leading zeros and compare against the stored strings.
  ndc_values = [str(ndc) for ndc in ndcs]
  # One '?' per value; an empty list yields 'IN ()', which SQLite accepts and matches nothing.
  placeholders = ','.join('?' * len(ndc_values))
  query = """
SELECT rel1.RXCUI2 AS RXCUI, rel2.RXCUI1 AS INGREDIENT
FROM RXNREL rel1
JOIN RXNCONSO con ON rel1.RXCUI1 = con.RXCUI AND TTY='SCDC'
JOIN RXNREL rel2 ON rel1.RXCUI1 = rel2.RXCUI2 AND rel2.RELA = 'has_ingredient'
WHERE rel1.RXCUI2 IN (SELECT RXCUI FROM RXNSAT WHERE ATN = 'NDC' AND ATV IN ({})) AND rel1.RELA = 'consists_of' 
ORDER BY RXCUI
""".format(placeholders)
  # The database is opened read-only; closing() releases the connection once the frame is built.
  with closing(sqlite3.connect('file:rxnorm.db?mode=ro', uri=True)) as conn:
    return pd.read_sql_query(query, conn, params=ndc_values)

In [15]:
# Sample NDC codes used to try the lookup against the freshly built database.
ndcs = [
  "23490544301",
  "00143126730",
  "00456069801",
  "60432060504",
  "00247211730",
  "00247028100",
  "00093084015",
  "00168000215",
  "00173045301",
]

ingredients = processNdcs(ndcs)
print(ingredients)

     RXCUI INGREDIENT
0  1085686      10759
1  1248057       8745
2  1248057       8163
3  1797933      41126
4   197604       3407
5   203088       6135
6   206805      28889
7   314076      29046
8   351906      18993
9   352272     321988


In [0]:
# Remove the extracted RRF files and SQL scripts, the UTS helper files and the downloaded archive.
# This also deletes curl-uts-download.sh, which holds the credentials entered earlier.
!rm -rf rrf scripts curl-uts-* terminology_* uts* {rxnorm_fn}
# Copy the finished database into the 'Diabetes-Prediction' folder of the mounted Google Drive.
!cp rxnorm.db /content/gdrive/My\ Drive/Diabetes-Prediction/