In [None]:
# Start with loading all necessary libraries
import pandas as pd
import pyodbc
import numpy as np
import matplotlib.pyplot as plt
import re

In [None]:
# Open an ODBC connection to the BedView database using Windows integrated
# authentication (Trusted_Connection=yes), so no credentials appear here.
# NOTE(review): the connection is not closed anywhere in the visible notebook.
sql_conn = pyodbc.connect('DRIVER={SQL Server};'
                            'SERVER=L_AAGname;'
                            'DATABASE=BedView;'
                            'Trusted_Connection=yes') 
# One row per clinical note joined to its patient spell and patient note.
# The batch first sets READ UNCOMMITTED isolation, then runs the select.
query = "set transaction isolation level read uncommitted select cn.Diagnosis,ps.SpecialtyDesc,ps.AdmissionDate,ps.DischargeDate,pn.AmuTriage from tblClinicalNote cn inner join tblPatientSpell ps on ps.pkPatientSpellID=cn.fkPatientSpellID inner join tblPatientNote pn on pn.fkPatientSpellID=ps.pkPatientSpellID"
df = pd.read_sql(query, sql_conn)
# Drop every row that has a missing value in any of the five selected columns.
# This rebinds `df`, so the unfiltered query result is no longer available.
df = df.dropna()
# Display the frame as the cell output.
df

In [None]:
# Keep only rows whose AmuTriage is not the string '-1' (the value that
# triage_plot labels 'not via AMU'). .copy() gives an independent frame so
# later column assignments on df_AMU do not write into df.
df_AMU = df[df['AmuTriage']!='-1'].copy()
df_AMU

In [None]:
# Keep only rows admitted strictly after the hard-coded cut-off timestamp.
# NOTE(review): the reason for this exact date/time is not recorded in the
# notebook — confirm and consider naming it as a constant.
df_AMU = df_AMU[df_AMU['AdmissionDate']>'2018-11-07 09:13:59']
df_AMU

In [None]:
def triage_plot(dataframe):
    """Show AMU triage categories as a vertical stack of frequency-sized squares.

    One square marker is drawn per distinct value of ``dataframe['AmuTriage']``,
    in ``value_counts()`` order, with marker area ``2 * count`` and a colour
    taken from the ``'RdYlGn_r'`` colormap according to the category's size
    relative to the largest one.  The raw value ``'-1'`` is displayed as
    ``'not via AMU'``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing an ``'AmuTriage'`` column.

    Returns
    -------
    None
        The figure is created and displayed with ``plt.show()``.
    """
    triage_counts = dataframe['AmuTriage'].value_counts()
    triage_counts = triage_counts.rename({'-1':'not via AMU'})

    sizes = triage_counts*2
    # Vertical centre of each marker: advance by half of sqrt(area) before and
    # after each centre, so spacing grows with the marker size.
    y = []
    cumulative_y = 0
    for size in sizes:
        cumulative_y+=np.sqrt(size)/2
        y.append(cumulative_y)
        cumulative_y+=np.sqrt(size)/2

    cm = plt.get_cmap('RdYlGn_r')
    colors = cm(sizes/max(sizes))
    # Computed once instead of once per loop iteration.
    root_sizes = np.sqrt(sizes)

    plt.figure(figsize=(17,14*y[-1]/400))
    # The larger limit is passed first, so the first category is drawn at the top.
    plt.ylim(max(y),np.sqrt(min(y)))
    for i,triage in enumerate(triage_counts.index):
        # `sizes` is indexed by category label, not by position. Integer keys
        # on such a Series are deprecated (and an error in newer pandas), so
        # positional access goes through .iloc.
        plt.scatter(x=0,y=y[i],s=sizes.iloc[i],marker='s',color=colors[i])
        # Label the first marker, and any marker whose centre is more than 4
        # units below the previous one (closer labels would overlap).
        if i==0 or y[i]-y[i-1]>4: plt.text((y[-1]/500)*root_sizes.iloc[i]/10000,y[i],triage,va='center')
    plt.gca().axis('off');
    plt.title('AMU Triage')
    plt.show()
# Draw the triage chart for the AMU-only, date-filtered frame.
triage_plot(df_AMU)

In [None]:
def specialty_plot(dataframe):
    """Show specialties as a vertical stack of frequency-sized circles.

    One circular marker is drawn per distinct value of
    ``dataframe['SpecialtyDesc']``, in ``value_counts()`` order, with marker
    area ``4 * count`` and a colour taken from the ``'RdYlGn_r'`` colormap
    according to the specialty's size relative to the largest one.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing a ``'SpecialtyDesc'`` column.

    Returns
    -------
    None
        The figure is created and displayed with ``plt.show()``.
    """
    specialty_counts = dataframe['SpecialtyDesc'].value_counts()

    sizes = specialty_counts*4
    # Vertical centre of each marker: advance by sqrt(area / pi) (the radius of
    # a circle with that area) before and after each centre.
    y = []
    cumulative_y = 0
    for size in sizes:
        cumulative_y+=np.sqrt(size/np.pi)
        y.append(cumulative_y)
        cumulative_y+=np.sqrt(size/np.pi)

    cm = plt.get_cmap('RdYlGn_r')
    colors = cm(sizes/max(sizes))
    # Computed once instead of once per loop iteration.
    root_sizes = np.sqrt(sizes)

    plt.figure(figsize=(17,14*y[-1]/600))
    # The larger limit is passed first, so the first specialty is drawn at the top.
    plt.ylim(max(y),np.sqrt(min(y)))
    for i,specialty in enumerate(specialty_counts.index):
        # `sizes` is indexed by specialty name, not by position. Integer keys
        # on such a Series are deprecated (and an error in newer pandas), so
        # positional access goes through .iloc.
        plt.scatter(x=0,y=y[i],s=sizes.iloc[i],marker='o',color=colors[i])
        # Label the first marker, and any marker whose centre is more than 4
        # units below the previous one (closer labels would overlap).
        if i==0 or y[i]-y[i-1]>4: plt.text((y[-1]/500)*root_sizes.iloc[i]/10000,y[i],specialty,va='center')
    plt.gca().axis('off');
    plt.title('Specialty at discharge')
    plt.show()
# Draw the specialty chart for the AMU-only, date-filtered frame.
specialty_plot(df_AMU)

In [None]:
# Length of stay as the difference between discharge and admission.
# Assumes both columns are datetime-typed so the subtraction yields timedeltas
# — TODO confirm against the query result.
df_AMU['LengthOfStay'] = df_AMU['DischargeDate'] - df_AMU['AdmissionDate']
# Convert to whole days. The previous `.astype('timedelta64[D]')` is rejected by
# pandas >= 2.0 (only s/ms/us/ns resolutions are supported). `.dt.days` gives
# the same floored day count; the float cast keeps the dtype older pandas
# returned from that astype.
df_AMU['LengthOfStay'] = df_AMU['LengthOfStay'].dt.days.astype('float64')
# Number of rows per whole-day length of stay, ordered by number of days.
lenStay = df_AMU['LengthOfStay'].value_counts().sort_index()

In [None]:
# Compute pie slices: N equal angular wedges covering the full circle.
# NOTE(review): N = 7 presumably stands for days of the week — confirm.
N = 7
width = 2 * np.pi/N
# Start angle of each wedge, shifted by one wedge width.
theta = np.linspace(0.0 + width, 2 * np.pi + width, N, endpoint=False)
# Same idea in degrees with a 7.5 degree offset; not used in the visible cells.
theta_deg = np.linspace(0.0 + 7.5, 360 + 7.5, N, endpoint=False)
# Row counts per length-of-stay value, in ascending length-of-stay order.
radii = lenStay.values

ax = plt.subplot(111, projection='polar')
# Inner ring: the first seven counts.
ax.bar(theta, radii[0:7], width=width,label='Data');
# Outer ring: the next seven counts, drawn starting at radius 2000.
# NOTE(review): this needs lenStay to hold at least 14 values, otherwise the
# slice is shorter than theta; no explicit width is passed here — confirm.
ax.bar(theta, radii[7:14], bottom=2000)

In [None]:
import mpl_toolkits.axisartist.floating_axes as floating_axes
from matplotlib.projections import PolarAxes
from mpl_toolkits.axisartist.grid_finder import FixedLocator, \
     MaxNLocator, DictFormatter
import numpy as np
import matplotlib.pyplot as plt

# Scale the counts at positions 7..13 of lenStay so the largest becomes 1, then
# offset by 1 so the bars sit inside the r = 1..2 band of the first floating
# axes defined below.
MAX_R = max(lenStay.values[7:14])
radii = lenStay.values[7:14]/MAX_R + 1

# initialize figure:
fig = plt.figure()

# set up polar axis
tr = PolarAxes.PolarTransform()

# define angle ticks around the circumference (angle in radians, LaTeX label):
angle_ticks = [(0, r"$0$"),
               (.25*np.pi, r"$\frac{1}{4}\pi$"),
               (.5*np.pi, r"$\frac{1}{2}\pi$"), 
               (.75*np.pi, r"$\frac{3}{4}\pi$"),
               (1.*np.pi, r"$\pi$"),
               (1.25*np.pi, r"$\frac{5}{4}\pi$"),
               (1.5*np.pi, r"$\frac{3}{2}\pi$"),
               (1.75*np.pi, r"$\frac{7}{4}\pi$")]

# set up ticks and spacing around the circle
grid_locator1 = FixedLocator([v for v, s in angle_ticks])
tick_formatter1 = DictFormatter(dict(angle_ticks))

# set up grid spacing along the 'radius'; the labels undo the scaling above
# (plotted r = 1 -> 0, r = 1.5 -> MAX_R/2, r = 2 -> MAX_R)
radius_ticks = [(1., '0.0'),
                (1.5, '%i' % (MAX_R/2.)),
                (2.0, '%i' % (MAX_R))]

grid_locator2 = FixedLocator([v for v, s in radius_ticks])
tick_formatter2 = DictFormatter(dict(radius_ticks))

# set up axis:
# tr: the polar axis setup
# extremes: theta max, theta min, r max, r min
# the grid for the theta axis
# the grid for the r axis
# the tick formatting for the theta axis
# the tick formatting for the r axis
grid_helper = floating_axes.GridHelperCurveLinear(tr,
                                                  extremes=(2.*np.pi, 0, 2, 1),
                                                  grid_locator1=grid_locator1,
                                                  grid_locator2=grid_locator2,
                                                  tick_formatter1=tick_formatter1,
                                                  tick_formatter2=tick_formatter2)

ax1 = floating_axes.FloatingSubplot(fig, 111, grid_helper=grid_helper)
fig.add_subplot(ax1)

# create a parasite axes that draws through the polar transform `tr`
aux_ax = ax1.get_aux_axes(tr)

aux_ax.patch = ax1.patch # for aux_ax to have a clip path as in ax
ax1.patch.zorder=0.9 # but this has a side effect that the patch is
                     # drawn twice, and possibly over some other
                     # artists. So, we decrease the zorder a bit to
                     # prevent this.

# plot the scaled counts; `theta` comes from the earlier pie-slice cell
aux_ax.bar(theta, radii)


# Second floating axes in the same subplot slot (111), reusing the tick
# locators and formatters from above but with different extremes.
# NOTE(review): `radii` was scaled to at most 2 and the radius ticks sit at
# 1, 1.5 and 2, all outside this axes' r range of 4..5; the theta minimum is
# also 3 rather than 0 — confirm these extremes are intended.
grid_helper = floating_axes.GridHelperCurveLinear(tr,
                                                  extremes=(2.*np.pi, 3, 5, 4),
                                                  grid_locator1=grid_locator1,
                                                  grid_locator2=grid_locator2,
                                                  tick_formatter1=tick_formatter1,
                                                  tick_formatter2=tick_formatter2)

ax1 = floating_axes.FloatingSubplot(fig, 111, grid_helper=grid_helper)
fig.add_subplot(ax1)

# create a parasite axes that draws through the polar transform `tr`
aux_ax = ax1.get_aux_axes(tr)

aux_ax.patch = ax1.patch # for aux_ax to have a clip path as in ax
ax1.patch.zorder=0.9 # but this has a side effect that the patch is
                     # drawn twice, and possibly over some other
                     # artists. So, we decrease the zorder a bit to
                     # prevent this.

# plot the same scaled counts on the second axes
aux_ax.bar(theta, radii)


plt.show()  

In [None]:
# Side-by-side figure: AMU triage categories (squares, left) and specialties
# (circles, right), each marker sized by how often the value occurs in df_AMU.
triage_counts = df_AMU['AmuTriage'].value_counts()

triage_sizes = triage_counts/4
# Vertical centre of each square: advance by half of sqrt(area) before and
# after each centre.
triage_y = []
cumulative_triage_y = 0
for size in triage_sizes:
    cumulative_triage_y+=np.sqrt(size)/2
    triage_y.append(cumulative_triage_y)
    cumulative_triage_y+=np.sqrt(size)/2

cm = plt.get_cmap('RdYlGn_r')
triage_colors = cm(triage_sizes/max(triage_sizes))
triage_color_dict = {}


specialty_counts = df_AMU['SpecialtyDesc'].value_counts()

specialty_sizes = specialty_counts/2
# Vertical centre of each circle: advance by sqrt(area / pi) before and after
# each centre.
specialty_y = []
cumulative_specialty_y = 0
for size in specialty_sizes:
    cumulative_specialty_y+=np.sqrt(size/np.pi)
    specialty_y.append(cumulative_specialty_y)
    cumulative_specialty_y+=np.sqrt(size/np.pi)

# Not used further in this cell; kept in case another cell reads it.
specialty_cm_subsection = np.linspace(0, 1, len(specialty_counts))
specialty_colors = cm(specialty_sizes/max(specialty_sizes))
specialty_color_dict = {}

# Display the generated image:
figure, (triage_fig, specialty_fig) = plt.subplots(nrows=1,ncols=2, figsize=(8,14*triage_y[-1]/500))

# NOTE: triage_sizes / specialty_sizes are indexed by category label, so
# positional access below goes through .iloc. Integer keys on a label-indexed
# Series are deprecated and raise in newer pandas.
triage_fig.set_ylim(max(triage_y),np.sqrt(min(triage_y)))
for i,triage in enumerate(triage_counts.index):
    triage_color_dict[triage] = triage_colors[i]
    triage_fig.scatter(x=0,y=triage_y[i],s=triage_sizes.iloc[i],marker='s',color=triage_color_dict[triage])
    # Label the first square and any square more than 4 units below the previous one.
    if i==0 or triage_y[i]-triage_y[i-1]>4: triage_fig.text((specialty_y[-1]/600)*np.sqrt(triage_sizes).iloc[i]/1000,triage_y[i],triage,va='center')
triage_fig.axis('off');
triage_fig.set_title('AMU Triage')

specialty_fig.set_ylim(max(specialty_y),np.sqrt(min(specialty_y)))
for i,specialty in enumerate(specialty_counts.index):
    specialty_color_dict[specialty] = specialty_colors[i]
    specialty_fig.scatter(x=0,y=specialty_y[i],s=specialty_sizes.iloc[i],marker='o',color=specialty_color_dict[specialty])
    # Label the first circle and any circle more than 5 units below the previous one.
    if i==0 or specialty_y[i]-specialty_y[i-1]>5: specialty_fig.text((specialty_y[-1]/600)*np.sqrt(specialty_sizes).iloc[i]/500,specialty_y[i],specialty,va='center')
specialty_fig.axis('off');
specialty_fig.set_title('Specialty at discharge')
    
plt.tight_layout()
plt.show()

In [None]:
# Lower-case the Diagnosis column of df, overwriting the original casing.
# This affects df only; df_AMU was copied from df in an earlier cell.
df['Diagnosis'] = df['Diagnosis'].str.lower()

In [None]:
# Single regex rule: 'od' with a non-letter character on each side becomes
# ' overdose '. A later cell rebuilds this dictionary from scratch.
term_to_abbreviation_dict = {'[^a-z]od[^a-z]':' overdose '}

In [None]:
# Inspect every diagnosis in df that contains ' acute c' (case-insensitive)
# and print the matching texts.
df_19 = df[df['Diagnosis'].str.lower().str.contains(" acute c",case=False)]
print(df_19['Diagnosis'].values)

In [None]:
# Apply the regex rules to df's Diagnosis column and store the result in
# df_AMU (assignment aligns on the row index; df itself is not modified).
df_AMU['Diagnosis'] = df['Diagnosis'].replace(term_to_abbreviation_dict, regex=True)
# Count rows of df (not df_AMU) whose diagnosis matches the 'od' pattern.
# NOTE(review): because df is unchanged by the line above, this count does not
# show the effect of the replacement — confirm which frame was meant.
df_19 = df[df['Diagnosis'].str.lower().str.contains("[^a-z]od[^a-z]",case=False)]
print(len(df_19['Diagnosis'].values))

In [None]:
# Scratch check of a context-aware rule: 'sat' followed by a two-digit
# percentage is expanded to 'saturated'. Evaluates to 'Currently saturated 94%'.
re.sub(r"(\s)sat(\s[0-9][0-9]%)", r"\1saturated\2", 'Currently sat 94%')

In [None]:
# Build term -> abbreviation pairs from the 'term' column of the SNOMED CT
# description snapshot (tab-separated file read from the working directory).
snomedct = pd.read_csv('sct2_Description_Snapshot-en_INT_20190731.txt',sep="\t",usecols=['term'])

# --- Pass 1: "<word> <word> ... (ABBR)" ------------------------------------
# Find runs of words followed by a parenthesised run of 2+ capital letters.
consecutive_caps_after = snomedct['term'].str.findall(r"((?:\b[A-Za-z]+\b\s)+\([A-Z][A-Z]+\))")
# Drop rows where findall returned an empty list, then rows that were NaN.
consecutive_caps_after = consecutive_caps_after[consecutive_caps_after.astype(str)!='[]']
consecutive_caps_after = consecutive_caps_after.dropna()
# Flatten the per-row lists of matches into one list of strings.
slist = []
for x in consecutive_caps_after:
    slist.extend(x)

# Start from an empty mapping; any earlier contents of this name are discarded.
term_to_abbreviation_dict = {}
for l in slist:
    inside_brackets = re.findall(r"\(([A-Za-z]+)\)", l)[0]
    len_inside_brackets = len(inside_brackets)
    # Words before the bracket = all whitespace-separated tokens minus "(ABBR)".
    num_words = len(l.split()) - 1
    # Only accept matches with exactly one preceding word per abbreviation letter.
    if len_inside_brackets == num_words:
        # Collect the words preceding the bracket, restoring original order.
        words_before_brackets = []
        for i in range(len_inside_brackets):
            if i<num_words: words_before_brackets.insert(0,l.split(" ")[-i-2])
        string_before_brackets = " ".join(words_before_brackets)
        # Each word's first letter must equal the matching abbreviation letter
        # (case-insensitive).
        if all(words_before_brackets[i].lower()[0]==inside_brackets[i].lower() for i in range(len_inside_brackets)):
            # First writer wins: skip phrases or abbreviations already recorded.
            if string_before_brackets not in term_to_abbreviation_dict.keys() and inside_brackets not in term_to_abbreviation_dict.values():
                term_to_abbreviation_dict[string_before_brackets] = inside_brackets
        

# --- Pass 2: "ABBR (<word> <word> ...)" ------------------------------------
# A word of 2+ letters followed by a parenthesised phrase of 2+ words.
consecutive_caps_before = snomedct['term'].str.findall(r"[A-Za-z][A-Za-z]+\s\((?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+\)")
consecutive_caps_before = consecutive_caps_before[consecutive_caps_before.astype(str)!='[]']
consecutive_caps_before = consecutive_caps_before.dropna()
slist = []
for x in consecutive_caps_before:
    slist.extend(x)
for l in slist:
    inside_brackets = re.findall(r"\((?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+\)", l)[0]
    inside_brackets = inside_brackets.strip('()')
    words_inside_brackets = inside_brackets.split()
    num_words_inside_brackets = len(words_inside_brackets)
    word_before_brackets = l.split()[0]
    # Accept when the leading word has one letter per bracketed word and the
    # letters match the words' initials (case-insensitive).
    if num_words_inside_brackets==len(word_before_brackets) and all(words_inside_brackets[i].lower()[0]==word_before_brackets[i].lower() for i in range(num_words_inside_brackets)):
        if inside_brackets not in term_to_abbreviation_dict.keys() and word_before_brackets not in term_to_abbreviation_dict.values():
            term_to_abbreviation_dict[inside_brackets] = word_before_brackets
            

# --- Pass 3: "ABBR - <word> <word> ..." ------------------------------------
# A word of 2+ letters, a dash surrounded by whitespace, then 2+ words.
consecutive_caps_dash = snomedct['term'].str.findall(r"[A-Za-z][A-Za-z]+\s\-\s(?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+")
consecutive_caps_dash = consecutive_caps_dash[consecutive_caps_dash.astype(str)!='[]']
consecutive_caps_dash = consecutive_caps_dash.dropna()
slist = []
for x in consecutive_caps_dash:
    slist.extend(x)
for l in slist:
    after_dash = l.split(' - ')[1]
    words_after_dash = after_dash.split()
    num_words_after_dash = len(words_after_dash)
    word_before_dash = l.split(' - ')[0]
    if num_words_after_dash==len(word_before_dash) and all(words_after_dash[i].lower()[0]==word_before_dash[i].lower() for i in range(num_words_after_dash)):
        # NOTE(review): unlike passes 1 and 2, this pass does not check whether
        # the abbreviation is already used as a value, and after_dash is stored
        # without stripping (the pattern can capture a trailing whitespace
        # character) — confirm both are intended.
        if after_dash not in term_to_abbreviation_dict.keys():
            term_to_abbreviation_dict[after_dash] = word_before_dash



# Repeat the three abbreviation-mining passes on the diagnosis text in df,
# adding to the mapping already built from SNOMED CT.

# --- Pass 1: "<word> <word> ... (ABBR)" ------------------------------------
# NOTE(review): an earlier cell lower-cases df['Diagnosis']; if that cell has
# run, the `[A-Z][A-Z]+` part of this pattern cannot match and this pass adds
# nothing — confirm the intended execution order.
consecutive_caps_after = df['Diagnosis'].str.findall(r"((?:\b[A-Za-z]+\b\s)+\([A-Z][A-Z]+\))")
# Drop rows where findall returned an empty list, then rows that were NaN.
consecutive_caps_after = consecutive_caps_after[consecutive_caps_after.astype(str)!='[]']
consecutive_caps_after = consecutive_caps_after.dropna()
# Flatten the per-row lists of matches into one list of strings.
slist = []
for x in consecutive_caps_after:
    slist.extend(x)

for l in slist:
    inside_brackets = re.findall(r"\(([A-Za-z]+)\)", l)[0]
    len_inside_brackets = len(inside_brackets)
    # Words before the bracket = all whitespace-separated tokens minus "(ABBR)".
    num_words = len(l.split()) - 1
    # Only accept matches with exactly one preceding word per abbreviation letter.
    if len_inside_brackets == num_words:
        # Collect the words preceding the bracket, restoring original order.
        words_before_brackets = []
        for i in range(len_inside_brackets):
            if i<num_words: words_before_brackets.insert(0,l.split()[-i-2])
        string_before_brackets = " ".join(words_before_brackets)
        # Each word's first letter must equal the matching abbreviation letter
        # (case-insensitive).
        if all(words_before_brackets[i].lower()[0]==inside_brackets[i].lower() for i in range(len_inside_brackets)):
            # First writer wins: skip phrases or abbreviations already recorded.
            if string_before_brackets not in term_to_abbreviation_dict.keys() and inside_brackets not in term_to_abbreviation_dict.values():
                term_to_abbreviation_dict[string_before_brackets] = inside_brackets
        

# --- Pass 2: "ABBR (<word> <word> ...)" ------------------------------------
consecutive_caps_before = df['Diagnosis'].str.findall(r"[A-Za-z][A-Za-z]+\s\((?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+\)")
consecutive_caps_before = consecutive_caps_before[consecutive_caps_before.astype(str)!='[]']
consecutive_caps_before = consecutive_caps_before.dropna()
slist = []
for x in consecutive_caps_before:
    slist.extend(x)
for l in slist:
    inside_brackets = re.findall(r"\((?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+\)", l)[0]
    inside_brackets = inside_brackets.strip('()')
    words_inside_brackets = inside_brackets.split()
    num_words_inside_brackets = len(words_inside_brackets)
    word_before_brackets = l.split()[0]
    # Accept when the leading word has one letter per bracketed word and the
    # letters match the words' initials (case-insensitive).
    if num_words_inside_brackets==len(word_before_brackets) and all(words_inside_brackets[i].lower()[0]==word_before_brackets[i].lower() for i in range(num_words_inside_brackets)):
        if inside_brackets not in term_to_abbreviation_dict.keys() and word_before_brackets not in term_to_abbreviation_dict.values():
            term_to_abbreviation_dict[inside_brackets] = word_before_brackets
            

# --- Pass 3: "ABBR - <word> <word> ..." ------------------------------------
consecutive_caps_dash = df['Diagnosis'].str.findall(r"[A-Za-z][A-Za-z]+\s\-\s(?:\b[A-Za-z]+\b\s)(?:\b[A-Za-z]+\b\s?)+")
consecutive_caps_dash = consecutive_caps_dash[consecutive_caps_dash.astype(str)!='[]']
consecutive_caps_dash = consecutive_caps_dash.dropna()
slist = []
for x in consecutive_caps_dash:
    slist.extend(x)
for l in slist:
    # Split on the bare dash here (the SNOMED pass splits on ' - '); the
    # surrounding whitespace is removed by .split() / .strip() below.
    after_dash = l.split('-')[1]
    words_after_dash = after_dash.split()
    num_words_after_dash = len(words_after_dash)
    word_before_dash = l.split(' - ')[0]
    if num_words_after_dash==len(word_before_dash) and all(words_after_dash[i].lower()[0]==word_before_dash[i].lower() for i in range(num_words_after_dash)):
        after_dash = after_dash.strip()
        if after_dash not in term_to_abbreviation_dict.keys() and word_before_dash not in term_to_abbreviation_dict.values():
            term_to_abbreviation_dict[after_dash] = word_before_dash
        
# Hand-written rules added on top of the mined pairs. The keys are later used
# as regular expressions (Series.replace(..., regex=True)), so regex
# metacharacters in a key must be escaped unless the pattern is intended.
# Note that several rules expand an abbreviation (e.g. 'trop' -> 'troponin')
# while others contract a phrase (e.g. 'pulmonary embolism' -> 'pe').
term_to_abbreviation_dict['trop'] = 'troponin'
term_to_abbreviation_dict['inf ex'] = 'infective ex'
term_to_abbreviation_dict['inf asthma'] = 'infective asthma'
term_to_abbreviation_dict[' exa '] = ' exacerbation '
term_to_abbreviation_dict['exac[^a-z]'] = 'exacerbation '
term_to_abbreviation_dict['ex copd'] = 'exacerbation copd'
term_to_abbreviation_dict['copd ex '] = 'copd exacerbation '
term_to_abbreviation_dict['ex of'] = 'exacerbation of'
term_to_abbreviation_dict['ex asthma'] = 'exacerbation asthma'
term_to_abbreviation_dict['ex due'] = 'exacerbation due'
term_to_abbreviation_dict['ex chronic'] = 'exacerbation chronic'
term_to_abbreviation_dict['infective exacerbation'] = 'ie'
term_to_abbreviation_dict['infected exacerbation'] = 'ie'
term_to_abbreviation_dict['ie of copd'] = 'iecopd'
term_to_abbreviation_dict['ie copd'] = 'iecopd'
term_to_abbreviation_dict['ie- copd'] = 'iecopd'
term_to_abbreviation_dict['ie-copd'] = 'iecopd'
term_to_abbreviation_dict['pulmonary embolism'] = 'pe'
term_to_abbreviation_dict['nebuliser'] = 'neb'
term_to_abbreviation_dict['nebulizer'] = 'neb'
term_to_abbreviation_dict['nebulisers'] = 'nebs'
term_to_abbreviation_dict['nebulizers'] = 'nebs'
term_to_abbreviation_dict['influenza'] = 'flu'
term_to_abbreviation_dict['over dose'] = 'overdose'
term_to_abbreviation_dict['[^a-z]od[^a-z]'] = ' overdose '
term_to_abbreviation_dict['msk'] = 'musculoskeletal'
term_to_abbreviation_dict['o2 sat'] = 'oxygen saturation'
term_to_abbreviation_dict['o sat'] = 'oxygen saturation'
term_to_abbreviation_dict['sat of'] = 'saturation of'
term_to_abbreviation_dict['sat %'] = 'saturation %'
# The question mark is escaped: as a regex, the previous key 'sat ?' meant
# "sat" plus an optional space, so it matched every occurrence of "sat"
# (including inside "saturation"). The replacement now keeps the '?', in line
# with the 'sat 0' -> 'saturday 0' rule below; it used to emit '%'.
term_to_abbreviation_dict[r'sat \?'] = 'saturday ?'
term_to_abbreviation_dict['sat 0'] = 'saturday 0'
term_to_abbreviation_dict['lupus ac'] = 'lupus anticoagulant'
term_to_abbreviation_dict['ac confusion'] = 'acute confusion'
term_to_abbreviation_dict[' ac joint'] = ' acromioclavicular joint'
term_to_abbreviation_dict[' ac left'] = ' acute left'
term_to_abbreviation_dict['ac ross'] = 'across'
term_to_abbreviation_dict['ac appendicitis'] = 'acute appendicitis'

#term_to_abbreviation_dict = {key.lower():val.lower() for (key,val) in term_to_abbreviation_dict.items()}
# Print every rule as "key & value\\", i.e. one LaTeX table row per rule.
for key,val in term_to_abbreviation_dict.items():
    print(key+" & "+val+'\\\\')

In [None]:
# Number of df_AMU rows per specialty, shown as the cell output.
df_AMU['SpecialtyDesc'].value_counts()

In [None]:
# Print every df_AMU diagnosis that contains the phrase
# 'acute coronary syndrome' (case-insensitive).
df_19 = df_AMU[df_AMU['Diagnosis'].str.lower().str.contains("acute coronary syndrome",case=False)]
print(df_19['Diagnosis'].values)

In [None]:
# Lower-case the Diagnosis column of df_AMU, overwriting the original casing.
df_AMU['Diagnosis'] = df_AMU['Diagnosis'].str.lower()   

In [None]:
# Apply every rule in the dictionary as a regex substitution inside each
# diagnosis string.
# NOTE(review): re-running this cell is not idempotent for rules whose
# replacement contains their own key (e.g. 'trop' -> 'troponin').
df_AMU['Diagnosis'] = df_AMU['Diagnosis'].replace(term_to_abbreviation_dict, regex=True)

In [None]:
# Column-scoped replace without regex=True: the dictionary keys are compared
# with whole cell values in 'Diagnosis', not searched for inside the text.
# NOTE(review): this looks like an alternative attempt at the regex cell
# above — confirm whether it is still needed.
df_AMU = df_AMU.replace({'Diagnosis': term_to_abbreviation_dict})

In [None]:
# Whole-value (non-regex) replacement of diagnoses that exactly equal a key.
# Assigned back explicitly: calling .replace(..., inplace=True) on the
# df_AMU['Diagnosis'] selection is chained in-place mutation, which warns in
# recent pandas and does not update df_AMU at all under Copy-on-Write.
df_AMU['Diagnosis'] = df_AMU['Diagnosis'].replace(term_to_abbreviation_dict)

In [None]:
# Show df_AMU rows whose diagnosis still contains the phrase after the
# replacements. The search text previously read 'corononary', a misspelling
# of the phrase used in the earlier 'acute coronary syndrome' check.
df_AMU[df_AMU['Diagnosis'].str.contains('acute coronary syndrome')]

In [None]:
# Complement of df_AMU's first filter: rows of df whose AmuTriage is the
# string '-1'. No admission-date cut-off is applied here.
df_nonAMU = df[df['AmuTriage']=='-1'].copy()
df_nonAMU

In [None]:
def triage_plot(dataframe):
    """Show AMU triage categories as a vertical stack of single-colour squares.

    NOTE: this redefines ``triage_plot`` from an earlier cell of the notebook;
    once this cell has run, the earlier colour-mapped version is replaced.

    All markers are drawn with one ``scatter`` call, one square per distinct
    value of ``dataframe['AmuTriage']`` in ``value_counts()`` order, with
    marker area ``count / 4``.  The raw value ``'-1'`` is displayed as
    ``'not via AMU'``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing an ``'AmuTriage'`` column.

    Returns
    -------
    None
        The figure is created and displayed with ``plt.show()``.
    """
    triage_counts = dataframe['AmuTriage'].value_counts()
    triage_counts = triage_counts.rename({'-1':'not via AMU'})
    sizes = triage_counts/4
    # Vertical centre of each marker: advance by half of sqrt(area) before and
    # after each centre, so spacing grows with the marker size.
    y = []
    cumulative_y = 0
    for size in sizes:
        cumulative_y+=np.sqrt(size)/2
        y.append(cumulative_y)
        cumulative_y+=np.sqrt(size)/2
    # Computed once instead of once per loop iteration.
    root_sizes = np.sqrt(sizes)
    plt.figure(figsize=(17,14*y[-1]/500))
    plt.scatter(x=np.zeros(len(triage_counts)),y=y,s=sizes,marker='s')
    # The larger limit is passed first, so the first category is drawn at the top.
    plt.ylim(max(y),np.sqrt(min(y)))
    for i,triage in enumerate(triage_counts.index):
        # `root_sizes` is indexed by category label, so positional access goes
        # through .iloc; integer keys on a label-indexed Series are deprecated
        # and raise in newer pandas.
        if i==0 or y[i]-y[i-1]>4: plt.text((y[-1]/500)*root_sizes.iloc[i]/10000,y[i],triage,va='center')
    plt.gca().axis('off');
    plt.title('AMU Triage')
    plt.show()
# Draw the triage chart for df_AMU using the triage_plot defined just above.
triage_plot(df_AMU)