In [None]:
# dependencies
import pandas as pd
from fuzzywuzzy import fuzz
import numpy as np
import matplotlib.pyplot as mp

In [None]:
# Excel workbook holding the data; the topic cells below read one sheet each
# from it with pd.read_excel. The path is relative, so it is resolved against
# the current working directory of the kernel.
db_name = "database.xlsx"

In [None]:
# Maps the two-letter topic key (also used as the Excel sheet name in the
# topic cells) to the full topic name that titles are compared against.
# The names feed directly into the Levenshtein distance, so their spelling
# matters: "Quantitative" and "Genitourinary" were previously misspelled.
topics = {
  "mk": "Musculoskeletal Radiology",
  "ct": "Computed Tomography",
  "er": "Emergency Radiology",
  "bq": "Biomarkers and Quantitative imaging",
  "sq": "Safety and Quality",
  "pr": "Professionalism",
  "nm": "Nuclear Medicine",
  "ch": "Chest Radiology",
  "br": "Breast Imaging",
  "gu": "Genitourinary Radiology"
}

Section 2

In [None]:
def levenshtein_distance(a, b):
    '''
    Compute the Levenshtein (edit) distance between two strings.

    Adapted from https://www.educative.io/answers/the-levenshtein-distance-algorithm

    params:
    a: string
    b: string
    returns:
    the minimum number of single-character insertions, deletions and
    replacements needed to turn a into b (integer)
    '''
    # Row 0 of the DP table: turning the empty prefix of `a` into the first
    # j characters of `b` takes j insertions.
    previous_row = list(range(len(b) + 1))

    for row_index, char_a in enumerate(a, start=1):
        # Column 0: turning the first row_index characters of `a` into the
        # empty string takes row_index deletions.
        current_row = [row_index]

        for col_index, char_b in enumerate(b, start=1):
            if char_a == char_b:
                # Matching characters cost nothing: carry the diagonal value over.
                cell = previous_row[col_index - 1]
            else:
                # Every edit operation costs 1 on top of the cell it extends.
                cell = 1 + min(
                    current_row[col_index - 1],   # insertion
                    previous_row[col_index],      # deletion
                    previous_row[col_index - 1],  # replacement
                )
            current_row.append(cell)

        previous_row = current_row

    # The last cell of the last row is the distance between the full strings.
    return previous_row[len(b)]

Topic: Musculoskeletal Radiology

In [None]:
topic_key = "mk"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Computed Tomography

In [None]:
topic_key = "ct"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "ct" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()



Topic: Emergency Radiology

In [None]:
topic_key = "er"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "er" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Biomarkers and Quantitative imaging

In [None]:
topic_key = "bq"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "bq" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Safety and Quality

In [None]:
topic_key = "sq"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "sq" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Professionalism

In [None]:
topic_key = "pr"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "pr" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Nuclear Medicine

In [None]:
topic_key = "nm"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "nm" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Chest Radiology

In [None]:
topic_key = "ch"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "ch" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Breast Imaging

In [None]:
topic_key = "br"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "br" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()


Topic: Genitourinary Radiology

In [None]:
topic_key = "gu"  # used both as the topics lookup key and as the Excel sheet name
topic = topics[topic_key]  # fetch topic

# NOTE: the variable name `mk_sheet` is kept so that any other cell reading it
# keeps working; in this cell it holds the "gu" sheet.
mk_sheet = pd.read_excel(db_name, sheet_name=topic_key)

# Blank Excel cells are read as NaN (a float) and numeric-looking titles as
# numbers; neither has .lower(), so drop missing titles and coerce the rest
# to str before comparing.
titles = mk_sheet["title"].dropna().astype(str)

# Levenshtein distance between every lower-cased title and the topic name
topic_lower = topic.lower()
titleLDs = [levenshtein_distance(title.lower(), topic_lower) for title in titles]

# mean and standard deviation of the distances (np.std defaults to ddof=0)
print("mean: ", np.mean(titleLDs))
print("standard deviation: ", np.std(titleLDs))

# visualize in 1D graph: each distance is drawn as an 'x' on the line y = 0
fig, ax = mp.subplots(ncols=1, nrows=1, figsize=(20, 1))
ax.set_title("Topic x Title Levenshtein Distances")
ax.plot(titleLDs, np.zeros_like(titleLDs), 'x')
mp.show()
