In [1]:
import functools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pds
import seaborn as sns
from matplotlib.ticker import FuncFormatter
from pandasql import sqldf
from tabulate import tabulate

## define function for printing pandas dataframes as markdown

In [2]:
def print_pandas_md(df):
    """Print ``df`` to stdout as a pipe-delimited (markdown) table.

    The frame's column names are used as the table headers (``headers="keys"``).
    Nothing is returned; the rendered table is only printed.
    """
    markdown_table = tabulate(df, headers="keys", tablefmt="pipe")
    print(markdown_table)

## define function to round values in an interval object

In [3]:
def round_interval(interval):
    """Return a new interval whose endpoints are ``interval``'s endpoints rounded.

    Both endpoints are passed through the built-in ``round`` (no digits argument).
    The result is always closed on the left, regardless of how the input
    interval was closed.
    """
    lower, upper = (round(edge) for edge in (interval.left, interval.right))
    return pds.Interval(left=lower, right=upper, closed='left')

## load local copies of data

In [4]:
# Read the tab-separated file from the working directory into tooth_proc_df.
tooth_proc_df = pds.read_csv(
    'triplestore-tooth-procedure-info.tsv',
    sep='\t',
)

In [5]:
# Read the tab-separated file from the working directory into tooth_type_df.
tooth_type_df = pds.read_csv(
    'triplestore-tooth-type-info.tsv',
    sep='\t',
)

In [6]:
# Read the tab-separated file from the working directory into proc_surface_df.
proc_surface_df = pds.read_csv(
    'triplestore-procedure-surface-info.tsv',
    sep='\t',
)

In [7]:
# Read the tab-separated file from the working directory into proc_material_df.
proc_material_df = pds.read_csv(
    'triplestore-proc-material-info.tsv',
    sep='\t',
)

# Plot data

## plot total number of distinct teeth by tooth number that underwent a procedure

In [None]:
# Build a per-tooth-number row count from tooth_proc_df.
# Work on a copy of the single column so tooth_proc_df itself is never modified.
tooth_count_df = tooth_proc_df[['tooth_num']].copy()

# Cast tooth numbers to int (presumably so they group and sort numerically below).
# The original line referenced the misspelled name `toot_count_df`, which raised
# NameError on a fresh kernel.
tooth_count_df['tooth_num'] = tooth_count_df['tooth_num'].astype(int)

# The table name in the query is the DataFrame variable `tooth_count_df`.
# NOTE(review): count(*) counts rows per tooth number; this equals the number of
# distinct teeth only if each row of tooth_proc_df is a distinct tooth -- confirm.
tooth_count_query = """
select 
    tooth_num as [Tooth Number], count(*) as [Number of Teeth]
from
    tooth_count_df
group by
    tooth_num
order by
    tooth_num
"""
temp_df = sqldf(tooth_count_query)

In [None]:
# Use the tooth numbers as the index so they become the x-axis categories of the bar chart.
# The guard keeps this cell safe to re-run: once 'Tooth Number' is the index it is no
# longer a column, and calling set_index on it again would raise KeyError.
if 'Tooth Number' in temp_df.columns:
    temp_df = temp_df.set_index('Tooth Number')

In [None]:
%matplotlib inline
colors = ['#4c72b0']
sns.set_palette(colors)

# draw bar chart showing the number of procedures performed on each tooth (number)
ax = temp_df.plot.bar(figsize=(12,7)) 

# rotate the ticks on the x axis
plt.xticks(rotation=0)

# configure labels
# note the use of '\n' to create space between ticks and labels
ax.set_xlabel("\nTooth Number", fontsize=15)
ax.set_ylabel("Number of Teeth that had a Procedure\n", fontsize=15)
ax.tick_params(labelsize='large')


ax.set_yticklabels(['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()]) # put commas in y-ticks
ax.get_legend().remove() # remove legend
plt.savefig("figures/tooth-count-per-tooth.png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Preview the first five rows of the tooth/procedure frame.
tooth_proc_df.head(n=5)