In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "white" theme for the matplotlib/seaborn figures drawn below.
sns.set_theme(style="white")

#### Load data and visualise subgroup coverage rate

In [None]:
# Read the cross-log conformance-checking results and keep only the
# provider-level rows, renumbered from 0.
cc_res = pd.read_csv('process_mining/crosslog_cc_results_v2.csv')
provider_rows = cc_res['level'] == 'Provider'
cc_res = cc_res[provider_rows].reset_index(drop=True)

# Subsets flagged with pos = 1; every other subset gets pos = 0.
pos_subsets = [
    '>75 years',
    'Intensive Therapy',
    'Extended Stay (>30 days)',
    '1-year mortality',
    'Multimorbidity',
    'High Out-of-hours care (top 20% overall)',
]
cc_res['pos'] = np.where(cc_res['subset'].isin(pos_subsets), 1, 0)
cc_res

In [None]:
# Rows of the flagged subgroups (pos == 1). Take an explicit copy so the new
# column below is written to an independent frame rather than to a slice of
# cc_res (which triggers pandas' SettingWithCopyWarning).
cc_rvis = cc_res[cc_res['pos'] == 1].copy()

# Attach the coverage rate of the remaining rows as a second column.
# NOTE(review): this pairs rows positionally - it assumes index labels 6 and
# above hold the complementary groups in the same order as the flagged rows,
# and that there are exactly as many of them. Confirm against the CSV layout.
cc_rvis['n_percfit'] = cc_res['m_percfit'].loc[6:].tolist()

In [None]:
# Display the plotting frame to check the paired m_percfit / n_percfit values.
cc_rvis

In [None]:
# Shorten the two long subset names for the x-axis and rename the coverage
# columns to the legend labels used in the chart (Y / N).
# Both renames are done by name: addressing the out-of-hours row by the
# hard-coded index label 5 would relabel the wrong row if the row order
# changed, and would append a new row if label 5 did not exist.
subset_short_names = {
    'Extended Stay (>30 days)': 'Extended stay',
    'High Out-of-hours care (top 20% overall)': 'Out-of-hours care',
}
cc_rvis = (
    cc_rvis
    .assign(subset=cc_rvis['subset'].replace(subset_short_names))
    .rename(columns={'m_percfit': 'Y', 'n_percfit': 'N'})
)

In [None]:
# Flatten the coverage rates row by row into [Y0, N0, Y1, N1, ...].
# Taken positionally from the frame, so it does not rely on cc_rvis having
# the index labels 0..n-1.
perc_values = cc_rvis[['Y', 'N']].to_numpy().ravel().tolist()

# x-axis category of each entry in perc_values: every subset name twice
# (once for its Y bar, once for its N bar). Derived from the frame so the
# labels cannot drift from the data.
col_values = np.repeat(cc_rvis['subset'].to_numpy(), 2).tolist()

In [None]:
# Display the flattened coverage rates used for the bar annotations.
perc_values

In [None]:
# Grouped bar chart of the coverage rates: one group per subset, with a "Y"
# and an "N" bar in each group.
fig = px.bar(
    data_frame=cc_rvis,
    x="subset",
    y=["Y", "N"],
    opacity=1,
    orientation="v",
    barmode="group",
    title="Trace coverage rates representing alignment between Wave 1 and 2 subgroups",
    labels={'subset': '', 'value': '% of fitting traces in group'},
    color_discrete_map={'Y': '#8c0839', 'N': 'lightblue'},
)

font_family = 'Times New Roman Black'

# Condensed layout: size, margins, fonts, legend box and bar spacing.
fig.update_layout(
    margin=dict(l=10, r=10, b=10, t=40),
    width=700,
    height=500,
    legend=dict(title='', orientation='v', xanchor='right', x=0.11, y=1.01,
                borderwidth=2, bordercolor='black'),
    bargap=0.5,
    bargroupgap=0.15,
    plot_bgcolor='white',
    font=dict(size=13, family=font_family),
    xaxis_title_font=dict(size=15, family=font_family),
    yaxis_title_font=dict(size=15, family=font_family),
    yaxis_ticksuffix='%',
    yaxis_tickformat=',.0f',
)

# Axis styling: one tick per subset on x, black axis lines with inward ticks,
# horizontal grid lines only, axes drawn below the bars.
fig.update_xaxes(tickvals=cc_rvis['subset'], ticktext=cc_rvis['subset'], layer='below traces',
                 showgrid=False, linewidth=2, linecolor='black',
                 ticks='inside', ticklen=5, tickwidth=2)
fig.update_yaxes(showgrid=True, linewidth=2, linecolor='black', gridcolor='#b8abb0', gridwidth=1,
                 ticks='inside', ticklen=5, tickwidth=2, layer='below traces')

# Print the value above each bar. perc_values alternates Y, N per subset:
# even positions belong to the left (Y) bar, odd positions to the right (N)
# bar, so the label is nudged left or right of the group centre accordingly.
# No arrow is drawn, so no arrow settings are needed.
label_font = dict(size=11, color='#2a2d30', family=font_family)
for position, (category, value) in enumerate(zip(col_values, perc_values)):
    fig.add_annotation(
        x=category,
        y=value,
        text=str(round(value, 1)) + '%',
        font=label_font,
        showarrow=False,
        xshift=-12 if position % 2 == 0 else 12,
        yshift=6,
    )

# Export the figure to PDF, then render it in the notebook.
fig.write_image("process_mining/fig_cov_rate.pdf")
fig.show()

In [None]:
# List the column names available in the results table.
cc_res.columns

In [None]:
# Re-read the results file: the earlier cc_res was narrowed to the 'Provider'
# level, whereas this frame holds every row of the CSV again.
cc_res = pd.read_csv('process_mining/crosslog_cc_results_v2.csv')

In [None]:
# Measures to correlate, mapped to the mathtext labels shown on the plot.
measure_labels = {
    'L1W2_lf': '$LF{(L_1, W_2)}$',
    'L1W2_prec': '$P{(L_1, W_2)}$',
    'L1W2_gen': '$G{(L_1, W_2)}$',
    'L2W1_lf': '$LF{(L_2, W_1)}$',
    'L2W1_prec': '$P{(L_2, W_1)}$',
    'L2W1_gen': '$G{(L_2, W_1)}$',
    'GED': '$GED$',
}
cc_res_c = cc_res[list(measure_labels)].rename(columns=measure_labels)

# Pairwise correlation matrix of the selected measures
df_corr = cc_res_c.corr()

# Mask the upper triangle (diagonal included) so each pair is shown once
mask = np.triu(np.ones_like(df_corr, dtype=bool))

# Draw on an explicit figure/axes pair rather than the implicit current axes
tick_font = "Times New Roman Black"
fig, ax = plt.subplots(figsize=(9, 6))
ax.set_title('Correlation heatmap of the conformance checking measures.', fontname=tick_font,
             size=15)

# Custom diverging colormap
cmap = sns.diverging_palette(220, 20, as_cmap=True)

# Annotated heatmap on a fixed [-1, 1] colour scale centred at 0; seaborn's
# own colorbar is disabled because a custom one is added below.
heatmap = sns.heatmap(df_corr, mask=mask, cmap=cmap, vmax=1, vmin=-1, center=0,
                      square=True, linewidths=.7, annot=True, fmt=".2f",
                      annot_kws={"size": 14, "fontfamily": tick_font},
                      cbar=False, ax=ax)

# Custom colorbar built from the heatmap's mesh (the first collection on ax)
cbar = fig.colorbar(ax.collections[0], ax=ax, orientation='vertical', pad=0.01, aspect=20)

# Colorbar ticks every 0.5; whole numbers are printed without decimals
cbar_ticks = np.arange(-1, 1.1, 0.5)
cbar.set_ticks(cbar_ticks)
cbar.set_ticklabels(['{:,.1f}'.format(t) if t % 1 != 0 else '{:,.0f}'.format(t) for t in cbar_ticks])

# Tick-label font family and size, applied after the heatmap has created the
# labels so the styling does not depend on the axes' pre-plot tick state.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=13, fontname=tick_font)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=13, fontname=tick_font)

# Increase the margin between the plot and the title
plt.subplots_adjust(top=0.85)

# Save the plot as an SVG file
plt.savefig("process_mining/correlation_heatmap.svg", bbox_inches="tight")