In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Demographic table; per the original note it also contains the PANSS scores.
# NOTE(review): a later cell indexes this frame with .loc[patients], so the
# subject ID presumably has to be the index (e.g. via index_col) — confirm.
dem_data = pd.read_csv('')

# Subject IDs of the patient group, loaded as strings.
patients = np.loadtxt('/patient_subs.txt', dtype=str)

In [None]:
# PANSS columns pulled from the demographic table: the assessment date, the
# individual items (P1-P7, N1-N7, G1-G16), then the summary and factor scores.
panss_cols = [
    'PANSS_datum',
    *(f'PANSS_P{item}' for item in range(1, 8)),
    *(f'PANSS_N{item}' for item in range(1, 8)),
    *(f'PANSS_G{item}' for item in range(1, 17)),
    'PANSS_remission',
    'PANSS_totaal',
    'PANSS_positive',
    'PANSS_negative',
    'PANSS_general',
    'PANSS_Positive_factor',
    'PANSS_negative_factor',
    'PANSS_disorganized_factor',
    'PANSS_excited_factor',
    'PANSS_depressed_factor',
]

In [None]:
# Keep only the PANSS columns, then only the rows whose index label is one of
# the patient subject IDs.
panss_all_subjects = dem_data[panss_cols]
panss = panss_all_subjects.loc[patients]

In [None]:
# Filter for missing scores: keep rows that are not flagged 'geen PANSS' in the
# remission column and whose total score is below 1000.
# NOTE(review): totals >= 1000 are presumably missing-value placeholders — confirm.
has_panss = panss['PANSS_remission'] != 'geen PANSS'
total_below_1000 = panss['PANSS_totaal'] < 1000
panss_filt = panss_correct_filt = panss.loc[has_panss & total_below_1000]

In [None]:
# Negative-symptom items only (PANSS_N1 .. PANSS_N7).
negative_items = [f'PANSS_N{item}' for item in range(1, 8)]
panss_neg = panss_filt[negative_items]

In [None]:
# Reshape to long format (one row per subject x symptom), which is easier to plot.
panss_neg_melt = panss_neg.melt(var_name='symptom', value_name='score')

# melt() stacks the symptom columns one after another and does not keep the
# original index, so the subject IDs are re-attached by repeating the index
# once per melted column. The repeat count is taken from the frame itself
# rather than hard-coded, so it stays correct if the symptom set changes.
panss_neg_melt['sub_id'] = np.tile(panss_neg.index.to_numpy(), panss_neg.shape[1])

#### Plot histograms of the PANSS negative-symptom score distributions

In [None]:
# White theme with a fully transparent axes background (RGBA alpha = 0).
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Edges of six equal-width bins over [1, 6], computed from the first symptom column.
first_symptom = panss_neg.columns[0]
bins = np.histogram_bin_edges(panss_neg[first_symptom], bins=6, range=(1, 6))

# Initialize the FacetGrid object: one row per symptom, coloured by symptom.
pal = sns.cubehelix_palette(10, rot=-.25, light=0.5)
g = sns.FacetGrid(panss_neg_melt, row="symptom", hue="symptom", aspect=15, height=1, palette=pal)

# Draw the histograms in a few steps. Both layers use the same binning
# (six bins over [1, 6]) and count statistic.
# NOTE(review): scores outside binrange are presumably not counted — confirm
# that no item score exceeds 6 in this data.
hist_kwargs = dict(stat='count', bins=6, binrange=(1, 6), clip_on=False)
g.map(sns.histplot, "score", alpha=1, linewidth=1.5, **hist_kwargs)       # first layer, default element
g.map(sns.histplot, "score", linewidth=2, element='step', **hist_kwargs)  # second layer, step outline
g.map(plt.axhline, y=0, lw=0.7, clip_on=False, c='grey')                  # grey baseline at count 0


## Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold near the lower-right corner of the current axes.

    Used as a ``FacetGrid.map`` callback in this notebook.

    Parameters
    ----------
    x : unused; present because the callback is mapped over a data column.
    color : colour applied to the text.
    label : text to draw.
    """
    current_axes = plt.gca()
    text_style = dict(fontweight="bold", color=color, ha="right", va="bottom")
    # (1, .2) is in axes coordinates because of transform=transAxes:
    # right edge, 20% of the way up the axes.
    current_axes.text(1, .2, label, transform=current_axes.transAxes, **text_style)
    
def bins_labels(bins, **kwargs):
    """Put one x tick at the centre of each histogram bin and clip the x-axis.

    Each tick is labelled with the corresponding entry of ``bins`` (the bin's
    lower edge when ``bins`` is sorted ascending). The x-limits are set to
    ``bins[0]`` .. ``bins[-1]``.

    Parameters
    ----------
    bins : sequence of bin edges. The bin width is computed as the mean
        spacing, so the edges are assumed to be evenly spaced.
    **kwargs : forwarded to ``plt.xticks`` (e.g. ``fontsize``).
    """
    bin_w = (max(bins) - min(bins)) / (len(bins) - 1)
    centres = np.arange(min(bins) + bin_w / 2, max(bins), bin_w)
    # There is one tick per bin but one more edge than bins. Passing every edge
    # as a label hands matplotlib more labels than ticks, which it rejects with
    # a ValueError for fixed tick locations, so only as many labels as there
    # are ticks are passed.
    tick_labels = list(bins)[:len(centres)]
    plt.xticks(centres, tick_labels, **kwargs)
    plt.xlim(bins[0], bins[-1])

# Write each facet's symptom name inside its axes.
g.map(label, "score")

# Remove the vertical spacing between the facet rows.
g.fig.subplots_adjust(hspace=0)

# Strip the per-facet titles and the bottom spines, then set the shared x label
# and the final tick positions. The FacetGrid methods return the grid, so the
# calls are chained in the same order as before.
(
    g.set_titles("")
     .set(xticks=[])
     .despine(bottom=True, left=False)
     .set_xlabels('PANSS Score', fontsize=15)
     .set(xticks=[1, 2, 3, 4, 5, 6], yticks=[20, 40])
)

count_tick_labels = [20, 40]
# Centres of six equal-width bins on [1, 6]: 1 + (k + 0.5) * 5/6 for k = 0..5.
bin_centres = [1.417, 2.250, 3.083, 3.917, 4.750, 5.583]
score_labels = [1, 2, 3, 4, 5, 6]

for i, ax in enumerate(g.axes.flat):
    # Keep the y label and y ticks on the left-hand side of every row.
    ax.yaxis.set_label_position("left")
    ax.yaxis.set_ticks_position('left')
    ax.set_yticklabels(count_tick_labels, fontsize=13)

    if i == 3:
        # Fourth row: carries the single 'Count' label for the figure.
        ax.set_ylabel('Count', fontsize=15)
    elif i == 6:
        # Seventh row: place the score labels at the bin centres.
        ax.set_xticks(bin_centres)
        ax.set_xticklabels(score_labels, fontsize=13)

g.fig.suptitle('Distribution of Negative Symptoms', fontsize=20)