In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats

# Run-size constants for the saved simulation output.
# NOTE(review): num_iterations is not referenced by any cell visible in this notebook;
# presumably it is the number of adventurers generated per run — confirm against the simulation code.
num_iterations = 10000
# Number of simulation runs: the parsing helpers read results[0] .. results[num_simulations - 1],
# and calculate_CI uses it as the sample size for the standard error.
num_simulations = 50

In [None]:
# Load the saved simulation output.
# NOTE: pickle.load can execute arbitrary code while deserializing — only open files you trust.
file_path = 'sim_results.pkl'
with open(file_path, 'rb') as f:
    # Later cells index this as results[i][j]: i selects a simulation run, j selects one of its tallies.
    results = pickle.load(f)

In [None]:
### Define functions to parse results for analysis ###

def get_species_class_combo_counts(results):
    """Gather per-run counts for every (species, class) pairing.

    Slot 5 of each of the first ``num_simulations`` entries of ``results`` is
    read as a nested mapping ``species -> {class: count}``.

    Returns
    -------
    dict
        ``{(species, class): [count, ...]}`` with counts in run order. A
        pairing that is absent from a run adds nothing for that run.
    """
    combo_counts = {}
    for run in range(num_simulations):
        per_species = results[run][5]
        for species in per_species:
            for classe, count in per_species[species].items():
                # setdefault creates the list on first sight, then we append either way.
                combo_counts.setdefault((species, classe), []).append(count)

    return combo_counts

def get_species_background_combo_counts(results):
    """Gather per-run counts for every (species, background) pairing.

    Slot 6 of each of the first ``num_simulations`` entries of ``results`` is
    read as a nested mapping ``species -> {background: count}``.

    Returns
    -------
    dict
        ``{(species, background): [count, ...]}`` with counts in run order.
    """
    combo_counts = {}
    for run in range(num_simulations):
        per_species = results[run][6]
        for species in per_species:
            for background, count in per_species[species].items():
                pair = (species, background)
                # First sighting starts a fresh list; every sighting appends.
                combo_counts.setdefault(pair, []).append(count)

    return combo_counts

def get_class_backgroud_combo_counts(results):
    """Gather per-run counts for every (class, background) pairing.

    Slot 7 of each of the first ``num_simulations`` entries of ``results`` is
    read as a nested mapping ``class -> {background: count}``.

    Membership is checked against this function's own accumulator only, so
    every run's count is kept and no notebook-level variable is consulted.

    Returns
    -------
    dict
        ``{(class, background): [count, ...]}`` with counts in run order. A
        pairing that is absent from a run adds nothing for that run.
    """
    class_background_combo_counts = {}
    for i in range(num_simulations):
        per_class = results[i][7]
        for classe in per_class:
            for background, count in per_class[classe].items():
                # Create the list on first sight, then append unconditionally.
                class_background_combo_counts.setdefault((classe, background), []).append(count)

    return class_background_combo_counts

def get_counts(j):
    """Collect the per-run values of tally slot ``j`` into lists.

    Reads the notebook-level ``results`` and ``num_simulations``. For each of
    the first ``num_simulations`` runs, ``results[run][j]`` is treated as a
    flat ``{label: value}`` mapping.

    Returns
    -------
    dict
        ``{label: [value, ...]}`` with values in run order.
    """
    collected = {}
    for run in range(num_simulations):
        for label, value in results[run][j].items():
            collected.setdefault(label, []).append(value)

    return collected

In [None]:
# Flat tallies: slots 0-4 of every run are {label: count} mappings, gathered into {label: [count per run]}.
# NOTE(review): which slot holds which attribute is inferred from the names below — confirm against the simulation output.
species_selection_counts = get_counts(0)
class_selection_counts = get_counts(1)
background_selection_counts = get_counts(2)
alignment_selection_counts = get_counts(3)
dead_farmer_counts = get_counts(4)
# Pairwise tallies: slots 5-7 are nested mappings, flattened into {(outer, inner): [count per run]}.
species_class_combo_counts = get_species_class_combo_counts(results)
species_background_combo_counts = get_species_background_combo_counts(results)
class_background_combo_counts = get_class_backgroud_combo_counts(results)

In [None]:
def calculate_parameters(dictionary):
    """Summarise each key's list of values by its mean and variance.

    Parameters
    ----------
    dictionary : dict
        ``{key: sequence of numbers}``.

    Returns
    -------
    (dict, dict)
        ``means`` and ``variances``, both keyed like ``dictionary``. The
        variance is ``np.var`` with its default settings (divides by N).
    """
    means = {key: np.mean(samples) for key, samples in dictionary.items()}
    variances = {key: np.var(samples) for key, samples in dictionary.items()}
    return means, variances



# Mean and variance across simulation runs for every tally built above (single attributes first, then pairs).
species_means, species_variances = calculate_parameters(species_selection_counts)
class_means, class_variances = calculate_parameters(class_selection_counts)
background_means, background_variances = calculate_parameters(background_selection_counts)
alignment_means, alignment_variances = calculate_parameters(alignment_selection_counts)
dead_farmer_means, dead_farmer_variances = calculate_parameters(dead_farmer_counts)
species_class_combo_means, species_class_combo_variances = calculate_parameters(species_class_combo_counts)
species_background_combo_means, species_background_combo_variances = calculate_parameters(species_background_combo_counts)
class_background_combo_means, class_background_combo_variances = calculate_parameters(class_background_combo_counts)

In [None]:
def calculate_CI(means, variances):
    """Build a confidence interval around each mean.

    For every key in ``means`` the interval is
    ``mean ± 1.96 * sqrt(variance) / sqrt(num_simulations)``, using the
    notebook-level ``num_simulations`` as the sample size. Both bounds are
    rounded to two decimals.

    Returns
    -------
    dict
        ``{key: (lower, upper)}``.
    """
    intervals = {}
    for key, centre in means.items():
        # Standard error of the mean, scaled by the 1.96 multiplier.
        margin = 1.96 * (np.sqrt(variances[key]) / np.sqrt(num_simulations))
        intervals[key] = (np.round(centre - margin, 2), np.round(centre + margin, 2))
    return intervals

In [None]:
# Confidence intervals (1.96 standard errors either side of the mean) for every tally,
# stored as {key: (lower, upper)} with bounds rounded to two decimals.
species_CI = calculate_CI(species_means, species_variances)
class_CI = calculate_CI(class_means, class_variances)
background_CI = calculate_CI(background_means, background_variances)
alignment_CI = calculate_CI(alignment_means, alignment_variances)
dead_farmer_CI = calculate_CI(dead_farmer_means, dead_farmer_variances)
species_class_CI = calculate_CI(species_class_combo_means, species_class_combo_variances)
species_background_CI = calculate_CI(species_background_combo_means, species_background_combo_variances)
class_background_CI = calculate_CI(class_background_combo_means, class_background_combo_variances)

In [None]:
def plot_results(
    CI_dictionary,
    x,
    y,
    filename
):
    """Bar chart of interval midpoints with error bars, tallest bar first.

    Parameters
    ----------
    CI_dictionary : dict
        ``{label: (lower, upper)}``.
    x, y : str
        Axis labels.
    filename : str
        Path handed to ``plt.savefig``; the figure is also shown.

    Bar heights are the midpoints of the intervals and the error bars are
    their half-widths.
    """
    def midpoint(bounds):
        lower, upper = bounds
        return (lower + upper) / 2

    ranked = sorted(CI_dictionary.items(), key=lambda entry: midpoint(entry[1]), reverse=True)
    labels = [label for label, _ in ranked]
    heights = [midpoint(bounds) for _, bounds in ranked]
    half_widths = [(upper - lower) / 2 for _, (lower, upper) in ranked]

    plt.figure(figsize=(10, 6))
    plt.bar(labels, heights, yerr=half_widths, capsize=10)

    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel(x, fontsize=14)
    plt.ylabel(y, fontsize=14)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig(filename)
    plt.show()
    
def print_latex_table(

    CI_dictionary,
    x,
    pop = 10000,
):

    data = []

    for key, (lower, upper) in CI_dictionary.items():

        mean_value = np.round((lower + upper) / 2, 0)
        ci_width = np.round((upper - lower) / 2, 0)
        mean_percentage = np.round((mean_value / pop) * 100, 2)
        ci_percentage = np.round((ci_width / pop) * 100, 2)

        formatted_value = f'{mean_value:.0f} ± {ci_width:.0f}'
        formatted_percentage = f'{mean_percentage:.2f} ± {ci_percentage:.2f}%'

        data.append((key, formatted_value, formatted_percentage, mean_value))

    df = pd.DataFrame(data, columns=[x, 'Mean Count', 'Percentage of Population', 'Mean Value'])
    df = df.sort_values(by='Mean Value', ascending=False)

    df = df.drop(columns=['Mean Value'])

    latex_table = df.to_latex(index=False, column_format='l r r', caption='Mean Counts and Population Percentages of Adventurers')
    latex_table = latex_table.replace(r'{r}', '{l@{\hspace{0.3em}}r}')  

    print(latex_table)

In [None]:
# One bar chart and one LaTeX table per single-attribute breakdown.
# The charts are written under figs/; nothing in this cell creates that directory.
single_attribute_reports = [
    (species_CI, 'Species', 'figs/results_species.png'),
    (class_CI, 'Class', 'figs/results_class.png'),
    (background_CI, 'Background', 'figs/results_background.png'),
    (alignment_CI, 'Alignment', 'figs/results_alignment.png'),
]

# All charts first ...
for ci_by_label, attribute, figure_path in single_attribute_reports:
    plot_results(ci_by_label, x = attribute, y = 'Mean Frequency', filename = figure_path)

# ... then all tables, in the same attribute order.
for ci_by_label, attribute, _ in single_attribute_reports:
    print_latex_table(ci_by_label, attribute)

In [None]:
def build_combination_df(
    CI_dictionary, x, y
):
    """Turn a pairwise interval dict into a long-format DataFrame.

    Parameters
    ----------
    CI_dictionary : dict
        ``{(first, second): (lower, upper)}``.
    x, y : str
        Column names for the first and second element of each key.

    Returns
    -------
    pandas.DataFrame
        One row per key with columns ``x``, ``y``, ``'Mean'`` (midpoint of the
        interval) and ``'CI'`` (full width, ``upper - lower``).
    """
    pairs = list(CI_dictionary.keys())
    bounds = list(CI_dictionary.values())

    return pd.DataFrame({
        x: [first for first, _ in pairs],
        y: [second for _, second in pairs],
        'Mean': [(lower + upper) / 2 for lower, upper in bounds],
        'CI': [upper - lower for lower, upper in bounds],
    })

In [None]:
# Reshape each pairwise interval dict into a long-format frame (one row per pair)
# with the two attribute columns plus 'Mean' and 'CI'.
species_class_df = build_combination_df(species_class_CI, x = 'Species', y = 'Class')

species_background_df = build_combination_df(species_background_CI, x = 'Species', y = 'Background')

class_background_df = build_combination_df(class_background_CI, x = 'Class', y = 'Background')

In [None]:
# Show the species/class frame as the cell output to eyeball the reshaped data.
species_class_df

In [None]:
def run_chi_sq(df, x, y):
    """Chi-squared test of independence on a table of mean counts.

    ``df`` is pivoted so ``y`` values index the rows and ``x`` values the
    columns, with ``'Mean'`` as the cell value (missing pairs become 0). Rows
    and columns are each ordered by their totals, largest first. The test
    statistic, p-value, degrees of freedom and expected frequencies are
    printed.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The sorted contingency table, and the residuals
        ``(observed - expected) / sqrt(expected)`` rounded to whole numbers,
        with every cell whose residual lies within [-2, 2] set to NaN.
    """
    pivoted = df.pivot(index=y, columns=x, values='Mean').fillna(0)
    row_order = pivoted.sum(axis=1).sort_values(ascending=False).index
    column_order = pivoted.sum(axis=0).sort_values(ascending=False).index
    contingency_table_sorted = pivoted.loc[row_order, column_order]

    chi2, p, dof, expected = stats.chi2_contingency(contingency_table_sorted)
    print(f"Chi-squared: {chi2}")
    print(f"P-value: {p}")
    print(f"Degrees of freedom: {dof}")
    print(f"Expected frequencies:\n{expected}")

    residuals_df = pd.DataFrame(
        (contingency_table_sorted.values - expected) / np.sqrt(expected),
        index=contingency_table_sorted.index,
        columns=contingency_table_sorted.columns,
    )
    # Keep only cells more than 2 away from zero; the rest become NaN.
    beyond_two = residuals_df.abs() > 2
    significant_residuals = np.round(residuals_df.where(beyond_two), 0)

    return contingency_table_sorted, significant_residuals

In [None]:
def plot_contingency_table(df, x, y, filename):
    """Draw an annotated heat map of a contingency table and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to draw. Its index labels the y-axis ticks and its columns the
        x-axis ticks; each cell's value is printed in the cell.
    x, y : str
        Axis titles for the columns and rows respectively.
    filename : str
        Path handed to ``plt.savefig``; the figure is also shown.

    The table is read from the ``df`` argument rather than from a
    notebook-level variable, so the function draws whatever it is handed.
    """
    table = df
    fig, ax = plt.subplots(figsize=(12, 10))

    # Colour scale runs from 0 up to the largest cell in the table.
    cax = ax.imshow(table, cmap='coolwarm', vmin=0, vmax=np.max(table.values))

    cbar = fig.colorbar(cax, shrink=0.75)

    for label in cbar.ax.get_yticklabels():
        label.set_fontsize(14)

    plt.xticks(np.arange(len(table.columns)), table.columns, rotation=45, ha='right', fontsize=14)
    plt.yticks(np.arange(len(table.index)), table.index, fontsize=14)

    plt.title('Adventurer Demographics', fontsize=20)
    plt.ylabel(y, fontsize=17)
    plt.xlabel(x, fontsize=17)

    # Write each cell's value at its centre (imshow puts column j at x=j, row i at y=i).
    for i in range(len(table.index)):
        for j in range(len(table.columns)):
            value = table.iloc[i, j]
            ax.text(j, i, f'{value:.0f}', ha='center', va='center', color='black')

    plt.tight_layout()
    plt.savefig(filename)
    plt.show()
    
    
def plot_significant_residuals(df, x, y, filename):
    """Draw a heat map of chi-squared residuals and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Residual table to draw. Its index labels the y-axis ticks and its
        columns the x-axis ticks.
    x, y : str
        Axis titles for the columns and rows respectively.
    filename : str
        Path handed to ``plt.savefig``; the figure is also shown.

    The colour scale is fixed to [-5, 5] and the colour bar is labelled
    qualitatively instead of numerically. The table is read from the ``df``
    argument rather than from a notebook-level variable.
    """
    residual_table = df

    fig, ax = plt.subplots(figsize=(12, 10))
    cax = ax.imshow(residual_table, cmap='coolwarm', vmin=-5, vmax=5)
    cbar = fig.colorbar(cax, ax=ax, orientation='vertical', shrink = 0.75)
    cbar.set_ticks([-5,0, 5])
    cbar.set_ticklabels(['Very Few', 'Common', 'A Lot'])  # Set custom labels
    for label in cbar.ax.get_yticklabels():
        label.set_fontsize(14)
    plt.xticks(np.arange(len(residual_table.columns)), labels = residual_table.columns, fontsize = 14)
    plt.yticks(np.arange(len(residual_table.index)), labels = residual_table.index, fontsize = 14)
    plt.ylabel(y, fontsize = 17)
    plt.xlabel(x, fontsize = 17)
    ax.set_title('Adventurer Demographics Significance', fontsize = 20)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(filename)
    plt.show()



In [None]:
# Chi-squared test plus two heat maps for each pairwise attribute combination.
pairwise_analyses = [
    (species_class_df, 'Species', 'Class',
     'combo_species_class.png', 'chisq_resid_species_class.png'),
    (species_background_df, 'Species', 'Background',
     'combo_species_background.png', 'chisq_resid_species_background.png'),
    (class_background_df, 'Class', 'Background',
     'combo_class_background.png', 'chisq_resid_class_background.png'),
]

for combo_df, column_attr, row_attr, table_png, residual_png in pairwise_analyses:
    # These two notebook-level names are rebound on every pass; after the loop
    # they hold the results of the last (Class / Background) analysis.
    contingency_table_sorted, significant_residuals = run_chi_sq(combo_df, x = column_attr, y = row_attr)
    plot_contingency_table(contingency_table_sorted, x = column_attr, y = row_attr, filename = table_png)
    plot_significant_residuals(significant_residuals, x = column_attr, y = row_attr, filename = residual_png)