# Code for calculating correlations between fanfiction-features from MythFic Metadata
Associated with the paper *Rewriting the Mythological Canon: Dynamics of Popularity and Correlation in Fanfiction about Greek Myth*, written for the *When Readers Become Writers* conference.


## Import some requirements

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr
import IPython.display as display
import openpyxl

## Open the CSV of data from GitHub

In [3]:
# Load the metadata table (popularity metrics plus binary character/genre/category/rating flags)
df = pd.read_csv(filepath_or_buffer='popularity+binary_characters_genres_categories_ratings.csv')

## A little bit of pre-processing

In [5]:
# Remove the "chapters" column: it is formatted differently from the other columns.
# Working with the number of chapters could be a nice direction for future work.
df = df.drop("chapters", axis=1)

## Calculate Spearman's Rho

Also calculate the p-values and treat a correlation as significant only when p < 0.05.
Then apply the heatmap styling and display the results.

NB: ChatGPT assisted me in creating this code.

In [14]:
# Replace any missing values (NaN) with 0 before computing correlations
df = df.fillna(value=0)

# Function to compute Spearman correlation matrix and p-value matrix
def spearman_correlation_with_pvalues(df):
    """Compute Spearman's rho and its p-value for every pair of numeric columns.

    Non-numeric columns are ignored. The correlation matrix comes from
    ``DataFrame.corr(method='spearman')``, which uses pairwise-complete
    observations. The p-value for each pair is computed with
    ``scipy.stats.spearmanr`` on those same pairwise-complete rows, so a
    missing value in one column no longer produces a NaN p-value next to a
    valid correlation. For data without missing values the result is the same
    as calling ``spearmanr`` on the full columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; only columns with a numeric dtype are used.

    Returns
    -------
    corr : pandas.DataFrame
        Square matrix of Spearman correlation coefficients.
    pvals : pandas.DataFrame
        Square, symmetric matrix of p-values with the same labels as ``corr``.
        The diagonal is 1.0. A pair with fewer than two complete observations
        gets NaN; ``spearmanr`` itself may also return NaN (for example when
        one of the columns is constant).
    """
    # Select only numeric columns to avoid non-numeric data
    df_numeric = df.select_dtypes(include=[np.number])

    corr = df_numeric.corr(method='spearman')  # Spearman correlation matrix

    # Start from all ones: the diagonal is never overwritten below, so a
    # variable paired with itself is never reported as "significant".
    pvals = pd.DataFrame(np.ones_like(corr, dtype=float), columns=corr.columns, index=corr.index)

    n_cols = df_numeric.shape[1]
    for i in range(n_cols):
        for j in range(i + 1, n_cols):
            # Use the same rows pandas used for the coefficient of this pair
            pair = df_numeric.iloc[:, [i, j]].dropna()
            if len(pair) < 2:
                p_val = np.nan  # not enough data to test anything
            else:
                _, p_val = spearmanr(pair.iloc[:, 0], pair.iloc[:, 1])
            pvals.iloc[i, j] = p_val
            pvals.iloc[j, i] = p_val  # Symmetric matrix

    return corr, pvals

# Compute the Spearman correlation matrix and p-value matrix
corr_matrix, pval_matrix = spearman_correlation_with_pvalues(df)

# Significance threshold: a correlation is kept only when its p-value is strictly below it
threshold = 0.05

# Keep significant correlations only. `where` keeps the cells whose condition is True and
# sets every other cell to NaN, so p-values equal to the threshold and undefined (NaN)
# p-values are masked too, matching the stated criterion p < 0.05.
significant_corr_matrix = corr_matrix.where(pval_matrix < threshold)

# Colour-code the matrix over all cells at once (axis=None) and show two decimals
styled_matrix = significant_corr_matrix.style.background_gradient(cmap="coolwarm", axis=None).format(
    "{:.2f}"
)

# Display the styled matrix
display.display(styled_matrix)


  corr_val, p_val = spearmanr(df_numeric.iloc[:, i], df_numeric.iloc[:, j])


Unnamed: 0,words,comments,kudos,bookmarks,hits,Apollo,Persephone,Hades,Achilles,Patroclus,Zeus,Hera,Ares,Demeter,Aphrodite,Artemis,Athena,angst,hurt/comfort,fluff,AU,Teen,General,Mature,Explicit,Not Rated,F/M,M/M,F/F,Gen,Multi,None,Other
words,,0.32,0.29,0.36,0.35,0.07,,0.09,,,0.17,0.11,0.1,0.08,0.1,0.08,0.06,0.12,0.11,0.14,,0.06,-0.29,0.14,0.25,-0.06,0.1,0.18,0.04,-0.12,0.14,,
comments,0.32,,0.65,0.66,0.55,,0.15,0.1,0.09,0.1,,,,0.05,,,,0.08,0.07,0.09,0.07,0.08,-0.15,0.05,0.12,-0.08,0.1,0.12,,-0.09,,,-0.05
kudos,0.29,0.65,,0.86,0.87,0.07,0.22,0.2,0.16,0.19,0.06,0.04,0.05,0.07,0.04,0.05,,0.07,0.07,0.15,0.09,0.03,-0.17,0.08,0.2,-0.09,0.16,0.24,,-0.2,,,-0.1
bookmarks,0.36,0.66,0.86,,0.78,0.04,0.18,0.16,0.11,0.14,0.04,0.03,0.05,0.06,0.06,,,0.05,0.07,0.12,0.07,0.04,-0.19,0.09,0.21,-0.09,0.14,0.18,,-0.16,,,-0.09
hits,0.35,0.55,0.87,0.78,,0.07,0.21,0.19,0.15,0.18,0.11,0.07,0.08,0.08,0.09,0.07,0.03,0.03,0.04,0.12,0.07,,-0.25,0.13,0.34,-0.11,0.24,0.24,,-0.27,0.04,,-0.07
Apollo,0.07,,0.07,0.04,0.07,,-0.04,,-0.05,-0.06,0.15,0.11,0.17,0.05,0.13,0.38,0.12,0.04,,0.04,,,,,,0.06,-0.08,0.19,-0.05,,,,
Persephone,,0.15,0.22,0.18,0.21,-0.04,,0.71,-0.14,-0.13,0.08,0.04,,0.38,0.03,,,,,0.06,,,,,,,0.34,-0.2,,-0.09,,,
Hades,0.09,0.1,0.2,0.16,0.19,,0.71,,-0.1,-0.08,0.17,0.1,0.06,0.36,0.03,0.03,,,0.03,0.08,,,,,,,0.32,-0.15,-0.09,-0.1,,,
Achilles,,0.09,0.16,0.11,0.15,-0.05,-0.14,-0.1,,0.84,-0.08,-0.06,-0.06,-0.08,-0.06,-0.06,-0.04,0.11,0.03,0.06,0.12,,-0.04,,,,-0.17,0.39,-0.08,-0.09,,,-0.04
Patroclus,,0.1,0.19,0.14,0.18,-0.06,-0.13,-0.08,0.84,,-0.08,-0.06,-0.06,-0.07,-0.05,-0.07,-0.05,0.11,0.04,0.08,0.14,,-0.04,,,,-0.17,0.41,-0.07,-0.11,,,-0.04


# Export output to Excel if desired

In [16]:
# Write the colour-coded matrix to an Excel workbook using the openpyxl engine
styled_matrix.to_excel(excel_writer='styled.xlsx', engine='openpyxl')