In [None]:
# Location: the first 3 letters of each location
# multiple location entries indicate different sampling dates from that location

# Temp: the average temperature 30 days prior to the date of sampling for that location (see other script)

# Sex_f: the female:male sex ratio for the sampled individuals

# W_Single_prop_Infected: the single_infected:total_infected ratio for that date & site
# in this way we examine the relative propensity of a mosquito to be found singly infected, given that a Wolbachia infection occurs
# I consider this a valid hypothesis given previous evidence suggesting secondary loss of one of the two Wolbachia strains

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the tab-separated sampling table; its first row supplies the column names.
wolbachia_df = pd.read_csv(
    "Wolbachia_propSingle.txt",
    sep="\t",
    header=0,
)

# Leave the frame as the cell's last expression so the notebook renders it.
wolbachia_df

In [None]:
# Of the candidate tests (Fisher, chi-square, Kruskal-Wallis) the Kruskal-Wallis test is used here,
# because the dependent variable is somewhat continuous and there are at least two predictor
# variables (temperature and sex ratio).
# The Kruskal-Wallis test is a non-parametric statistical test
# used to compare the medians of two or more independent samples.
#
# NOTE(review): each column listed below is passed to kruskal() as a separate sample, so the test
# compares the proportion, temperature and sex-ratio values with one another -- confirm that this
# is the intended comparison.

from scipy.stats import kruskal

kw_columns = ['W_Single_prop_Infected', 'Temp', 'Sex_f']
KW_test = kruskal(*[wolbachia_df[column] for column in kw_columns])
print(KW_test)

In [None]:
# The Kruskal-Wallis p-value is highly significant, but it does not tell us which of the predictor
# variables (if not all of them) drive that significance. A post-hoc Dunn test is therefore run to
# identify the groups that differ; p-values are adjusted with the Holm method.

from scikit_posthocs import posthoc_dunn

# The samples are passed as a list, in the same order as in the Kruskal-Wallis call.
dunn_samples = [
    wolbachia_df[column]
    for column in ('W_Single_prop_Infected', 'Temp', 'Sex_f')
]
dunn_result = posthoc_dunn(dunn_samples, p_adjust='holm')

dunn_result

# Reading the result: it is a square matrix whose rows/columns follow the order of the input samples.
# Author's interpretation of the output:
#   temperature - single infection is highly significant
#   sex ratio - single infection is marginally non-significant due to a single outlier in the data

In [None]:
# Drop the rows whose Location is "AET" (the suspected outlier) and repeat the Kruskal-Wallis test,
# purely to verify the outlier hypothesis.
not_aet = wolbachia_df['Location'].ne("AET")
wolbachia_df_noOUT = wolbachia_df.loc[not_aet]

# Same three columns, same order, as in the test on the full table.
kw_columns_noOUT = ['W_Single_prop_Infected', 'Temp', 'Sex_f']
KW_test_noOUT = kruskal(*[wolbachia_df_noOUT[column] for column in kw_columns_noOUT])
print(KW_test_noOUT)

In [None]:
# Post-hoc Dunn test (Holm-adjusted) on the table without the "AET" rows; the samples are given
# in the same order as for the full-table Dunn test.
dunn_samples_noOUT = [
    wolbachia_df_noOUT[column]
    for column in ('W_Single_prop_Infected', 'Temp', 'Sex_f')
]
dunn_result_noOUT = posthoc_dunn(dunn_samples_noOUT, p_adjust='holm')

dunn_result_noOUT

# Author's interpretation of the output: sex ratio becomes slightly significant

In [None]:
# =================
# produce Figure 2
# =================
# Two side-by-side boxplots of the proportion of single infection:
#   panel A - grouped by temperature bin, panel B - grouped by sex-ratio bin.
# Both predictors are cut into three equal-width bins. The x-axis order and the colour
# mapping are derived from the bins pd.cut actually produced, so the labels cannot drift
# out of sync with the data (a label that matches no bin would silently yield an empty box).

# One colour per bin, from the lowest bin to the highest.
TEMP_BIN_COLORS = ['blue', 'mediumslateblue', 'salmon']
SEX_BIN_COLORS = ['lightskyblue', 'plum', 'fuchsia']


def _sex_bin_labels(bins):
    """Return the sex-ratio bins as strings, printing '(-0.001, 0.3]' as '[0, 0.3]'.

    pd.cut extends the lowest edge slightly below the observed minimum so that the
    minimum falls inside the first bin; that artefact is replaced by a closed interval
    starting at 0 for display. Any other label is returned unchanged.

    Parameters
    ----------
    bins : pandas.Series
        Series of intervals (or of categorical intervals) to convert.

    Returns
    -------
    pandas.Series
        String labels, index-aligned with ``bins``.
    """
    return bins.astype(str).str.replace(r'\(\-0\.001, 0\.3\]', '[0, 0.3]', regex=True)


def _draw_panel(ax, data, bin_col, order, colors, xlabel, panel_letter, panel_xy):
    """Draw one Figure 2 panel: 'W_Single_prop_Infected' boxplots per bin label.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    data : pandas.DataFrame
        Table holding ``bin_col`` and 'W_Single_prop_Infected'.
    bin_col : str
        Name of the column with the (string) bin labels used on the x axis.
    order : list of str
        Bin labels in plotting order.
    colors : list of str
        One colour per entry of ``order``.
    xlabel : str
        x-axis label.
    panel_letter : str
        Panel designator drawn inside the axes.
    panel_xy : tuple of float
        Position of the designator in axes coordinates.
    """
    sns.boxplot(x=bin_col,
                y="W_Single_prop_Infected",
                data=data,
                palette=dict(zip(order, colors)),
                order=order,
                ax=ax)

    ax.set_xlabel(xlabel, fontsize=18)
    ax.set_ylabel("Proportion of single infection", fontsize=18)

    ax.text(panel_xy[0], panel_xy[1], panel_letter,
            transform=ax.transAxes, fontsize=28, fontweight='bold')

    ax.xaxis.set_label_coords(0.5, -0.12)
    ax.yaxis.set_label_coords(-0.10, 0.5)

    # Hide the top/right spines and push the remaining ones 10 points away from the data.
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))


# Temperature bins: keep the categorical column and a string copy used for plotting.
wolbachia_df['temp_bins'] = pd.cut(wolbachia_df['Temp'], bins=3, precision=1)
wolbachia_df['temp_bins_str'] = wolbachia_df['temp_bins'].astype(str)
temp_order = pd.Series(wolbachia_df['temp_bins'].cat.categories).astype(str).tolist()

# Sex-ratio bins: stored as strings, with the lowest bin relabelled for display.
sex_cut = pd.cut(wolbachia_df['Sex_f'], bins=3, precision=1)
wolbachia_df['sex_bins'] = _sex_bin_labels(sex_cut)
wolbachia_df['sex_bins_str'] = wolbachia_df['sex_bins'].astype(str)
sex_order = _sex_bin_labels(pd.Series(sex_cut.cat.categories)).tolist()

# Create a figure with two subplots in a single row
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6), dpi=600)  # Adjust the figure size and DPI as needed

# Panel A: designator near the top-left corner of the axes.
_draw_panel(ax1, wolbachia_df, "temp_bins_str", temp_order, TEMP_BIN_COLORS,
            xlabel="Temperature bins (ranges in °C)",
            panel_letter="A", panel_xy=(0.10, 0.85))

# Panel B: designator near the bottom-left corner of the axes (y = 0.15 in axes coordinates).
_draw_panel(ax2, wolbachia_df, "sex_bins_str", sex_order, SEX_BIN_COLORS,
            xlabel="Female:Male bins",
            panel_letter="B", panel_xy=(0.10, 0.15))

# Save the entire figure as a PDF file for better quality
fig.savefig("Figure_2_rev_proofs.pdf", format='pdf', bbox_inches='tight')

# Show the entire figure (if you want to display it in the notebook)
plt.show()
