In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.ticker as ticker
import matplotlib.pylab as pl
from itertools import cycle
import matplotlib.gridspec as gridspec
import glob
import collections
import math
import re
import os
from collections import defaultdict

This notebook plots the Shannon diversity data (Shannon index H, Shannon equitability E and number of species S) for the bftt species-composition runs.

First, we need to combine the individual processed CSV files (one per run) into a dictionary keyed by run ID.

In [27]:
# Number of runs per dataset; run IDs 0 .. nRuns-1 are appended to the path prefixes below.
nRuns = 10
# Path prefixes for the processed shannon csv files. combineShannonDataIntoDict reads
# prefix + <runID> + ".csv" for every run.
# NOTE(review): the phase 2 prefixes stop at the directory, whereas the phase 4 prefixes
# also carry a file-name stem -- confirm the phase 2 files really are named "<runID>.csv".
phase2_bigK_filepath = "species_comp_calculations/phase2_data/"
phase4_bigK_filepath = "species_comp_calculations/phase4_data/shannon_calculations_precisest_bigK_phase4_runID-"

# Same as above, for the EDGE datasets.
phase2_bigK_filepath_EDGE = "species_comp_calculations/phase2_data_EDGE/"
phase4_bigK_filepath_EDGE = "species_comp_calculations/phase4_data_EDGE/shannon_calculations_precisest_bigK_phase4_runID-"

In [5]:
def combineShannonDataIntoDict(filepath, nRuns):
    '''
    Load the processed shannon csv file of every run into one dictionary.

    filepath: path prefix shared by all runs; the file read for a run is filepath + <runID> + ".csv"
    nRuns: number of runs; runIDs 0, 1, ..., nRuns-1 are loaded

    Returns a dict whose keys are "runID_<n>" and whose values are the dataframes read from the
    corresponding csv files.
    '''
    return {
        f"runID_{run_number}": pd.read_csv(filepath + str(run_number) + ".csv")
        for run_number in range(nRuns)
    }

In [22]:
def getAveragedShannonVals(shannon_data):
    '''
    Average every shannon quantity over all the runs, row by row.

    shannon_data: dict mapping runID -> dataframe with the columns "t", "nBac", "H", "E" and "S"

    Returns a tuple of five Series (t, nBac, H, E, S); each one holds, for every row,
    the mean of that column over all the runs.
    '''
    def mean_over_runs(column):
        # gather this column from every run (one dataframe column per run),
        # then take the mean across the runs for each row
        per_run = pd.DataFrame()
        for run_key, run_frame in shannon_data.items():
            per_run[run_key] = run_frame[column]
        return per_run.mean(axis=1)

    # t: time, nBac: no. of bacteria, H: shannon index, E: shannon equitability, S: no. of species
    return tuple(mean_over_runs(column) for column in ("t", "nBac", "H", "E", "S"))

In [30]:
# Load every run of each phase/dataset combination into its own dictionary.
# Each phase has to be read from its own path prefix: loading the phase 2 dictionaries
# from the phase 4 prefixes makes every "phase 2 vs phase 4" plot overlay the phase 4
# data on itself.
# NOTE(review): the phase 2 prefixes end at the directory, so these calls read
# "<phase 2 dir>/<runID>.csv" -- confirm that this matches the phase 2 file names.
phase2_shannon_dict = combineShannonDataIntoDict(phase2_bigK_filepath, nRuns)
phase4_shannon_dict = combineShannonDataIntoDict(phase4_bigK_filepath, nRuns)

phase2_shannon_dict_EDGE = combineShannonDataIntoDict(phase2_bigK_filepath_EDGE, nRuns)
phase4_shannon_dict_EDGE = combineShannonDataIntoDict(phase4_bigK_filepath_EDGE, nRuns)

In [31]:
# Spot-check the dataframe of a single run; the columns used later are t, nBac, H, E and S.
phase4_shannon_dict['runID_1']

Unnamed: 0.1,Unnamed: 0,t,nBac,H,E,S
0,0,0,1,0.000000,0.000000,1
1,1,2,846,6.653454,0.996461,794
2,2,4,1657,7.221947,0.992873,1442
3,3,6,2472,7.557532,0.991011,2051
4,4,8,3174,7.764970,0.989450,2560
...,...,...,...,...,...,...
95,95,190,72678,10.242914,0.974634,36662
96,96,192,73401,10.251629,0.974577,37014
97,97,194,73947,10.259192,0.974514,37328
98,98,196,74508,10.266815,0.974551,37606


In [32]:
# Average every quantity over the runs, separately for each phase.
# Each call returns (t, nBac, H, E, S) as row-wise means over all runs.
t_p2, nBac_p2, H_p2, E_p2, S_p2 = getAveragedShannonVals(phase2_shannon_dict)
t_p4, nBac_p4, H_p4, E_p4, S_p4 = getAveragedShannonVals(phase4_shannon_dict)

# Same averages for the EDGE datasets.
t_p2_EDGE, nBac_p2_EDGE, H_p2_EDGE, E_p2_EDGE, S_p2_EDGE = getAveragedShannonVals(phase2_shannon_dict_EDGE)
t_p4_EDGE, nBac_p4_EDGE, H_p4_EDGE, E_p4_EDGE, S_p4_EDGE = getAveragedShannonVals(phase4_shannon_dict_EDGE)

In [26]:
def plotPhaseComparisons(x_p2, y_p2, x_p4, y_p4, x_label, y_label, title_string, filename, save=False):
    '''
    Plot the phase 2 curve and the phase 4 curve of one quantity on the same axes.

    x_p2, y_p2: x and y values of the phase 2 curve
    x_p4, y_p4: x and y values of the phase 4 curve
    x_label, y_label: axis labels
    title_string: title of the figure
    filename: name of the output file inside "species_comp_plots/"; only used when save is True
    save: if True, the figure is written to "species_comp_plots/"+filename before it is shown.
          Defaults to False, so calls that do not pass it only display the figure.

    Returns None; the figure is displayed with plt.show().
    '''
    fig, ax = plt.subplots(figsize=(8,6), dpi=220)

    #skip the first value as it's t=0 so there's only like 1 bacteria in there
    ax.plot(x_p2[1:], y_p2[1:], label="phase 2", lw=2.4)
    ax.plot(x_p4[1:], y_p4[1:], label="phase 4", lw=2.4)

    ax.set_xlabel(x_label, fontsize=18)
    ax.set_ylabel(y_label, fontsize=18)
    ax.set_title(title_string, fontsize=20)
    ax.tick_params(axis="both", labelsize=18)
    ax.legend(fontsize=16)

    if save:
        #make sure the output directory exists, otherwise savefig raises FileNotFoundError
        os.makedirs("species_comp_plots", exist_ok=True)
        fig.savefig("species_comp_plots/"+filename, bbox_inches="tight")

    plt.show()
    #release the figure so that repeated calls don't accumulate open figures
    plt.close(fig)

In [None]:
#averaged quantities of each dataset, keyed by the axis label they are plotted under:
#(phase 2 averages, phase 4 averages, title suffix, filename suffix)
datasets = [
    (
        {"t": t_p2, "N": nBac_p2, "H": H_p2, "E": E_p2, "S": S_p2},
        {"t": t_p4, "N": nBac_p4, "H": H_p4, "E": E_p4, "S": S_p4},
        "",
        "",
    ),
    (
        {"t": t_p2_EDGE, "N": nBac_p2_EDGE, "H": H_p2_EDGE, "E": E_p2_EDGE, "S": S_p2_EDGE},
        {"t": t_p4_EDGE, "N": nBac_p4_EDGE, "H": H_p4_EDGE, "E": E_p4_EDGE, "S": S_p4_EDGE},
        " (edge)",
        "_EDGE",
    ),
]

#how each y quantity is named in the plot titles
quantity_titles = {
    "N": "no. of bacteria",
    "H": "Shannon Index",
    "E": "Shannon Equitability",
    "S": "no. of species",
}

#(x quantity, its name in the titles, y quantities plotted against it)
#first the shannon over time plots, then the shannon over N plots;
#within each group the plain dataset is plotted before the edge dataset
plot_groups = [
    ("t", "time", ["N", "H", "E", "S"]),
    ("N", "N", ["H", "E", "S"]),
]

for x_key, x_title, y_keys in plot_groups:
    for p2_avgs, p4_avgs, title_suffix, file_suffix in datasets:
        for y_key in y_keys:
            plotPhaseComparisons(
                p2_avgs[x_key], p2_avgs[y_key],
                p4_avgs[x_key], p4_avgs[y_key],
                x_key, y_key,
                quantity_titles[y_key] + " vs " + x_title + title_suffix,
                y_key + "_vs_" + x_key + file_suffix + ".pdf",
            )