# Predicting Higher-Order Expression-Growth Rate Relationships

The following code uses the pairwise continuous epistasis model to predict bacterial growth rate following three CRISPRi perturbations. It also quantifies growth rates from cells with these three-sgRNA constructs using a growth rate calculation similar to the one employed in previous code. The predictions from the continuous epistasis model outperform a coupling-insensitive Null model across all orders of CRISPRi perturbations.

6/28/22 - Ryan Otto

### Import packages and libraries

In [None]:
from Bio.Seq import Seq
import regex
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import pickle
import math
import itertools
import plot_defaults
plot_defaults.change_defaults()

### Define relevant variables

In [None]:
# Run label and the folders this notebook reads from / writes to
date = '220815'
input_path = 'input_files'
output_path = 'intermediate_files'
figure_path = 'Figures'
file_path = 'intermediate_files'
# File prefix names of the sequencing runs, and the barcodes that are counted
read_file_list = ['T0_S1', 'T2_S2', 'T4_S3', 'T6_S4', 'T8_S5', 'T10_S6']
BC_list = ['TGAAAG', 'CCATGC', 'CATGAT', 'ACTAGG', 'TAGACT', 'TCATAC']
# "Targets" are the desired sgRNA constructs at each CRISPRi location
sg1targets = ['gdhA_1_42_C', 'negC_rand_42', 'dapA_3_214_C', 'dapA_3_214_B_MM9', 'gdhA_1_42_B_MM8']
sg2targets = ['dapB_1_18_B_MM11', 'negC_rand_42', 'gltB_3_284_B_MM10', 'dapB_1_18_C', 'gltB_3_284_C']
sg3targets = ['folA_1_56_B_MM10', 'negC_rand_42', 'thyA_3_233_C', 'folA_1_56_B_MM3', 'thyA_3_233_B_MM5',
              'purN_3_238_B_MM11', 'purL_3_201_C', 'purL_3_201_B_MM5', 'purN_1_86_C']
# Build the sgRNA ID -> sequence lookup from the FASTA-style reference file
with open(f'{input_path}/Tri_FASTA.txt') as fasta:
    fasta_lines = fasta.readlines()
sgRNA2seq = {}
for idx, header in enumerate(fasta_lines):
    if header.startswith('>'):
        # The ID is the header without its leading '>' and final character (the newline).
        # The stored sequence is the reverse complement of characters 35-54 of the line
        # two below the header.
        homology = fasta_lines[idx + 2][35:55]
        sgRNA2seq[header[1:-1]] = str(Seq(homology).reverse_complement())

### Import data

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE: unpickling can execute arbitrary code, so only point this at trusted files
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Outputs of the earlier pairwise analysis
pair_avals = _load_pickle(f'{file_path}/220815_pair_avals.pickle')
qPCR_vals = _load_pickle(f'{file_path}/220815_repression_mean.pickle')
hill_elements = _load_pickle(f'{file_path}/220815_hill_elements.pickle')
# Minimum growth rate observed in the pairwise experiment
min_gr = _load_pickle(f'{file_path}/220815_pairwise_gr_min.pickle')
turb_gr = _load_pickle(f'{input_path}/turbidostat_GR.pickle')
pairwise_gr_rescale = _load_pickle(f'{file_path}/220815_df_growth_pool_filt_rescale.pickle')
pairwise_sem_rescale = _load_pickle(f'{file_path}/220815_df_growth_pool_filt_sem_rescale.pickle')
# This file was saved from this analysis. It can be regenerated by uncommenting code below, replacing the old file
# Construct read counts, indexed below as [timepoint][barcode]['sg1-sg2-sg3']
total_sgRNA_BC = _load_pickle(f'{input_path}/third_order_counts.pickle')

### Define analysis functions

In [None]:
def read_seqs(file_name, seq_path):
    """Extract sequences from paired-end FASTQ files
    Read 1 sequences are stored as written; Read 2 sequences are stored as their reverse complement.
    Arguments:
    file_name: Prefix for file to extract sequences from
    seq_path: Path to sequencing files
    Returns:
    sequences: A dictionary containing lists of Read 1 and Read 2 sequences (keys 'R1' and 'R2')
    """
    sequences = {}
    for read in ['R1', 'R2']:
        read_seqs_list = []
        with open(f'{seq_path}/{file_name}_L001_{read}_001.fastq', "r") as handle:
            # Stream the file instead of loading it whole: the sequence is the second line
            # of every four-line FASTQ record
            for line_num, line in enumerate(handle):
                if line_num % 4 != 1:
                    continue
                temp_seq = line.rstrip('\n')
                if read == 'R1':  # Don't reverse complement Read 1
                    read_seqs_list.append(temp_seq)
                else:  # Reverse complement Read 2
                    read_seqs_list.append(str(Seq(temp_seq).reverse_complement()))
        sequences[read] = read_seqs_list
    return sequences


def seq_find(seq, left, right, target_len, num_seqs, hamming_dist):
    """Identifies a target sequence using regular expressions. Sequence must be of specified length, and gaps
    cannot exist in the flanking sequences.
    Arguments:
    seq: Full sequence
    left: Left flanking sequence
    right: Right flanking sequence
    target_len: Length of desired sequence
    num_seqs: Total number of desired sequences in full sequence (1 or 2)
    hamming_dist: Substitution bound for the fuzzy match (no gaps). NOTE: the pattern is built with
                  '{s<hamming_dist}', i.e. a strict bound on the whole flank-target-flank pattern
    Returns:
    target_seq: For num_seqs == 1, the desired sequence, or 'Not found' unless exactly one match exists.
                For num_seqs == 2, a tuple of both sequences, or ('Not found', 'Not found') unless exactly
                two matches exist. For any other num_seqs, None.
    """
    regex_string = '(' + left + '.'*target_len + right + ')' + '{s<' + str(hamming_dist) + '}'
    target_area = regex.findall(regex_string, seq)
    # Trim the flanks with an explicit end index: a negative end index of -len(right) would
    # return an empty string when the right flank is empty
    target_seq = [found[len(left):len(found) - len(right)] for found in target_area]
    # Split the logic below to return the correct number of items
    if num_seqs == 1:
        return target_seq[0] if len(target_seq) == 1 else 'Not found'
    if num_seqs == 2:
        if len(target_seq) == 2:
            return target_seq[0], target_seq[1]
        return 'Not found', 'Not found'
    return None


def assign_ID(seq_dict, ID_to_seq, hamming_dist, targets):
    """Assigns an sgRNA ID to provided sequences. sgRNAs will be called if they match exactly one desired sequence
    within the provided hamming distance. Prints out summary statistics.
    Arguments:
    seq_dict: Dictionary containing sequences to be called (one list of sequences per key)
    ID_to_seq: Dictionary relating sgRNAs to their exact sequence
    hamming_dist: Acceptable number of mismatches
    targets: Desired sgRNAs in the given position
    Returns:
    sgRNA_IDs: Dictionary containing called sgRNA IDs. Each input sequence yields one entry: the sgRNA ID,
               'Not found' (passed through from the input), 'Multiple' (more than one match) or 'None'
    """
    sg_total, sgRNA_IDs = {}, {}
    for lib, lib_seqs in seq_dict.items():
        calls = []
        for seq in lib_seqs:
            if seq == 'Not found':
                calls.append('Not found')
                continue
            # Mismatches are counted position by position; zip stops at the shorter sequence
            found = [ID for ID, ref_seq in ID_to_seq.items()
                     if sum(x1 != y1 for x1, y1 in zip(seq, ref_seq)) <= hamming_dist]
            if len(found) == 1:  # Uniquely mapped sequences
                calls.append(found[0])
                sg_total[found[0]] = sg_total.get(found[0], 0) + 1
            elif found:  # Not uniquely mapped
                calls.append('Multiple')
            else:  # None mapped
                calls.append('None')
        sgRNA_IDs[lib] = calls
    total_seqs = sum(len(lib_seqs) for lib_seqs in seq_dict.values())
    total_assigned = sum(sg_total.values())
    total_targets = sum(count for sgRNA, count in sg_total.items() if sgRNA in targets)
    # Report nan instead of raising ZeroDivisionError when there is nothing to divide by
    pct_assigned = np.round(100*total_assigned/total_seqs, 3) if total_seqs else np.nan
    pct_wanted = np.round(100*total_targets/total_assigned, 3) if total_assigned else np.nan
    # The 12-space pad reproduces the spacing of the original summary line
    print(f'Percent assigned: {"":12}{pct_assigned}%')
    print(f'Percent wanted: {pct_wanted}%')
    return sgRNA_IDs

In [None]:
def qtest_dixon(replicates):
    """One-sided Dixon Q test at 95% confidence: statistical test used for identifying outliers in a data set
    https://www.philadelphia.edu.jo/academics/ajaber/uploads/0501522-Chapter%203-Statiscal%20tests.pdf
    Used here to identify CRISPRi escapers: replicates with abnormally fast growth rates.
    Only the largest value is tested, and only when at least four replicates are provided.
    Argument:
    replicates: List of growth rate replicates to test for an outlier
    Returns:
    First item: the sorted replicates without the escaper if one was found, otherwise the input list unchanged
    Second item: the escaper if one was found, otherwise nan
    """
    critical_q = [0, 0, 0.941, 0.765, 0.642, 0.560]  # One-sided values, 95% confidence; index = n - 1
    num_reps = len(replicates)
    if num_reps < 4:
        return replicates, np.nan
    ordered = sorted(replicates)
    largest = ordered[-1]
    spread = largest - ordered[0]
    if spread == 0:  # If all values are equal, can't run this test
        return replicates, np.nan
    gap = abs(ordered[-2] - largest)
    if gap / spread >= critical_q[num_reps - 1]:
        return ordered[:-1], largest
    return replicates, np.nan


def third_order_gr(avals_list, repression_list, params):
    """Predicts growth rates for every combination of three CRISPRi perturbations, accounting for coupling
    between the repressions.
    Arguments:
    avals_list: Six coupling constants, passed unchanged to solveReff_third. Its update formulas use them as
                [2 on 1, 1 on 2, 3 on 1, 1 on 3, 3 on 2, 2 on 3], where "j on i" scales perturbation j's
                term in the effective repression of perturbation i.
    repression_list: List of repression intensities to predict for all three knockdowns.
    params: Single-knockdown sigmoidal parameters; element 0 of each entry is used as Ro and element 1 as n.
    Returns:
    full_gr_list: A list of four 3D arrays indexed [i, j, k] by position in the three repression lists.
                  The first three are the relative growth rate contributions of each repression individually
                  (after accounting for coupling), and the fourth is their product, the overall growth rate.
    """
    half_max_list = [gene_params[0] for gene_params in params]  # Extract all R0 values
    reff_list, _ = solveReff_third(avals_list, repression_list, half_max_list)
    r1eff, r2eff, r3eff = reff_list  # Extract individual effective repressions
    gr1 = np.zeros(np.shape(r1eff))
    gr2 = np.zeros(np.shape(r2eff))
    gr3 = np.zeros(np.shape(r3eff))
    index_ranges = [range(len(repressions)) for repressions in repression_list[:3]]
    for idx in itertools.product(*index_ranges):
        gr1[idx] = growth_rate(r1eff[idx], params[0][0], params[0][1])
        gr2[idx] = growth_rate(r2eff[idx], params[1][0], params[1][1])
        gr3[idx] = growth_rate(r3eff[idx], params[2][0], params[2][1])
    gr_triple = gr1 * gr2 * gr3  # Overall growth rate is the product of the three contributions
    return [gr1, gr2, gr3, gr_triple]


def solveReff_third(avals_list, repression_list, Ro_list):
    """Solve for the effective repression of three CRISPRi perturbations, given six coupling constants
    describing all pairwise couplings between them.
    Each effective repression is iterated as
        r_i,eff = r_i / prod over j != i of (1 + a_(j on i) * (r_j,eff/Ro_j) / (1 + r_j,eff/Ro_j))
    starting from the uncoupled repressions and updating all three from the previous iteration's values.
    Arguments:
    avals_list: Six coupling constants, indexed as used by the update formulas:
                [0] 2 on 1, [1] 1 on 2, [2] 3 on 1, [3] 1 on 3, [4] 3 on 2, [5] 2 on 3
                (two constants per pair, pairs ordered 1-2, 1-3, 2-3).
    repression_list: List of repression intensities to predict for all three knockdowns.
    Ro_list: List of three Ro (repression at half-maximal growth rate) parameters
    Returns:
    reff_list: Three 3D arrays (indexed [i, j, k] by position in the three repression lists) holding each
               perturbation's effective repression after accounting for coupling
    resids_list: Three lists with the summed absolute change per iteration for each perturbation,
                 returned for troubleshooting and optimization
    """
    rep_1, rep_2, rep_3 = repression_list
    Ro_1, Ro_2, Ro_3 = Ro_list
    grid_shape = (len(rep_1), len(rep_2), len(rep_3))
    # Uncoupled repression of each perturbation, broadcast across the other two axes
    r1_grid = np.broadcast_to(np.asarray(rep_1, dtype=float)[:, None, None], grid_shape)
    r2_grid = np.broadcast_to(np.asarray(rep_2, dtype=float)[None, :, None], grid_shape)
    r3_grid = np.broadcast_to(np.asarray(rep_3, dtype=float)[None, None, :], grid_shape)
    r1eff, r2eff, r3eff = r1_grid.copy(), r2_grid.copy(), r3_grid.copy()

    def saturation(reff, Ro):
        # Saturating term (r/Ro) / (1 + r/Ro) that each coupling constant multiplies
        scaled = reff / Ro
        return scaled / (1 + scaled)

    resids, eps, count = np.inf, 0.01, 0  # Initialize sum of residuals, desired final residual, and count iterator
    resids_r1, resids_r2, resids_r3 = [], [], []
    # Iteratively reduce the residuals using the update formulas
    # If the desired residual is not reached, exit after 100 iterations
    while resids > eps and count < 100:
        sat_1 = saturation(r1eff, Ro_1)
        sat_2 = saturation(r2eff, Ro_2)
        sat_3 = saturation(r3eff, Ro_3)
        r1update = r1_grid / ((1 + avals_list[0]*sat_2) * (1 + avals_list[2]*sat_3))
        r2update = r2_grid / ((1 + avals_list[1]*sat_1) * (1 + avals_list[4]*sat_3))
        r3update = r3_grid / ((1 + avals_list[3]*sat_1) * (1 + avals_list[5]*sat_2))
        resids_r1.append(np.sum(abs(r1eff - r1update)))
        resids_r2.append(np.sum(abs(r2eff - r2update)))
        resids_r3.append(np.sum(abs(r3eff - r3update)))
        resids = resids_r1[-1] + resids_r2[-1] + resids_r3[-1]
        r1eff, r2eff, r3eff = r1update, r2update, r3update
        count += 1
    reff_list = [r1eff, r2eff, r3eff]
    resids_list = [resids_r1, resids_r2, resids_r3]
    return reff_list, resids_list


def growth_rate(r, Ro, n):
    """Calculates an expected growth rate using a sigmoidal formula: 1 / (1 + exp(n*(r - Ro))).
    Arguments:
    r: Repression level to use when predicting growth rates
    Ro: Repression level at half-maximal growth rate
    n: Steepness of the repression-growth rate function
    Returns:
    g_rate: Predicted growth rate
    """
    try:
        g_rate = 1 / (1+math.exp(n*(r-Ro)))
    except OverflowError:
        # math.exp raises once the exponent is too large for a float; the sigmoid's limit there is 0
        g_rate = 0.0
    return g_rate

In [None]:
def nanrms(x, axis=None):
    """Self-defined root mean square function that ignores nan entries. Used for convenience and consistency.
    Arguments:
    x: Array (or any array-like, e.g. a list) of residuals
    axis: In case of an array of arrays, enter the desired axis to calculate across
    Directly returns the RMSD of the residuals provided
    """
    residuals = np.asarray(x)  # Allows plain lists, which do not support ** themselves
    return np.sqrt(np.nanmean(residuals**2, axis=axis))


def model_plot(exp_gr_mean, model_pred, null_pred, num_nont, figure_path, fig_names=None):
    """Plots predicted against experimental growth rates.
    Draws two scatter plots: the first for the coupling-sensitive model, the second for the Null model.
    Each plot is annotated with the RMSD between prediction and experiment. Constructs whose experimental
    growth rate is nan are skipped, and constructs can be filtered by how many nontargeting sgRNAs they carry.
    Arguments:
    exp_gr_mean: Experimentally determined growth rates, keyed by 'sg1-sg2-sg3' construct ID (Real)
    model_pred: Predicted growth rates from the coupling-sensitive model, same keys (Model)
    null_pred: Predicted growth rates from the Null model, same keys (Null)
    num_nont: A list containing acceptable numbers of nontargeting sgRNAs to plot.
    For example, [0] would only plot constructs with three targeting sgRNAs.
    [1, 2, 3] plots all other constructs, including the control construct.
    [0, 1, 2, 3] plots all data
    figure_path: Path to a folder storing figure output
    fig_names: List of names for the saved figures, if desired (one per plot, in plotting order)
    """
    # Organize data
    data_dict = {'Exp': [], 'Epistatic': [], 'Null': []}
    for full_sgRNA, measured_gr in exp_gr_mean.items():
        if np.isnan(measured_gr):
            continue
        # Check if this construct should be plotted
        if full_sgRNA.split('-').count('negC_rand_42') not in num_nont:
            continue
        data_dict['Exp'].append(measured_gr)
        data_dict['Epistatic'].append(model_pred[full_sgRNA])
        data_dict['Null'].append(null_pred[full_sgRNA])
    # Generate plots
    axis_range, axis_ticks = [0, 1.2], [0.5, 1]
    for i, model in enumerate(['Epistatic', 'Null']):
        predicted = data_dict[model]
        fig, ax = plt.subplots(figsize=(4, 4))
        ax.scatter(data_dict['Exp'], predicted, color='xkcd:gray', s=60, ec='xkcd:dark gray', zorder=2)
        ax.plot(axis_range, axis_range, lw=1, color='xkcd:dark gray', ls='--', zorder=0)  # Identity line
        ax.set_xlim(axis_range)
        ax.set_ylim(axis_range)
        ax.set_xticks(axis_ticks)
        ax.set_yticks(axis_ticks)
        ax.set_xlabel('Tri sgRNA Data', fontsize=20)
        ax.set_ylabel(f'{model} Prediction', fontsize=20)
        rmsd = np.round(nanrms(np.array(data_dict['Exp']) - np.array(predicted)), 3)
        ax.text(0.02, 1, f"RMSD: {rmsd}", fontsize=12)
        plt.tight_layout()
        if fig_names:
            plt.savefig(f'{figure_path}/Fig{fig_names[i]}.pdf')
        plt.show()

### Assign sgRNA construct IDs from FASTQ files

This code cannot run without first downloading the sequencing files for the third-order library (see publication for details). Once downloaded, define the seq_path variable to connect to the sequencing files, then run the following code. Its output is saved so this analysis can continue without downloading sequencing files.

The following code first extracts sequencing reads from Illumina paired-end FASTQ files, reverse-complementing the necessary read to standardize both sequences. Then, using regular expressions, it locates three 20 bp sgRNA homology sequences and one 6 bp BC sequence from each read. These sequences are assigned IDs based on a sequence-sgRNA lookup dictionary.

In [None]:
# The FASTQ-processing pipeline below is wrapped in a string literal, so this cell does nothing when run.
# If enabled, it would rebuild total_sgRNA_BC and overwrite {input_path}/third_order_counts.pickle, the
# file the data-import cell loads. To regenerate that file, set seq_path to the folder holding the
# FASTQ files and remove the surrounding triple quotes.
"""
seq_path = ''
# Extract full sequences from FASTQ files
sequences = {}
for read_file in read_file_list:
    sp = read_file.split('_')
    sequences[sp[0]] = read_seqs(read_file, seq_path)
# Identify three sgRNA sequences and the barcode sequnence from the fastq files
sgRNA1_dict, sgRNA2_dict, sgRNA3_dict, BC_dict = {}, {}, {}, {}
for read_file in read_file_list:
    sp = read_file.split('_')
    sgRNA1_dict[sp[0]] = []
    sgRNA2_dict[sp[0]] = []
    sgRNA3_dict[sp[0]] = []
    BC_dict[sp[0]] = []
    for ind in range(len(sequences[sp[0]]['R1'])):
        sgRNA2_seq, sgRNA1_seq = seq_find(sequences[sp[0]]['R2'][ind], 'CTAGCTCTAAAAC', 'ACTAGTATTATAC', 20, 2, 10)
        sgRNA3_seq = seq_find(sequences[sp[0]]['R1'][ind], 'CTAGCTCTAAAAC', 'ACTAGTATTATAC', 20, 1, 6)
        BC_seq = seq_find(sequences[sp[0]]['R1'][ind], 'GTACAGCGAGGCAAC', 'ACGGATCCCCAC', 6, 1, 6)
        sgRNA1_dict[sp[0]].append(sgRNA1_seq)
        sgRNA2_dict[sp[0]].append(sgRNA2_seq)
        sgRNA3_dict[sp[0]].append(sgRNA3_seq)
        BC_dict[sp[0]].append(BC_seq)
# Assign sgRNA names from sequences
sgRNA1_IDs = assign_ID(sgRNA1_dict, sgRNA2seq, 2, sg1targets)
sgRNA2_IDs = assign_ID(sgRNA2_dict, sgRNA2seq, 8, sg2targets)
sgRNA3_IDs = assign_ID(sgRNA3_dict, sgRNA2seq, 0, sg3targets)
# Combine individual sgRNA and BC calls to complete constructs
# Only counts sequencing reads that have three correctly called sgRNAs and a barcode across all timepoints. 
total_sgRNA_BC = {}
for tp in sgRNA1_IDs:
    total_sgRNA_BC[tp] = {}
    for BC in BC_list:
        total_sgRNA_BC[tp][BC] = {}
    for i, sgRNA1 in enumerate(sgRNA1_IDs[tp]):
        sgRNA2 = sgRNA2_IDs[tp][i]
        sgRNA3 = sgRNA3_IDs[tp][i]
        BC = BC_dict[tp][i]
        if sgRNA1 in sgRNA2seq and sgRNA2 in sgRNA2seq and sgRNA3 in sgRNA2seq and BC in BC_list:
            if sgRNA1 in sg1targets and sgRNA2 in sg2targets and sgRNA3 in sg3targets:
                full = f'{sgRNA1}-{sgRNA2}-{sgRNA3}'
                if full in total_sgRNA_BC[tp][BC]:
                    total_sgRNA_BC[tp][BC][full] += 1
                else:
                    total_sgRNA_BC[tp][BC][full] = 1
# Save outupt
with open(f'{input_path}/third_order_counts.pickle', 'wb') as handle:
    pickle.dump(total_sgRNA_BC, handle, protocol=pickle.HIGHEST_PROTOCOL)
"""
pass

### Plot the overall distribution of sgRNA constructs across all barcodes and timepoints

In [None]:
fig, ax = plt.subplots(figsize=(4, 4))
# Pool the read counts of every construct over all timepoints and barcodes
xVals = [count for tp in total_sgRNA_BC for BC in BC_list for count in total_sgRNA_BC[tp][BC].values()]
ax.hist(xVals, bins=[10**(x/4) for x in range(14)])  # Quarter-decade bins from 10^0 to 10^3.25
ax.set_xscale('log')
ax.set_xlabel('# Counts', fontsize=20)
ax.set_ylabel('# Constructs', fontsize=20)
plt.show()
# Check how many unique barcoded constructs were present at T0
num_xVals_T0 = [len(total_sgRNA_BC['T0'][BC]) for BC in BC_list]
# Every possible barcoded construct: one sgRNA per position times the number of barcodes.
# Derived from the target lists (currently 5*5*9*6) so it cannot drift out of sync with them.
num_possible = len(sg1targets) * len(sg2targets) * len(sg3targets) * len(BC_list)
print(f'Percent of sequences identified at T0: {np.round(sum(num_xVals_T0)/num_possible*100, 2)}%')

### Calculate relative sgRNA frequency and growth rates

We first normalize raw counts by the nontargeting construct at each timepoint. Then we normalize relative frequencies at every timepoint by the relative frequency at $T_0$. For all constructs with counts present through the first three timepoints, we fit a line to $log_2$(relative frequency) vs. time data. The slope of this line is the construct's relative growth rate effect.

$\text{relative frequency}_{a,T} = \frac{\text{counts}_{a,T} / \text{counts}_{\text{Nont},T}}{\text{counts}_{a,T_0} / \text{counts}_{\text{Nont},T_0}}$

In [None]:
nont = 'negC_rand_42-negC_rand_42-negC_rand_42'
# gr_dict holds the fitted slope (relative growth rate) and b_dict the fitted intercept,
# both indexed [barcode][construct]
gr_dict, b_dict = {}, {}
for BC in BC_list:
    gr_dict[BC], b_dict[BC] = {}, {}
    for sg1, sg2, sg3 in itertools.product(sg1targets, sg2targets, sg3targets):
        full_sgRNA = f'{sg1}-{sg2}-{sg3}'
        temp_vals, temp_tp = [], []
        # Walk the timepoints in stored order and stop at the first one missing this construct
        for tp, tp_counts in total_sgRNA_BC.items():
            bc_counts = tp_counts[BC]
            if full_sgRNA not in bc_counts:
                break
            construct_count = bc_counts[full_sgRNA]
            temp_vals.append(construct_count/bc_counts[nont])  # Normalize by the nontargeting construct
            temp_tp.append(int(tp[1:])*turb_gr['tri'])  # Timepoint label (e.g. 'T2' -> 2) scaled by turb_gr['tri']
            # NOTE(review): this check runs after the value has been recorded, so a count below 1
            # would still enter the fit before the loop stops - confirm that is intended
            if construct_count < 1:
                break
        if len(temp_tp) < 3:
            continue  # Fewer than three consecutive timepoints: no fit
        norm_vals = [np.log2(x/temp_vals[0]) for x in temp_vals]  # Relative to the first timepoint
        slope, intercept, r_value, p_value, std_err = stats.linregress(temp_tp, norm_vals)
        gr_dict[BC][full_sgRNA] = slope
        b_dict[BC][full_sgRNA] = intercept

### Escaper correction and rescaling

We first remove escapers, as before, using a one-sided Dixon Q-test at 95% confidence. After removing escapers, we average all constructs with at least four barcoded measurements remaining. Then, we normalize each construct's mean growth rate by the minimum growth rate observed in the pairwise CRISPRi library, which rescales the nontargeting construct's growth rate to 1 and makes all growth rates non-negative.

In [None]:
escaper_dict_tri = {}
gr_mean_dict, gr_std_dict, gr_sem_dict = {}, {}, {}
gr_mean_rescale_dict, gr_std_rescale_dict, gr_sem_rescale_dict = {}, {}, {}
all_stat_dicts = (gr_mean_dict, gr_std_dict, gr_sem_dict,
                  gr_mean_rescale_dict, gr_std_rescale_dict, gr_sem_rescale_dict)
for sg1, sg2, sg3 in itertools.product(sg1targets, sg2targets, sg3targets):
    full_sgRNA = f'{sg1}-{sg2}-{sg3}'
    # Growth rates of this construct from every barcode that produced a fit
    temp_vals = [gr_dict[BC][full_sgRNA] for BC in BC_list if full_sgRNA in gr_dict[BC]]
    if len(temp_vals) < 4:
        # Too few barcoded measurements: record the construct as missing everywhere
        for stat_dict in all_stat_dicts:
            stat_dict[full_sgRNA] = np.nan
        continue
    # The four-measurement threshold is applied before the outlier test, so a construct with
    # exactly four measurements and one escaper is summarized from the remaining three
    v_values, escaper_val = qtest_dixon(temp_vals)
    if not np.isnan(escaper_val):
        escaper_dict_tri[full_sgRNA] = escaper_val
    mean_gr, std_gr, sem_gr = np.mean(v_values), np.std(v_values), stats.sem(v_values)
    gr_mean_dict[full_sgRNA] = mean_gr
    gr_std_dict[full_sgRNA] = std_gr
    gr_sem_dict[full_sgRNA] = sem_gr
    # Rescale by the pairwise library's minimum growth rate: shift the mean, scale the spreads
    gr_mean_rescale_dict[full_sgRNA] = (mean_gr+abs(min_gr)) / abs(min_gr)
    gr_std_rescale_dict[full_sgRNA] = std_gr / abs(min_gr)
    gr_sem_rescale_dict[full_sgRNA] = sem_gr / abs(min_gr)

### Plot single- and pairwise knockdown growth rates relative to the pairwise only library

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))
# The coloring scheme and 2D error bars necessitate a bit of data reshuffling:
# "_s" lists hold single-knockdown constructs, "_p" lists hold pairwise-knockdown constructs
xVals_s, yVals_s, x_sem_s, y_sem_s = [], [], [], []
xVals_p, yVals_p, x_sem_p, y_sem_p = [], [], [], []
for full_sgRNA, mean_gr in gr_mean_dict.items():
    if np.isnan(mean_gr):
        continue
    guides = [x for x in full_sgRNA.split('-') if x != 'negC_rand_42']  # Targeting sgRNAs only
    if len(guides) == 1:
        xVals_s.append(gr_mean_rescale_dict[full_sgRNA])
        yVals_s.append(pairwise_gr_rescale.loc[guides[0], 'negC_rand_42'])
        x_sem_s.append(gr_sem_rescale_dict[full_sgRNA])
        y_sem_s.append(pairwise_sem_rescale.loc[guides[0], 'negC_rand_42'])
    elif len(guides) == 2:
        # Use the first row/column orientation with a pairwise measurement, if either has one
        for row, col in ((guides[0], guides[1]), (guides[1], guides[0])):
            if not np.isnan(pairwise_gr_rescale.loc[row, col]):
                xVals_p.append(gr_mean_rescale_dict[full_sgRNA])
                yVals_p.append(pairwise_gr_rescale.loc[row, col])
                x_sem_p.append(gr_sem_rescale_dict[full_sgRNA])
                y_sem_p.append(pairwise_sem_rescale.loc[row, col])
                break
ax.errorbar(xVals_s, yVals_s, xerr=x_sem_s, yerr=y_sem_s, fmt='o', mec='xkcd:dark gray', ms=10, c='xkcd:orange')
ax.errorbar(xVals_p, yVals_p, xerr=x_sem_p, yerr=y_sem_p, fmt='o', mec='xkcd:dark gray', ms=10, c='b', zorder=0)
ax.set_xlim([0, 1.2])
ax.set_ylim([0, 1.2])
ax.set_xticks([0.5, 1])
ax.set_yticks([0.5, 1])
ax.set_xlabel('Tri sgRNA GR', fontsize=20)
ax.set_ylabel('Pairwise sgRNA GR', fontsize=20)
# RMSD between the two libraries over single and pairwise constructs together
xVals_full = np.concatenate((np.array(xVals_s), np.array(xVals_p)))
yVals_full = np.concatenate((np.array(yVals_s), np.array(yVals_p)))
ax.text(0.06, 0.9, f'RMSD: {np.round(nanrms(xVals_full - yVals_full), 3)}', fontsize=14)
ax.plot([-0.1, 1.3], [-0.1, 1.3], lw=1, color='xkcd:dark gray', ls='--')  # Identity line
ax.text(0.8, 0.06, 'Single sgRNA', c='xkcd:orange', fontsize=14)
ax.text(0.8, 0.16, 'Double sgRNA', c='b', fontsize=14)
plt.tight_layout()
plt.savefig(f'{figure_path}/FigS8.pdf')
plt.show()

### Predicting third-order growth rates

To predict growth rates of these third-order CRISPRi constructs, we need each sgRNA's repression intensity, which we extract from qPCR data. Then, we use these repression values, each gene's repression-growth rate function, and the relevant coupling constants to calculate effective repression values for each perturbation. Finally, we combine these to calculate a predicted growth rate following each perturbation.

In [None]:
# For each gene: the nontargeting control followed by the two sgRNAs used against that gene.
# The list order sets the index order of the predicted growth rate arrays.
sgRNA_dict = {
    'dapA': ['negC_rand_42', 'dapA_3_214_B_MM9', 'dapA_3_214_C'],
    'dapB': ['negC_rand_42', 'dapB_1_18_B_MM11', 'dapB_1_18_C'],
    'purN': ['negC_rand_42', 'purN_3_238_B_MM11', 'purN_1_86_C'],
    'purL': ['negC_rand_42', 'purL_3_201_B_MM5', 'purL_3_201_C'],
    'folA': ['negC_rand_42', 'folA_1_56_B_MM10', 'folA_1_56_B_MM3'],
    'thyA': ['negC_rand_42', 'thyA_3_233_B_MM5', 'thyA_3_233_C'],
    'gltB': ['negC_rand_42', 'gltB_3_284_B_MM10', 'gltB_3_284_C'],
    'gdhA': ['negC_rand_42', 'gdhA_1_42_B_MM8', 'gdhA_1_42_C'],
}
# Repression value of each listed sgRNA, in the same order as sgRNA_dict
qPCR_dict = {gene: [qPCR_vals[gene][sgRNA] for sgRNA in sgRNA_list]
             for gene, sgRNA_list in sgRNA_dict.items()}

In [None]:
gr_trip_dict, gr_null_dict = {}, {}
position_genes = (['gdhA', 'dapA'], ['gltB', 'dapB'], ['folA', 'thyA', 'purN', 'purL'])
for gene1, gene2, gene3 in itertools.product(*position_genes):
    gene_set = [gene1, gene2, gene3]
    # Gather the coupling constants of all pairwise combinations, in order (1, 2), (1, 3), (2, 3)
    avals_list = []
    for gene_a, gene_b in itertools.combinations(gene_set, 2):
        # This logic corrects for gene order
        # NOTE(review): when only the reversed key exists, its values are appended in stored
        # order - confirm they do not also need to be swapped
        pair_key = (gene_a, gene_b) if (gene_a, gene_b) in pair_avals else (gene_b, gene_a)
        avals_list += list(pair_avals[pair_key])
    repression_list = [qPCR_dict[gene] for gene in gene_set]
    params = [list(hill_elements[gene]) for gene in gene_set]
    gr_list = third_order_gr(avals_list, repression_list, params)
    # Null model: the same calculation with every coupling constant set to zero
    gr_list_null = third_order_gr(np.array([0, 0, 0, 0, 0, 0]), repression_list, params)
    indexed_guides = itertools.product(enumerate(sgRNA_dict[gene1]), enumerate(sgRNA_dict[gene2]),
                                       enumerate(sgRNA_dict[gene3]))
    for (i, sg1), (j, sg2), (k, sg3) in indexed_guides:
        full_sgRNA = f'{sg1}-{sg2}-{sg3}'
        # The last array of each result is the overall (triple) growth rate
        gr_trip_dict[full_sgRNA] = gr_list[-1][i, j, k]
        gr_null_dict[full_sgRNA] = gr_list_null[-1][i, j, k]

### Plot the results of the coupling-sensitive model predictions and Null model predictions

In [None]:
# Constructs with one or two nontargeting sgRNAs; figures are shown but not saved
model_plot(gr_mean_rescale_dict, gr_trip_dict, gr_null_dict, num_nont=[1, 2], figure_path=figure_path,
           fig_names=None)

In [None]:
# Constructs with three targeting sgRNAs; saved as Fig5C (Epistatic) and Fig5D (Null)
model_plot(gr_mean_rescale_dict, gr_trip_dict, gr_null_dict, num_nont=[0], figure_path=figure_path,
           fig_names=['5C', '5D'])

In [None]:
# All constructs, whatever their number of nontargeting sgRNAs; figures are shown but not saved
model_plot(gr_mean_rescale_dict, gr_trip_dict, gr_null_dict, num_nont=[0, 1, 2, 3], figure_path=figure_path,
           fig_names=None)

### Generate tables and export data

In [None]:
# Build Table S5 in one step from a list of rows (one per construct). Filling a float/nan frame
# row by row with strings relies on silent dtype upcasting, which newer pandas versions deprecate.
table_s5_columns = ['sgRNA1', 'sgRNA2', 'sgRNA3', 'Growth Rate', 'Growth Rate SEM', 'Model Prediction',
                    'Null Prediction']
table_s5_rows = []
for full_sgRNA in gr_mean_rescale_dict:
    sgRNA_split = full_sgRNA.split('-')
    table_s5_rows.append([sgRNA_split[0], sgRNA_split[1], sgRNA_split[2], gr_mean_rescale_dict[full_sgRNA],
                          gr_sem_rescale_dict[full_sgRNA], gr_trip_dict[full_sgRNA], gr_null_dict[full_sgRNA]])
table_s5 = pd.DataFrame(table_s5_rows, columns=table_s5_columns)

In [None]:
# Write Table S5 into the supplementary workbook, replacing the sheet if it is already there.
# NOTE(review): mode='a' appends to a workbook that presumably already exists - confirm it is
# created before this notebook runs
workbook_name = 'Supplementary_Tables.xlsx'
with pd.ExcelWriter(workbook_name, mode='a', if_sheet_exists='replace') as writer:
    table_s5.to_excel(writer, sheet_name='Table S5')