In [None]:
import pandas as pd
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
import toml
import re
import urllib
from datetime import datetime
import string
import pygsheets
import requests
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import benchlingapi

In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell runs and edits to the local packages take
# effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.golden_gate as golden_gate
import paulssonlab.cloning.codon as codon
import paulssonlab.cloning.optimization as optimization

In [None]:
# Load the Bokeh backend for HoloViews; the hvplot scatter plots below render through it.
hv.extension("bokeh")

# Download data

In [None]:
# Fetch the two supplementary workbooks for DOI 10.1038/msb.2013.58 into
# data/2013sigmafactorswitches_supp/ (created if missing).
# -L follows redirects and -o names the output file.
# NOTE(review): `-b does_not_exist` points curl at a cookie file that is not
# there; presumably this only switches on cookie handling so the download
# redirect chain works — confirm against the curl manual.
!mkdir -p data/2013sigmafactorswitches_supp
!curl -b does_not_exist -Lo data/2013sigmafactorswitches_supp/supptable1.xlsx "https://www.embopress.org/action/downloadSupplement?doi=10.1038%2Fmsb.2013.58&file=msb201358-sup-0002.xlsx"
!curl -b does_not_exist -Lo data/2013sigmafactorswitches_supp/supptable2.xlsx "https://www.embopress.org/action/downloadSupplement?doi=10.1038%2Fmsb.2013.58&file=msb201358-sup-0003.xlsx"

# Setup

In [None]:
# Parse the notebook's settings (including the Benchling API key read below)
# from config.toml in the working directory.
with open("config.toml", encoding="utf-8") as config_file:
    config = toml.load(config_file)

In [None]:
# Open a Benchling API session with the key kept in the [benchling] section
# of the loaded configuration.
benchling_settings = config["benchling"]
session = benchlingapi.Session(benchling_settings["api_key"])

In [None]:
# Authorize the Google Sheets client with the service-account key file
# credentials.json (path is relative to the working directory).
# NOTE(review): `gc` is also the name of a standard-library module; the name
# is kept because later cells reference it.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Look up the "LIB" strain collection through the Drive service. The result
# is indexed below with the keys "strains", "plasmids", "parts" and
# "plasmid_maps".
col = workflow.get_strain_collection_sheets(gc.drive.service, "LIB")
col

In [None]:
# Worksheets of the collection's spreadsheets. A no-argument worksheet() call
# is used for the main sheet of each (presumably the first tab — confirm
# against the pygsheets documentation).
strain_sheet = gc.open_by_key(col["strains"]).worksheet()
plasmid_sheet = gc.open_by_key(col["plasmids"]).worksheet()

# The parts spreadsheet provides two worksheets, so open it once and reuse the
# handle instead of issuing a second open_by_key request for the same key.
parts_spreadsheet = gc.open_by_key(col["parts"])
part_sheet = parts_spreadsheet.worksheet()
part_type_sheet = parts_spreadsheet.worksheet_by_title("Part types")

In [None]:
# List the contents of the collection's plasmid-map folder, reusing the Drive
# service that backs the plasmid worksheet's client.
plasmid_folder = col["plasmid_maps"]
drive_service = plasmid_sheet.client.drive.service
plasmid_maps = api.google.list_drive(
    drive_service,
    root=plasmid_folder,
)

# 2013 Voigt sigma factors

## Import data

In [None]:
# Supplementary table 1, first sheet; its first column becomes the index.
sigmas = pd.read_excel(
    io="data/2013sigmafactorswitches_supp/supptable1.xlsx",
    index_col=0,
    sheet_name=0,
)

In [None]:
# Supplementary table 1, third sheet; its first column becomes the index.
antisigmas = pd.read_excel(
    io="data/2013sigmafactorswitches_supp/supptable1.xlsx",
    index_col=0,
    sheet_name=2,
)

In [None]:
# Supplementary table 1, second sheet; its first column becomes the index.
sigma_promoters = pd.read_excel(
    io="data/2013sigmafactorswitches_supp/supptable1.xlsx",
    index_col=0,
    sheet_name=1,
)

In [None]:
# Mean and standard-deviation matrices from the first sheet of supplementary
# table 2. Both occupy columns B:CJ with 27 data rows and differ only in how
# many leading rows are skipped (1 for the means, 31 for the std devs).
#
# Axis labels: later cells select columns of this matrix by sigma-factor name
# and compare its index against promoter names, so rows are promoters and
# columns are sigmas. The axes were previously labelled "Sigma"/"Antisigma",
# which mislabelled every plot drawn from these frames.
sigma_orthogonality_mean, sigma_orthogonality_stddev = (
    pd.read_excel(
        "data/2013sigmafactorswitches_supp/supptable2.xlsx",
        sheet_name=0,
        index_col=0,
        nrows=27,
        skiprows=rows_to_skip,
        usecols="B:CJ",
    ).rename_axis(index="Promoter", columns="Sigma")
    for rows_to_skip in (1, 31)
)

In [None]:
# Mean and standard-deviation matrices from the third sheet of supplementary
# table 2. Both occupy columns B:AL with 25 data rows and differ only in how
# many leading rows are skipped (1 for the means, 29 for the std devs).
# Rows are labelled "Antisigma" and columns "Sigma".
antisigma_orthogonality_mean, antisigma_orthogonality_stddev = (
    pd.read_excel(
        "data/2013sigmafactorswitches_supp/supptable2.xlsx",
        sheet_name=2,
        index_col=0,
        nrows=25,
        skiprows=rows_to_skip,
        usecols="B:AL",
    ).rename_axis(index="Antisigma", columns="Sigma")
    for rows_to_skip in (1, 29)
)

In [None]:
# Titration data from the second sheet of supplementary table 2: the first 52
# data rows, with a two-row header that yields two-level column labels (the
# cells below address columns by such pairs).
sigma_titration = pd.read_excel(
    io="data/2013sigmafactorswitches_supp/supptable2.xlsx",
    sheet_name=1,
    header=[0, 1],
    index_col=0,
    nrows=52,
)

In [None]:
# Growth data from the fourth sheet of supplementary table 2. The sheet has a
# three-row header and two trailing rows that are not data. Columns that are
# entirely empty are discarded, and the first remaining column becomes the
# index, labelled "Sigma".
sigma_growth = (
    pd.read_excel(
        "data/2013sigmafactorswitches_supp/supptable2.xlsx",
        sheet_name=3,
        skipfooter=2,
        header=(0, 1, 2),
    )
    .dropna(axis=1, how="all")
    .pipe(lambda frame: frame.set_index(frame.columns[0]))
    .rename_axis(index="Sigma")
)

In [None]:
# Growth data from the fifth sheet of supplementary table 2. The sheet has a
# three-row header and two trailing rows that are not data. Columns that are
# entirely empty are discarded, and the first remaining column becomes the
# index, labelled "Antisigma".
antisigma_growth = (
    pd.read_excel(
        "data/2013sigmafactorswitches_supp/supptable2.xlsx",
        sheet_name=4,
        skipfooter=2,
        header=(0, 1, 2),
    )
    .dropna(axis=1, how="all")
    .pipe(lambda frame: frame.set_index(frame.columns[0]))
    .rename_axis(index="Antisigma")
)

## Orthogonality

In [None]:
# Keep only the rows and columns of the mean matrix that contain at least one
# value above the threshold. A column's qualifying entry always sits in a row
# that is itself kept, so both masks can be computed from the full matrix.
sigma_thresh = 50
sigma_above_thresh = sigma_orthogonality_mean > sigma_thresh
sigma_subset = sigma_orthogonality_mean.loc[
    sigma_above_thresh.any(axis=1), sigma_above_thresh.any(axis=0)
]

In [None]:
# Hierarchically cluster the transposed, thresholded matrix using correlation
# distance, then remove the dendrogram axes drawn for the columns.
g = sns.clustermap(
    data=sigma_subset.T,
    metric="correlation",
    cmap="GnBu",
    linewidths=2,
    figsize=(13, 13),
)
g.ax_col_dendrogram.remove()

In [None]:
# Full antisigma mean matrix as an annotated heatmap (values shown without
# decimals) on a 15x15 inch figure.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(data=antisigma_orthogonality_mean, annot=True, fmt=".0f", ax=ax)

In [None]:
# Keep only the rows and columns of the mean matrix that contain at least one
# value above the threshold. A column's qualifying entry always sits in a row
# that is itself kept, so both masks can be computed from the full matrix.
antisigma_thresh = 30
antisigma_above_thresh = antisigma_orthogonality_mean > antisigma_thresh
antisigma_subset = antisigma_orthogonality_mean.loc[
    antisigma_above_thresh.any(axis=1), antisigma_above_thresh.any(axis=0)
]

In [None]:
# Hierarchically cluster the transposed, thresholded antisigma matrix using
# correlation distance, then remove the dendrogram axes drawn for the columns.
g = sns.clustermap(
    data=antisigma_subset.T,
    metric="correlation",
    cmap="GnBu",
    linewidths=2,
    figsize=(13, 13),
)
g.ax_col_dendrogram.remove()

## Selected sigma factors

In [None]:
# For every sigma (column), record the antisigma (row label) with the largest
# mean value in the antisigma orthogonality matrix.
top_antisigma_per_sigma = antisigma_orthogonality_mean.idxmax(axis=0)
sigmas_to_antisigmas = top_antisigma_per_sigma.rename("Antisigma")

In [None]:
# Display the sigma -> antisigma mapping for inspection.
sigmas_to_antisigmas

In [None]:
# Sigma factors carried forward in the analysis. The names are used below as
# column labels of both orthogonality matrices and as row labels of the
# titration table.
selected_sigmas = [
    "ECF03_1198",
    "ECF16_3622",
    "ECF20_992",
    "ECF38_1322",
    "ECF27_4265",
    "ECF22_4450",
    "ECF11_987",
]

In [None]:
# Restrict the sigma matrix to the selected sigma columns and, for each one,
# keep the row where its mean value peaks; show the result as a heatmap.
selected_sigma_columns = sigma_orthogonality_mean.loc[:, selected_sigmas]
peak_sigma_rows = selected_sigma_columns.idxmax(axis=0)
sigma_subset = selected_sigma_columns.loc[peak_sigma_rows, :]
sns.heatmap(data=sigma_subset, annot=True, fmt=".0f")

In [None]:
# Restrict the antisigma matrix to the selected sigma columns and, for each
# one, keep the antisigma row where its mean value peaks; show the result as
# a heatmap.
selected_antisigma_columns = antisigma_orthogonality_mean.loc[:, selected_sigmas]
peak_antisigma_rows = selected_antisigma_columns.idxmax(axis=0)
antisigma_subset = selected_antisigma_columns.loc[peak_antisigma_rows, :]
sns.heatmap(data=antisigma_subset, annot=True, fmt=".0f")

In [None]:
# Take the "Average promoter activity" column group for the selected sigmas,
# transpose so each sigma becomes a column, drop the outer header level from
# the resulting index, and draw one line per sigma.
titration_subset = (
    sigma_titration.loc[selected_sigmas, ["Average promoter activity"]]
    .T
    .droplevel(0)
)
titration_subset.plot()

In [None]:
# Promoter recorded for each selected sigma in the titration table. The outer
# header level of that column has no label in the sheet, hence the
# pandas-generated "Unnamed: 1_level_0" key.
promoter_column = ("Unnamed: 1_level_0", "Promoter")
promoter_subset = sigma_titration.loc[selected_sigmas, promoter_column]
promoter_subset

In [None]:
# Sanity check: the promoters listed for the selected sigmas in the titration
# table must be exactly the rows picked out of the sigma matrix above.
titration_promoters = set(promoter_subset.to_numpy())
peak_promoters = set(sigma_subset.index.to_numpy())
assert titration_promoters == peak_promoters

## Tradeoffs

In [None]:
# Build one table per sigma that combines its peak activity values with the
# growth measurements of the sigma and of its best-matching antisigma.

# Column-wise maxima of the two mean matrices (one value per sigma).
sigma_max = sigma_orthogonality_mean.max(axis=0).rename("Sigma max")
antisigma_max = antisigma_orthogonality_mean.max(axis=0).rename("Antisigma max")

# Growth readouts at a single inducer level each. The micro sign is written as
# an escape so the column key survives encoding round-trips; the previous
# literal had been corrupted into mojibake, which cannot match the sheet's
# header.
sigma_growth100 = sigma_growth[
    ("Transition phase assay (8 hr OD)", "Average", "100 \u00b5M")
].rename("Sigma growth")
antisigma_growth50 = antisigma_growth[
    ("Transition phase assay (8 hr OD)", "Average", "50 nM")
].rename("Antisigma growth")

# Re-key the antisigma growth values by sigma via the sigma -> antisigma map
# (default inner join, so sigmas whose antisigma has no growth row drop out).
antisigma_growth50 = pd.merge(
    sigmas_to_antisigmas, antisigma_growth50, left_on="Antisigma", right_index=True
)

sigma_activationrepression = pd.concat(
    (sigma_max, antisigma_max, sigma_growth100, antisigma_growth50), axis=1
)
# Growth of the worse of the two components.
sigma_activationrepression["Min growth"] = sigma_activationrepression[
    ["Sigma growth", "Antisigma growth"]
].min(axis=1)
sigma_activationrepression["Selected"] = sigma_activationrepression.index.isin(
    selected_sigmas
)
# Assignment aligns on the index, i.e. on sigma names.
sigma_activationrepression["Promoter"] = sigma_titration[
    ("Unnamed: 1_level_0", "Promoter")
]

# Rows for the selected sigmas only; "Selected" is already boolean, so it is
# used directly as the mask. Naming the index before resetting it yields a
# "Sigma" column whether or not the concatenated index carried a name.
sigma_subset_info = sigma_activationrepression[sigma_activationrepression["Selected"]]
sigma_subset_info = sigma_subset_info.rename_axis(index="Sigma").reset_index()

In [None]:
# Columns shown in the hover tooltip of every scatter plot below. "all" shows
# every column; the list in the trailing comment is a narrower alternative.
hover_cols = "all"  # ["index", "Antisigma"]

In [None]:
# Sigma growth (x) against peak sigma value (y).
sigma_activationrepression.hvplot.scatter(
    x="Sigma growth",
    y="Sigma max",
    hover_cols=hover_cols,
)

In [None]:
# Peak sigma value (x) against peak antisigma value (y), coloured by whether
# the sigma is in the selected set.
sigma_activationrepression.hvplot.scatter(
    x="Sigma max",
    y="Antisigma max",
    hover_cols=hover_cols,
    color="Selected",
    cmap="Category10",
)

In [None]:
# Sigma growth (x) against antisigma growth (y), coloured by whether the
# sigma is in the selected set.
sigma_activationrepression.hvplot.scatter(
    x="Sigma growth",
    y="Antisigma growth",
    hover_cols=hover_cols,
    color="Selected",
    cmap="Category10",
)

In [None]:
# Peak sigma value (x) against peak antisigma value (y), coloured by sigma
# growth.
sigma_activationrepression.hvplot.scatter(
    x="Sigma max",
    y="Antisigma max",
    color="Sigma growth",
    hover_cols=hover_cols,
)

In [None]:
# Peak sigma value (x) against peak antisigma value (y), coloured by
# antisigma growth.
sigma_activationrepression.hvplot.scatter(
    x="Sigma max",
    y="Antisigma max",
    color="Antisigma growth",
    hover_cols=hover_cols,
)

In [None]:
# Peak sigma value (x) against peak antisigma value (y), coloured by the
# lower of the two growth values.
sigma_activationrepression.hvplot.scatter(
    x="Sigma max",
    y="Antisigma max",
    color="Min growth",
    hover_cols=hover_cols,
)

In [None]:
# Display the combined per-sigma table for inspection.
sigma_activationrepression

In [None]:
# Sigma growth (x) against antisigma growth (y), without colour coding.
sigma_activationrepression.hvplot.scatter(
    x="Sigma growth",
    y="Antisigma growth",
    hover_cols=hover_cols,
)

## Checking antisigma correspondence

In [None]:
# Display the promoter recorded for each selected sigma again, for reference
# while checking the antisigma pairing below.
promoter_subset

In [None]:
# Row of the antisigma mean matrix for antisigma AS22_1147: its value against
# every sigma column.
antisigma_orthogonality_mean.loc["AS22_1147"]

In [None]:
# Column of the antisigma mean matrix for sigma ECF22_4450: its value against
# every antisigma row.
antisigma_orthogonality_mean["ECF22_4450"]