Skip to content

RNA‐diff

Etienne Kornobis edited this page Sep 7, 2023 · 8 revisions

Interaction

See: https://github.com/tavareshugo/tutorial_DESeq2_contrasts/blob/main/DESeq2_contrasts.md

https://www.bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html

Input

counts_raw.csv can be found in the counts folder in the rnadiff analysis folder.

design_interaction.csv:

label clone infection
A1 CR NI
A2 CR NI
A3 CR NI
C1 K NI
C2 K NI
C3 K NI
E1 CR I
E2 CR I
E3 CR I
G1 K I
G2 K I
G3 K I

Statistical test (R)

library(DESeq2)

counts = read.csv("rnadiff/counts/counts_raw.csv", header=T, check.names=F, row.names=1)
target = read.csv("design_interaction.csv", header=T, row.names=1)

# Selecting samples with temperature test (specific to this project)
#counts = counts[,rownames(target)]

# Define dds and its design
dds = DESeqDataSetFromMatrix(countData=counts, colData=target, design=~1 + clone + infection + clone:infection)
dds = DESeq(dds, fitType="parametric", betaPrior=FALSE)

# get the model matrix
mod_mat <- model.matrix(design(dds), colData(dds))

# Define coefficient vectors for each condition
CR_I = colMeans(mod_mat[dds$clone == "CR" & dds$infection == "I", ])
CR_NI = colMeans(mod_mat[dds$clone == "CR" & dds$infection == "NI", ])
K_I = colMeans(mod_mat[dds$clone == "K" & dds$infection == "I", ])
K_NI = colMeans(mod_mat[dds$clone == "K" & dds$infection == "NI", ])

# Interaction contrast
res <- results(dds, 
                contrast = (K_NI - K_I) - (CR_NI - CR_I),
               alpha=0.05, independentFiltering=TRUE,
               cooksCutoff=TRUE)

summary(res)

# Get only significants
subset(res, padj < 0.05)

# Order by padj and export
res = res[order(res[,"padj"]),]
write.table(res, "rnadiff_interaction.csv",
            row.names=TRUE,
            sep=",")

Interactive plots in Jupyter (Python)

gene_options = list(pd.read_csv("output/rnadiff_interaction.csv")\
               .query("padj < 0.05").index)

counts_df = pd.read_csv("rnadiff/counts/counts_normed.csv") 
design_df = pd.read_csv("design_interaction.csv").set_index("label")

df = pd.concat([counts_df.T,design_df],axis=1)

def interactive_counts(gene_options, counts_df, design_df, annot_df=None):
    """
    gene_option: a list of ids corresponding in ids in the counts matrix
    counts_df: the count matrix
    design_df: the RNA-seq design of the experiment
    annot_df: a pandas DataFrame with ids as index and a MultiIndex column ("annotation", "gene_name")
    """
    @interact(gene=Dropdown(options=gene_options,disabled=False),
          group=Dropdown(options=list(design_df.columns)))
    def plot_counts(gene, group):
        try:
            if gene == '':
                display(counts_df.T)
            else:
                df.sort_values(group, inplace=True)
                sns.swarmplot(x=group, y=gene, data=df)
                plt.xticks(rotation=45)
                if isinstance(annot_df, pd.DataFrame):
                    plt.title(annot_df.loc[gene, ("annotation", "gene_name")])
        except Exception as e:
            print(e)

interactive_counts(gene_options, counts_df, design_df)