In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Experimental setup

In [4]:
# Tab-separated experiment table; only its "sample" column is used in this cell.
experiments = pd.read_csv("../data/intermediate/experiment_setup.txt", sep="\t")
samples = list(experiments["sample"])

# Consecutive runs of three samples, in file order: indices 0-2, 3-5, ..., 18-20.
# NOTE(review): assumes the table lists samples in the same order as
# `sample_names` below — confirm against the file.
sample_groups = [samples[start:start + 3] for start in range(0, 21, 3)]

# One label per group, positionally aligned with `sample_groups`.
sample_names = [
    "RPL22_oe_c",
    "RPL22_oe_t",
    "RPL22L1_oe_c",
    "RPL22L1_oe_t",
    "shluc",
    "sh704",
    "sh705",
]

# Partitions

## Partition exon psis

In [19]:
# Event-type codes; each one is the stem of a "<code>.psi" input file.
splice_types = ["A3", "A5", "AF", "AL", "MX", "RI", "SE"]

# Load every per-event-type PSI table first, in `splice_types` order, so that
# nothing is written unless all inputs could be read.
psis = [
    pd.read_csv("../data/intermediate/suppa_exon_psis/" + event_type + ".psi",
                sep="\t")
    for event_type in splice_types
]

# For each event type, write one file per sample group holding only that
# group's sample columns.
exon_grouped_dir = "../data/intermediate/suppa_exon_psis_grouped/"

for splice_type, splice_psis in zip(splice_types, psis):
    for sample_name, sample_group in zip(sample_names, sample_groups):
        grouped_path = exon_grouped_dir + splice_type + "_" + sample_name + ".psi"
        # index_label=False: no header field is written for the index column.
        splice_psis[sample_group].to_csv(
            grouped_path,
            sep="\t",
            na_rep="NA",
            index_label=False,
        )

# Stack all event types row-wise and persist the combined table.
exon_psis = pd.concat(psis)
exon_psis.to_hdf(
    "../data/processed/suppa2_exon_psis.hdf",
    key="exon_psis",
    mode="w",
)

## Partition transcript psis

In [20]:
# Isoform-level PSI table (tab-separated).
transcript_psis = pd.read_csv(
    "../data/intermediate/suppa_isoforms_isoform.psi",
    sep="\t",
)

# Write one file per sample group containing only that group's sample columns.
transcript_grouped_dir = "../data/processed/grouped_transcript_psis/"

for sample_name, sample_group in zip(sample_names, sample_groups):
    # index_label=False: no header field is written for the index column.
    transcript_psis[sample_group].to_csv(
        transcript_grouped_dir + sample_name + ".txt",
        sep="\t",
        na_rep="NA",
        index_label=False,
    )

# Persist the full, ungrouped table as well.
transcript_psis.to_hdf(
    "../data/processed/suppa2_transcript_psis.hdf",
    key="transcript_psis",
    mode="w",
)

## Partition TPMs

In [82]:
# TPM matrix (tab-separated).
suppa_tpms = pd.read_csv("../data/processed/tpm_matrix_suppa.txt", sep="\t")

# Write one file per sample group containing only that group's sample columns.
# Unlike the PSI cells, no na_rep is passed here, so missing values are written
# with pandas' default representation (an empty field).
tpm_grouped_dir = "../data/processed/grouped_tpms/"

for sample_name, sample_group in zip(sample_names, sample_groups):
    suppa_tpms[sample_group].to_csv(
        tpm_grouped_dir + sample_name + ".txt",
        sep="\t",
        index_label=False,
    )
        

# Differential splicing results

In [10]:
# Comparison names; each is the "<name>" part of the differential-splicing
# input file names read by merge_splices.
# NOTE: this rebinds `experiments`, which the experimental-setup cell used for
# the experiment table DataFrame.
experiments = [
    "RPL22_oe",
    "RPL22L1_oe",
    "sh704",
    "sh705",
]

# Event-type codes (same values as in the exon-PSI partition cell).
splice_types = [
    "A3",
    "A5",
    "AF",
    "AL",
    "MX",
    "RI",
    "SE",
]


def merge_splices(exp, types=None,
                  diff_dir="../data/intermediate/suppa_diff_psis",
                  suffix=".dpsi.temp.0"):
    """Load and stack the differential-PSI tables of one comparison.

    For every event type, the file ``<diff_dir>/<type>_<exp><suffix>`` is read
    as a tab-separated table whose first column becomes the index. Its two
    remaining columns are renamed to ``difference`` and ``pval``, and a
    ``type`` column holding the event-type code is added. The per-type tables
    are then concatenated row-wise, in the order the types are given.

    Parameters
    ----------
    exp : str
        Comparison name used in the file names (e.g. ``"RPL22_oe"``).
    types : iterable of str, optional
        Event-type codes to load. Defaults to the notebook-level
        ``splice_types`` list, which keeps ``merge_splices(exp)`` working
        exactly as before.
    diff_dir : str, optional
        Directory holding the input files; a trailing ``/`` is tolerated.
    suffix : str, optional
        File-name suffix after ``<type>_<exp>``.
        NOTE(review): the default ``.temp.0`` suffix looks like an
        intermediate output rather than a final ``.dpsi`` file — confirm this
        is the intended input.

    Returns
    -------
    pandas.DataFrame
        Columns ``difference``, ``pval`` and ``type``, indexed by the first
        column of the input files.

    Raises
    ------
    ValueError
        If an input table does not have exactly two data columns, or if
        ``types`` is empty (nothing to concatenate).
    """
    if types is None:
        # Fall back to the module-level list only when the caller gave none,
        # so the function can also be used without that global.
        types = splice_types

    # Normalise the directory so the default yields the same path as before.
    base = diff_dir.rstrip("/")

    merged_splices = []

    for s in types:
        diffs = pd.read_csv("{}/{}_{}{}".format(base, s, exp, suffix),
                            sep="\t", index_col=0)

        diffs.columns = ["difference", "pval"]
        diffs["type"] = s
        merged_splices.append(diffs)

    return pd.concat(merged_splices, axis=0)


# One merged table per comparison. The unpacking is positional, so the names
# on the left must stay in the same order as the entries of `experiments`.
rpl22_oe, rpl22l1_oe, sh704, sh705 = [
    merge_splices(experiment) for experiment in experiments
]

In [13]:
# Persist each merged table to its own HDF file; the file stem doubles as the
# HDF key.
for diff_name, diff_table in (
    ("rpl22_oe", rpl22_oe),
    ("rpl22l1_oe", rpl22l1_oe),
    ("sh704", sh704),
    ("sh705", sh705),
):
    diff_table.to_hdf(
        "../data/processed/exon_psi_diffs/" + diff_name + ".hdf",
        key=diff_name,
        mode="w",
    )