In [96]:
import pandas as pd
import numpy as np
import click

In [55]:
# Load the comma-separated sample sheet and display it as the cell output.
sample_sheet = pd.read_csv(
    "../../../data/samples.txt",
    sep=",",
)
sample_sheet

Unnamed: 0,M07e-J4,M07e-J11,M07e-J18,M07e-J25
0,M07e-J4-1,M07e-J11-1,M07e-J18-1,M07e-J25-1
1,M07e-J4-2,M07e-J11-2,M07e-J18-2,M07e-J25-2
2,M07e-J4-3,M07e-J11-3,M07e-J18-3,M07e-J25-3


In [91]:
def get_all_pairwise_comparaisons(samples_list):
    """Return every unordered pair of positions in ``samples_list``.

    Only ``len(samples_list)`` is used, so any sized container (list, dict,
    pandas Index, ...) is accepted.

    Parameters
    ----------
    samples_list : sized container
        The items to compare pairwise.

    Returns
    -------
    list of tuple of int
        All ``(i, j)`` index pairs with ``i < j``, in lexicographic order.
        Each unordered pair appears exactly once; the list is empty when
        fewer than two items are given.
    """
    n_samples = len(samples_list)
    # Starting j at i + 1 yields each unordered pair once, which removes the
    # need to rescan the pairs collected so far for a mirrored duplicate.
    return [
        (i, j)
        for i in range(n_samples)
        for j in range(i + 1, n_samples)
    ]

In [95]:
class Design:
    """Build a 0/1 design matrix from a sample sheet.

    The sample sheet is a comma-separated table with one column per condition;
    the cells of a column are the replicate names of that condition. The
    resulting matrix has one row per replicate, a ``Samples`` column holding
    the replicate name, a ``baseline`` column set to 1, and one indicator
    column per condition. A replicate gets 1 in the column of its own
    condition, except for the control condition whose column stays 0.

    Attributes are filled in by ``create_design_matrix``:
    ``samples_dict`` (per-condition summary), ``replicates`` (row order) and
    ``design_matrix`` (the resulting DataFrame).
    """

    def __init__(self, file, control, pairwise=False):
        """Read the sample sheet and select the conditions to include.

        Parameters
        ----------
        file : path or buffer
            Passed as-is to ``pd.read_csv`` with ``sep=","``.
        control : str
            Name of the control condition.
        pairwise : iterable of str or False, optional
            Condition names to restrict the design to. ``False`` (default)
            or ``None`` keeps every column of the sample sheet.
        """
        self.file = file
        self.sample_sheet = pd.read_csv(file, sep=",")
        self.control_name = control
        self.conditions = self.sample_sheet.columns
        self.samples_dict = None
        self.replicates = []
        self.design_matrix = None

        # Identity test on the sentinel: `!= False` would also let None through
        # and compares elementwise on array-likes.
        if pairwise is not False and pairwise is not None:
            # Copy so that later changes to the caller's list (or a one-shot
            # iterator) cannot alter this design.
            self.conditions = list(pairwise)

    def createSamplesSummary(self):
        """Fill ``samples_dict`` with the control flag and replicates of each condition.

        Each entry has the form ``{'isControl': 0 or 1, 'replicates': [...]}``.
        Empty sample-sheet cells (conditions with fewer replicates than the
        longest column) are skipped instead of being kept as NaN replicates.
        """
        self.samples_dict = {}
        for col in self.conditions:
            self.samples_dict[col] = {
                'isControl': 1 if col == self.control_name else 0,
                'replicates': self.sample_sheet[col].dropna().tolist(),
            }

    def getAllReplicates(self):
        """Collect the replicates of all conditions, in condition order.

        The list is rebuilt from scratch on every call so that running
        ``create_design_matrix`` more than once does not duplicate rows.
        """
        self.replicates = [
            replicate
            for condition in self.conditions
            for replicate in self.samples_dict[condition]['replicates']
        ]

    def fillConditions(self):
        """Set every condition column to 0 for the control replicates.

        Raises
        ------
        KeyError
            If the control condition is not among the design conditions.
        """
        if self.control_name not in self.samples_dict:
            raise KeyError(
                "control condition {!r} is not among the design conditions {}".format(
                    self.control_name, list(self.conditions)
                )
            )
        control_replicates = set(self.samples_dict[self.control_name]['replicates'])
        condition_cols = list(self.conditions)
        for replicate in self.replicates:
            if replicate in control_replicates:
                for col in condition_cols:
                    self.design_matrix.at[replicate, col] = 0

    def fillNan(self):
        """Mark treatment replicates with 1 and turn the remaining cells into 0."""
        for condition in self.conditions:
            if condition == self.control_name:
                # The control column stays 0 for every row.
                continue
            for replicate in self.samples_dict[condition]['replicates']:
                self.design_matrix.at[replicate, condition] = 1
        # Convert explicitly to integers instead of relying on pandas silently
        # downcasting object columns when NaN is replaced.
        for condition in self.conditions:
            self.design_matrix[condition] = (
                self.design_matrix[condition].astype(float).fillna(0).astype(int)
            )

    def get_empty_design_matrix(self):
        """Create the matrix skeleton: one row per replicate, condition cells unset (NaN)."""
        dm_cols_name = ['Samples', 'baseline']
        dm_cols_name.extend(list(self.conditions))
        self.design_matrix = pd.DataFrame(
            index=self.replicates, columns=dm_cols_name, dtype=object
        )
        self.design_matrix['Samples'] = self.replicates
        self.design_matrix['baseline'] = 1

    def create_design_matrix(self):
        """Run all steps in order and leave the result in ``self.design_matrix``."""
        self.createSamplesSummary()
        self.getAllReplicates()
        self.get_empty_design_matrix()
        self.fillConditions()
        self.fillNan()

def test():
    """Build the example design: M07e-J4 against the M07e-J25 control."""
    example = Design(
        "../../../data/samples.txt",
        "M07e-J25",
        ['M07e-J4', 'M07e-J25'],
    )
    example.create_design_matrix()
    return example


design = test()
design.design_matrix

Unnamed: 0,Samples,baseline,M07e-J4,M07e-J25
M07e-J4-1,M07e-J4-1,1,1,0
M07e-J4-2,M07e-J4-2,1,1,0
M07e-J4-3,M07e-J4-3,1,1,0
M07e-J25-1,M07e-J25-1,1,0,0
M07e-J25-2,M07e-J25-2,1,0,0
M07e-J25-3,M07e-J25-3,1,0,0


In [66]:
# Enumerate the pairs from the sample-sheet columns rather than from
# `design.samples_dict`: `design` comes from test(), which only covers two
# conditions and would therefore yield a single pair.
comparaisons = get_all_pairwise_comparaisons(list(sample_sheet.columns))
comparaisons

[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]

In [94]:
# Build and print one two-condition design matrix per pair of sample-sheet columns.
file = "../../../data/samples.txt"
conditions = pd.read_csv(file, sep=",").columns.tolist()
comparaisons = get_all_pairwise_comparaisons(conditions)
for comparaison in comparaisons:
    # First index of the pair is the control, second is the treatment.
    control, treatment = (conditions[position] for position in comparaison)
    print(treatment, 'vs', control)
    design = Design(file, control, [control, treatment])
    design.create_design_matrix()
    print(design.design_matrix)
    print("\n")


M07e-J11 vs M07e-J4
               Samples  baseline  M07e-J4  M07e-J11
M07e-J4-1    M07e-J4-1         1        0         0
M07e-J4-2    M07e-J4-2         1        0         0
M07e-J4-3    M07e-J4-3         1        0         0
M07e-J11-1  M07e-J11-1         1        0         1
M07e-J11-2  M07e-J11-2         1        0         1
M07e-J11-3  M07e-J11-3         1        0         1


M07e-J18 vs M07e-J4
               Samples  baseline  M07e-J4  M07e-J18
M07e-J4-1    M07e-J4-1         1        0         0
M07e-J4-2    M07e-J4-2         1        0         0
M07e-J4-3    M07e-J4-3         1        0         0
M07e-J18-1  M07e-J18-1         1        0         1
M07e-J18-2  M07e-J18-2         1        0         1
M07e-J18-3  M07e-J18-3         1        0         1


M07e-J25 vs M07e-J4
               Samples  baseline  M07e-J4  M07e-J25
M07e-J4-1    M07e-J4-1         1        0         0
M07e-J4-2    M07e-J4-2         1        0         0
M07e-J4-3    M07e-J4-3         1        0         0


In [100]:
# Build the exported design explicitly. After the pairwise loop, `design` holds
# the last comparison (control M07e-J18, treatment M07e-J25), which does not
# match the "M07e-J25 vs M07e-J4" file name below.
design = Design(file, "M07e-J4", ["M07e-J4", "M07e-J25"])
design.create_design_matrix()
design.design_matrix.to_csv("M07e-J25_vs_M07e-J4_design_matrix.txt", index=False, sep="\t")