In [194]:
import os, re
    
class seq_experiment(object):
    def __init__(self):
        self.seq_samples = []
    
    #function to allow addition of new seq_samples to seq_experiment
    def add_sample(self, seq_sample):
        self.seq_samples.append(seq_sample)
    
    #function to return a set of all genotypes for each seq_sample in seq_experiment
    def unique_genotypes(self):
        return set(seq_sample.genotype for seq_sample in self.seq_samples)
    
    #will take two genotype arguemnts and return all samples with either of those genotypes
    def filter_genotypes(self, geno1, geno2):
        filtered_samples = []
        for seq_sample in self.seq_samples:
            if seq_sample.genotype == geno1 or seq_sample.genotype == geno2:
                filtered_samples.append(seq_sample.make_list())
        return filtered_samples
    
    #using unique genotypes function above, make all possible unique combinations of genotypes
    def geno_comparisons(self):
        comps = []
        for x, y in [(x,y) for x in self.unique_genotypes() for y in self.unique_genotypes()]:
            if x != y and sorted((x,y)) not in comps:
                comps.append(sorted((x,y)))
        return comps
    
    
    def sample_geno_comp(self):
        Filtered = []
        for x in self.geno_comparisons():
            y=self.filter_genotypes(x[0],x[1])
            if y != None:
                Filtered.append(y)
        return Filtered
    
class seq_sample(seq_experiment):
    def __init__(self, seq_name, genotype, path):
        self.seq_name = seq_name
        self.genotype = genotype
        self.path = path
        
    def make_tupple(self):
        return (self.seq_name, self.genotype, self.path)
    
    def compare_genotype(self, geno1, geno2):
        if self.genotype == geno1 or self.genotype == geno2:
            return(self.make_tupple())
        
    def make_list(self):
        return [self.seq_name, self.genotype, self.path]

In [195]:
# write dictionary with sample names as keys and genotype as value 
sample_file = "/Users/manager/mystuff/kallisto/Sample_Report.txt"
sample_list = {}
with open(sample_file) as s:
    next(s) #skip header
    for line in s:
        line = line.split()
        sample_list[line[0].rsplit(".fastq.gz")[0]] = line[19] #make dictionary with sample : genotype


#write table comrised of seq_sample objects each saved to seq_experiment object called experiment
base_dir = "/Users/manager/mystuff/kallisto/results/"     
experiment = seq_experiment()
for dct in os.listdir(base_dir):
    if not re.search(r"\.", dct):
        # build each seq_sample from an individual library to the seq_experiment
        experiment.add_sample(seq_sample(dct, sample_list[dct], base_dir + dct))
        
#get unique genotypes from sequencing samples, make list of all possible comparisons
genotypes = experiment.unique_genotypes() #produces set of unique genotypes in experiment
comparisons = experiment.geno_comparisons() #produces all comparisons for all unique genotypes in experiment
tables = experiment.sample_geno_comp()

print("Genotyes:")
print(genotypes)
print("Genotype comparisons:")
print(comparisons)
print("Sample comparisons by genotype:")
for table in tables:
    for t in table:
        print(t)

Genotyes:
{'trm12', 'wt', 'trm12_tgs1', 'tgs1'}
Genotype comparisons:
[['trm12', 'wt'], ['trm12', 'trm12_tgs1'], ['tgs1', 'trm12'], ['trm12_tgs1', 'wt'], ['tgs1', 'wt'], ['tgs1', 'trm12_tgs1']]
Sample comparisons by genotype:
['s_1_1_ATGTCA', 'trm12', '/Users/manager/mystuff/kallisto/results/s_1_1_ATGTCA']
['s_1_1_CCGTCC', 'trm12', '/Users/manager/mystuff/kallisto/results/s_1_1_CCGTCC']
['s_1_1_GTGGCC', 'wt', '/Users/manager/mystuff/kallisto/results/s_1_1_GTGGCC']
['s_1_1_GTTTCG', 'wt', '/Users/manager/mystuff/kallisto/results/s_1_1_GTTTCG']
['s_1_1_AGTCAA', 'trm12_tgs1', '/Users/manager/mystuff/kallisto/results/s_1_1_AGTCAA']
['s_1_1_AGTTCC', 'trm12_tgs1', '/Users/manager/mystuff/kallisto/results/s_1_1_AGTTCC']
['s_1_1_ATGTCA', 'trm12', '/Users/manager/mystuff/kallisto/results/s_1_1_ATGTCA']
['s_1_1_CCGTCC', 'trm12', '/Users/manager/mystuff/kallisto/results/s_1_1_CCGTCC']
['s_1_1_ATGTCA', 'trm12', '/Users/manager/mystuff/kallisto/results/s_1_1_ATGTCA']
['s_1_1_CCGTCC', 'trm12', '/User