In [153]:
#importing needed packages 
import csv 
import matplotlib.pyplot as plt
import math
import pandas as pd

In [154]:
# paths to csv files storing DeSeq2 data 
# this may need to be changed
# Every DESeq2 result table shares one directory and file-name prefix, so the
# location is spelled out once; edit DESEQ2_DIR if the data moves.
DESEQ2_DIR = '/home/data/refined/bc_dcis/EJM_Data_Output/Dixit_Analysis'
DESEQ2_PREFIX = DESEQ2_DIR + '/Dixit_N4_Pseudobulk_Count_DeSeq2_Analysis.'

# per-guide comparison table (read by perturbations() and guideImpact())
pathToGuideData = DESEQ2_PREFIX + 'Controls_vs_SingleGuide.csv'
#pathToGeneData = DESEQ2_PREFIX + 'JustControls_vs_SingleTargets.csv'
# per-target-gene comparison table (read by genes() and geneTarget())
pathToGeneData = DESEQ2_PREFIX + 'Controls_vs_SingleTargets.csv'
# NOTE(review): same file as pathToGeneData and not referenced by the functions
# in this part of the notebook -- confirm whether a different table was intended.
pathToGeneExpandedData = DESEQ2_PREFIX + 'Controls_vs_SingleTargets.csv'
# dual-target comparison table (read by dualGenes() and dualTarget())
pathToDualGeneData = DESEQ2_PREFIX + 'DualIntergenic_vs_MultiTargets.csv'

In [155]:
# getter method to get a list of all target genes
# for the DESeq2 by-gene comparisons, there is a target gene column
# in the case of the Dixit data, this was one of 10 genes encoding a transcription factor
# this method returns a list of all unique target genes

def genes():
    """Return the distinct target genes named in the by-gene DESeq2 table.

    Reads the CSV at ``pathToGeneData`` and looks at column index 6 (the 7th
    column) of every row. The literal ``'NA'`` and the header label
    ``'perturbation'`` are skipped.

    Returns:
        list: target names in order of first appearance, without repeats.
    """
    ordered = {}  # dict keys keep insertion order, giving ordered de-duplication
    with open(pathToGeneData, 'r') as handle:
        for record in csv.reader(handle):
            label = record[6]
            if label == 'NA' or label == 'perturbation':
                continue
            ordered.setdefault(label, None)
    return list(ordered)

In [156]:
# getter method to list all unique dual perturbations
# in the dual-guide-cells-only comparisons, the 7th column contains different combinations of target genes
# this method finds all unique dual-gene-target perturbations and returns a list storing them

def dualGenes():
    """Return the distinct dual-target perturbation names in the dual table.

    Reads the CSV at ``pathToDualGeneData`` and collects the value in column
    index 6 of each row, ignoring ``'NA'`` and the header label
    ``'perturbation'``.

    Returns:
        list: perturbation names in order of first appearance, no repeats.
    """
    names = []
    already = set()  # membership mirror of `names`
    with open(pathToDualGeneData, 'r') as handle:
        for record in csv.reader(handle):
            combo = record[6]
            if combo in ('NA', 'perturbation') or combo in already:
                continue
            already.add(combo)
            names.append(combo)
    return names


In [157]:
# this function allows you to specify a gene and see if there are any dual perturbation defined for it
# i.e. what are the dual perturbations that include a perturbation to parameter gene 
# this function returns a list of all found dual-gene-target perturbations

def findDual(gene):
    """List the dual perturbations whose name contains ``gene``.

    The check is a plain substring test against every name returned by
    ``dualGenes()``, so a gene symbol that is a substring of another symbol
    also matches that one.

    Args:
        gene: text to look for inside each dual-perturbation name.

    Returns:
        list: matching names, in the order ``dualGenes()`` yields them;
        empty when nothing matches.
    """
    return [combo for combo in dualGenes() if gene in combo]

In [158]:
# given a gene* name this function will return lists of which genes get upregulated and downregulated when gene* is perturbed 
# this is only considering the target gene not specific perturbation 
 
def geneTarget(targetGene):
    """Split the rows recorded for one target gene by the sign of log2_fc.

    Scans the CSV at ``pathToGeneData``. For each row whose column index 6
    equals ``targetGene``, the pair ``(row[1], row[3])`` is filed under "up"
    when ``float(row[3])`` is positive and under "down" when it is negative;
    a value of exactly zero is dropped. ``row[3]`` is kept as the original
    string.

    Args:
        targetGene: target name to match exactly against column index 6.

    Returns:
        ``(upReg, downReg)`` as a tuple of two lists, or the integer ``0``
        when both lists would be empty (e.g. the name is not in the file).
    """
    increased, decreased = [], []
    with open(pathToGeneData, 'r') as handle:
        for record in csv.reader(handle):
            if record[6] != targetGene:
                continue
            change = float(record[3])
            entry = (record[1], record[3])
            if change < 0:
                decreased.append(entry)
            elif change > 0:
                increased.append(entry)

    if increased or decreased:
        return (increased, decreased)
    return 0  # sentinel used throughout the notebook for "nothing found"


In [159]:
# a method to get a list of all perturbations specified in the data file for a specified target gene
# the gene parameter means 'list all perturbations that target [gene]' 

def perturbations(gene):
    """List the distinct perturbation (guide) names that mention ``gene``.

    Reads the CSV at ``pathToGuideData`` and keeps each distinct value of
    column index 6 that contains ``gene`` as a substring.

    Args:
        gene: text searched for inside each perturbation name.

    Returns:
        list: matching perturbation names in order of first appearance.
    """
    hits = []
    with open(pathToGuideData, 'r') as handle:
        for record in csv.reader(handle):
            guide = record[6]
            if gene in guide and guide not in hits:
                hits.append(guide)
    return hits

In [160]:
# given a guide* name this function will return a list of which genes get upregulated and downregulated when the perturbation is caused by guide* 
# i.e., when  we have guide x, returns a list of genes that get upregulated, and a list of genes that get downregulated as a result of having that guide/perturbation present

def guideImpact(guideRNA):
    """Split the rows recorded for one guide by the sign of log2_fc.

    Scans the CSV at ``pathToGuideData``. Rows whose column index 6 equals
    ``guideRNA`` contribute the pair ``(row[1], row[3])`` to the "up" list
    when ``float(row[3])`` is positive, or to the "down" list when it is
    negative. Zero values are ignored and ``row[3]`` stays a string.

    Args:
        guideRNA: perturbation name to match exactly against column index 6.

    Returns:
        ``(upRegP, downRegP)`` as a tuple of two lists, or the integer ``0``
        when no row produced an entry.
    """
    raised, lowered = [], []
    with open(pathToGuideData, 'r') as handle:
        selected = (rec for rec in csv.reader(handle) if rec[6] == guideRNA)
        for rec in selected:
            change = float(rec[3])
            if change > 0:
                raised.append((rec[1], rec[3]))
            elif change < 0:
                lowered.append((rec[1], rec[3]))

    return (raised, lowered) if (raised or lowered) else 0


In [161]:
# same as the functions above, but for a dual-gene perturbation
# the parameter is a dual-gene perturbation name (all names defined in the file can be listed with the dualGenes() method)
# it returns the lists of up and down regulated genes recorded for that perturbation

def dualTarget(dualPerturb):
    """Split the rows recorded for one dual perturbation by the sign of log2_fc.

    Scans the CSV at ``pathToDualGeneData``. Rows whose column index 6 equals
    ``dualPerturb`` contribute ``(row[1], row[3])`` to the "up" list when
    ``float(row[3])`` is positive and to the "down" list when it is negative;
    zero values are skipped and ``row[3]`` is kept as a string.

    The previous guard compared ``dualPerturb`` with itself and therefore
    never reported an unknown name. Absence is now detected during the scan
    and reported on stdout. The return shape is unchanged so that callers
    indexing the result keep working.

    Args:
        dualPerturb: dual-perturbation name to match exactly.

    Returns:
        tuple: ``(upReg, downReg)``; both lists are empty when the name does
        not occur in the file.
    """
    upReg = []
    downReg = []
    seen = False  # becomes True once any row carries this perturbation name

    with open(pathToDualGeneData, 'r') as file:
        for row in csv.reader(file):
            if row[6] != dualPerturb:
                continue
            seen = True
            log2fc = float(row[3])
            if log2fc < 0:
                downReg.append((row[1], row[3]))
            elif log2fc > 0:
                upReg.append((row[1], row[3]))

    if not seen:
        print("The dual-pertubation passed is not found in the data file")

    return (upReg, downReg)


In [162]:
# function that compares 2 lists 
# helper method for compareM method
# creates 3 lists: 1 that contains elements only in the first list
# 1 that contains elements only in the second list
# 1 that contains elements that are in both lists 

def compare(list1, list2):
    """Partition two lists of ``(name, value)`` tuples by shared names.

    Only element ``[0]`` of each tuple is used for matching. The earlier
    implementation built the shared list from the inner loop variable left
    over after ``break`` and rescanned the other list for every element.
    This version makes the "first match in list1" rule explicit with a
    lookup table and produces the same output.

    Args:
        list1: sequence of tuples whose first item is the (hashable) name.
        list2: sequence of tuples whose first item is the (hashable) name.

    Returns:
        tuple of three lists:
        * tuples of ``list1`` whose name does not occur in ``list2``;
        * tuples of ``list2`` whose name does not occur in ``list1``;
        * one ``(name, value_from_list1, value_from_list2)`` per ``list2``
          tuple whose name occurs in ``list1``, in ``list2`` order. When
          ``list1`` repeats a name, its first occurrence supplies the value.
    """
    first_in_list1 = {}
    for tup in list1:
        first_in_list1.setdefault(tup[0], tup)  # keep the earliest tuple per name
    names_in_list2 = {tup[0] for tup in list2}

    unique_list1 = [tup for tup in list1 if tup[0] not in names_in_list2]

    unique_list2 = []
    same_list = []
    for tup2 in list2:
        partner = first_in_list1.get(tup2[0])
        if partner is None:
            unique_list2.append(tup2)
        else:
            same_list.append((partner[0], partner[1], tup2[1]))

    return (unique_list1, unique_list2, same_list)


In [163]:
# mutation1 is going to be a specific gene or perturbation
# if mutation1 is a specific perturbation put type1 as 'p', if it is a target gene put 't'; if you are interested in the upregulated list of this mutation put direction1 as 'u' (or 'd' for the downregulated list)
# mutation2 is going to be a specific gene or perturbation
# if mutation2 is a specific perturbation put type2 as 'p', if it is a target gene put 't'; if you are interested in the upregulated list of this mutation put direction2 as 'u' (or 'd' for the downregulated list)

def compareM(mutation1, type1, direction1, mutation2, type2, direction2):
    """Compare one regulated-gene list of ``mutation1`` with one of ``mutation2``.

    Each mutation is looked up exactly once (the earlier code read the CSV
    twice per mutation and ran ``compare`` three times for one result).

    Args:
        mutation1, mutation2: a guide name or a target-gene name.
        type1, type2: ``'p'`` to look the mutation up with ``guideImpact``,
            ``'t'`` to look it up with ``geneTarget``.
        direction1, direction2: ``'u'`` selects the up-regulated list,
            ``'d'`` the down-regulated list.

    Returns:
        * ``compare``'s three lists (only in first, only in second, shared)
          for the selected lists;
        * ``([], [], [])`` when either direction is neither ``'u'`` nor ``'d'``;
        * ``0`` after printing an error when a type is invalid or when a
          lookup returns ``0`` (previously the latter raised ``TypeError``).
    """
    lookups = {'p': guideImpact, 't': geneTarget}

    resolved = []
    for mutation, kind in ((mutation1, type1), (mutation2, type2)):
        if kind not in lookups:
            print("error: invalid type entered")
            return 0
        found = lookups[kind](mutation)
        if found == 0:  # the lookups use 0 to signal "no entries for this name"
            print("error: no differentially expressed genes found for", mutation)
            return 0
        resolved.append(found)

    slot = {'u': 0, 'd': 1}  # position of the up / down list in a lookup result
    if direction1 not in slot or direction2 not in slot:
        return ([], [], [])

    u1, u2, s = compare(resolved[0][slot[direction1]], resolved[1][slot[direction2]])
    return (u1, u2, s)

In [164]:
# log2_fc convert to ratio helper function
def convert_log2fc_to_ratio(lst):
    """Turn ``(gene_id, log2fc)`` pairs into ``(gene_id, 2 ** log2fc)`` pairs.

    Args:
        lst: iterable of 2-item tuples; the second item is passed through
            ``float`` and so may be a number or a numeric string.

    Returns:
        list: new tuples in the same order with the fold-change ratio as
        a float.
    """
    return [(gene_id, 2 ** float(log2fc)) for gene_id, log2fc in lst]

In [165]:
# ratio convert to log2_fc helper function
def convert_ratio_to_log2fc(lst):
    """Turn ``(gene_id, ratio)`` pairs into ``(gene_id, log2(ratio))`` pairs.

    Args:
        lst: iterable of 2-item tuples whose second item is a positive number.

    Returns:
        list: new tuples in the same order; each log2 value is rounded to
        four decimal places.
    """
    return [(gene_id, round(math.log2(ratio), 4)) for gene_id, ratio in lst]

In [166]:
#helper method
def listMerger(list1, list2):
    """Combine two lists of ``(key, value)`` pairs, multiplying repeated keys.

    Values are converted with ``float``. A key seen for the first time is
    stored as is; every later occurrence (in either list) multiplies the
    stored value.

    Args:
        list1: iterable of 2-item tuples, processed first.
        list2: iterable of 2-item tuples, processed second.

    Returns:
        list: ``(key, product)`` tuples in order of each key's first
        appearance.
    """
    products = {}
    for pairs in (list1, list2):
        for key, value in pairs:
            if key not in products:
                products[key] = float(value)
            else:
                products[key] *= float(value)
    return list(products.items())

In [167]:
#finding genes that get upregulated in one situation, but downregulated in another 
def match(list1, list2):
    """Pair up entries of two lists that share the same first item.

    Args:
        list1: sequence of tuples ``(name, value, ...)``.
        list2: sequence of tuples ``(name, value, ...)``.

    Returns:
        list: one ``(name, value_from_list1, value_from_list2)`` tuple for
        every combination of a ``list1`` entry and a ``list2`` entry with
        equal names, ordered by ``list1`` and then ``list2``.
    """
    pairs = []
    for left in list1:
        name = left[0]
        pairs.extend((name, left[1], right[1]) for right in list2 if right[0] == name)
    return pairs


In [168]:
# helper method to create a theoretical merged list
# it will access the up and down regulated lists for each perturbation in the dual-perturbation pair
# it will merge the up and down regulated lists so we end up with one upregulated list and one downregulated list
# any shared genes in each list will have their log2_fc added together
# the two lists will be compared to ensure no gene appears in both the up and down regulated list; if it does (i.e. shows up in the shared list) the log2_fc get added together
# move any genes with a positive log2_fc to the upregulated list, move any genes with a negative log2_fc to the downregulated list
# add the log values; this will show the theoretical gene expression we should see when these two perturbations are combined

def merge(target1, target2):
    """Build the theoretical up/down lists for two targets perturbed together.

    The up lists of both targets are merged, as are the down lists. A gene
    present in both lists of the same direction gets the sum of its two
    log2_fc values: the values are converted to ratios, multiplied, and
    converted back, which rounds them to four decimals.

    Changes from the earlier version:
    * entries were removed from a list while it was being iterated, which
      skips the element following each removal; the final partition is now
      built into new lists;
    * each target is looked up once instead of twice;
    * a target for which ``geneTarget`` returns ``0`` contributes empty
      lists instead of raising ``TypeError``.

    Note: a gene that one target raises and the other lowers is not
    reconciled here; it appears in both returned lists.

    Args:
        target1: first target-gene name.
        target2: second target-gene name.

    Returns:
        tuple: ``(mergedUp, mergedDown)``, lists of ``(gene, log2_fc)`` with
        float values. ``mergedUp`` holds only positive values, ``mergedDown``
        only negative ones; values that round to zero are dropped.
    """
    # geneTarget signals "nothing found" with 0, which is falsy
    up1, down1 = geneTarget(target1) or ([], [])
    up2, down2 = geneTarget(target2) or ([], [])

    # log2_fc -> ratio so that shared genes combine by multiplication, then back
    mergedUp = convert_ratio_to_log2fc(
        listMerger(convert_log2fc_to_ratio(up1), convert_log2fc_to_ratio(up2)))
    mergedDown = convert_ratio_to_log2fc(
        listMerger(convert_log2fc_to_ratio(down1), convert_log2fc_to_ratio(down2)))

    # Re-partition strictly by sign; entries that change side are appended
    # after the entries that were already there.
    finalUp = ([gene for gene in mergedUp if gene[1] > 0]
               + [gene for gene in mergedDown if gene[1] > 0])
    finalDown = ([gene for gene in mergedDown if gene[1] < 0]
                 + [gene for gene in mergedUp if gene[1] < 0])

    return (finalUp, finalDown)

In [169]:
# for comparing a dual target to its individuals targets
# compare the up and down regulated lists from the theoretical target merge to the real dual-target data (up and down regulated lists)
# error checking so we only perform this on dual-targets that are defined

def analyze(dualGeneP):
    """Compare the theoretical merge of two targets with the measured dual data.

    ``merge``, ``dualTarget`` and each ``compare`` are evaluated once; the
    earlier version re-ran them for every element it extracted, and each of
    those calls re-reads a CSV file.

    Args:
        dualGeneP: dual-perturbation name; must be one of ``dualGenes()``.

    Returns:
        ``1`` (after printing an error) when the name is unknown; otherwise
        an 8-tuple of lists:
        ``(theoU, realU, sameU, theoD, realD, sameD, sameUD, sameDU)``:
        genes up only in theory, up only in the real data, up in both; the
        same three for down; genes up in theory but down in reality; genes
        down in theory but up in reality.
    """
    if dualGeneP not in dualGenes():
        print("error: perturbation not found")
        return 1

    # The two targets are the first two names from genes() that occur as a
    # substring of the dual name (so they follow genes() order, not name order).
    target1 = ''
    target2 = ''
    for gene in genes():
        if gene in dualGeneP:
            if target1 == '':
                target1 = gene
            elif target2 == '':
                target2 = gene

    theoreticalUp, theoreticalDown = merge(target1, target2)
    actualUp, actualDown = dualTarget(dualGeneP)

    theoU, realU, sameU = compare(theoreticalUp, actualUp)
    theoD, realD, sameD = compare(theoreticalDown, actualDown)

    # theory and measurement disagree on direction
    sameUD = compare(theoreticalUp, actualDown)[2]
    sameDU = compare(theoreticalDown, actualUp)[2]

    return (theoU, realU, sameU, theoD, realD, sameD, sameUD, sameDU)

In [170]:
# get number of theoretically DEGs 

def getTLength(dualPerturb):
    """Count the theoretically up- and down-regulated genes of a dual perturbation.

    ``merge`` is evaluated once (it used to run once per count).

    Args:
        dualPerturb: dual-perturbation name; must be one of ``dualGenes()``.

    Returns:
        ``1`` (after printing an error) when the name is unknown; otherwise
        ``(number of merged up genes, number of merged down genes)``.
    """
    if dualPerturb not in dualGenes():
        print("error: perturbation not found")
        return 1

    # first two names from genes() that occur as a substring of the dual name
    target1 = ''
    target2 = ''
    for gene in genes():
        if gene in dualPerturb:
            if target1 == '':
                target1 = gene
            elif target2 == '':
                target2 = gene

    mergedUp, mergedDown = merge(target1, target2)
    return (len(mergedUp), len(mergedDown))


In [171]:
#for getting single fold change
def getSignleFC(dualPerturb, gene1):
    """Look up the single-target fold changes of ``gene1`` for a dual perturbation.

    Each target is looked up once with ``geneTarget`` (previously twice per
    target). A target for which ``geneTarget`` returns ``0`` now yields
    ``'NA'`` instead of raising ``TypeError``.

    Args:
        dualPerturb: dual-perturbation name; must be one of ``dualGenes()``.
        gene1: name of the gene whose fold change is wanted.

    Returns:
        ``1`` (after printing an error) when the dual name is unknown;
        otherwise ``(value1, value2)``: the log2_fc recorded for ``gene1``
        under the first and second target, or ``'NA'`` where ``gene1`` is
        not among that target's regulated genes.
    """
    if dualPerturb not in dualGenes():
        print("error: perturbation not found")
        return 1

    # first two names from genes() that occur as a substring of the dual name
    target1 = ''
    target2 = ''
    for gene in genes():
        if gene in dualPerturb:
            if target1 == '':
                target1 = gene
            elif target2 == '':
                target2 = gene

    values = []
    for target in (target1, target2):
        value = 'NA'
        # up list is scanned before the down list; the last match wins
        for regulated in (geneTarget(target) or ([], [])):
            for element in regulated:
                if element[0] == gene1:
                    value = element[1]
        values.append(value)

    return (values[0], values[1])



In [172]:
# for testing

def opposite(dualPerturb):
    """Find genes that land in both the merged up list and the merged down list.

    Such a gene is raised by one target and lowered by the other. ``merge``
    is evaluated once (it used to run twice).

    Args:
        dualPerturb: dual-perturbation name; must be one of ``dualGenes()``.

    Returns:
        ``1`` (after printing an error) when the name is unknown; otherwise
        a list of ``(dualPerturb, gene)`` tuples, one per gene found in both
        merged lists.
    """
    if dualPerturb not in dualGenes():
        print("error: perturbation not found")
        return 1

    # first two names from genes() that occur as a substring of the dual name
    target1 = ''
    target2 = ''
    for gene in genes():
        if gene in dualPerturb:
            if target1 == '':
                target1 = gene
            elif target2 == '':
                target2 = gene

    upMerge, downMerge = merge(target1, target2)

    return [(dualPerturb, up[0])
            for up in upMerge
            for down in downMerge
            if up[0] == down[0]]


In [173]:
# to generate text files comparing the guides (perturbations) that target the same gene
def perturbationsCompare():
    """Write pairwise comparisons of the guides that target each gene.

    For every gene from ``genes()`` and every unordered pair of its guides
    (from ``perturbations(gene)``), four comparisons are appended to four
    text files: up vs up, down vs down, up vs down, and down vs up.

    The four near-identical sections of the earlier version are driven from
    one table. The guide list is read once per gene and ``compareM`` runs
    once per pair instead of three times. The text written is unchanged.

    NOTE(review): the files are opened in append mode, so running this cell
    again adds a second copy of every report. Delete the files first if a
    fresh report is wanted.
    """
    # (file, direction of first guide, direction of second guide,
    #  text printed before the first guide, text printed between the guides)
    reports = (
        ('comparingGuidesUP.txt', 'u', 'u',
         "comparing up regulation of", " and "),
        ('comparingGuidesDOWN.txt', 'd', 'd',
         "comparing down regulation of", " and "),
        ('comparingGuidesUP_DOWN.txt', 'u', 'd',
         "comparing up regulation of ", " and down regulation of "),
        ('comparingGuidesDOWN_UP.txt', 'd', 'u',
         "comparing down regulation of ", " and up regulation of "),
    )

    for gene in genes():
        guides = perturbations(gene)  # one file read per gene
        for fileName, firstDir, secondDir, lead, joiner in reports:
            with open(fileName, 'a') as out:
                for i, first in enumerate(guides):
                    for second in guides[i + 1:]:
                        u1, u2, s = compareM(first, 'p', firstDir, second, 'p', secondDir)
                        print(lead, first, joiner, second, ": ", file=out)
                        print("the unqiue", len(u1), " elements in ", first, file=out)
                        print(u1, file=out)
                        print("the unique", len(u2), " elements in ", second, file=out)
                        print(u2, file=out)
                        print("the shared", len(s), " elements: ", file=out)
                        print(s, "\n\n", file=out)


perturbationsCompare()


In [174]:
# main function 

def _writeMatchCsv(fileName, matches, estimIndex, dualIndex):
    """Write one table of theoretical-vs-actual matches to ``fileName``.

    Args:
        fileName: CSV file to (over)write.
        matches: dict mapping a dual-target name to its non-empty list of
            ``(gene, theoretical log2_fc, actual log2_fc)`` tuples.
        estimIndex: 0 to report the size of the theoretical up list in the
            ``numEstim`` column, 1 for the theoretical down list.
        dualIndex: 0 to report the size of the measured up list in the
            ``numDual`` column, 1 for the measured down list.
    """
    with open(fileName, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['dual-target', 'gene', 'e log2_fc', 'a log2_fc', 'FC1', 'FC2', 'numEstim', 'numDual', 'numSame'])
        for key, values in matches.items():
            # identical for every row of this dual target, so looked up once
            numEstim = getTLength(key)[estimIndex]
            numDual = len(dualTarget(key)[dualIndex])
            for value in values:
                fc1, fc2 = getSignleFC(key, value[0])
                writer.writerow([key] + list(value) + [fc1, fc2, numEstim, numDual, len(values)])


def main():
    """Compare every dual-target perturbation with its theoretical estimate.

    For each dual perturbation a theoretical list of differentially expressed
    genes is built from the two single targets (see ``merge``) and compared
    with the measured dual data (see ``analyze``). Four CSV files are written
    and then printed:

    * ``upMatchDict.csv``     - up in theory and in the real data
    * ``downMatchDict.csv``   - down in theory and in the real data
    * ``tUp_aDownDict.csv``   - up in theory, down in the real data
    * ``tDown_aUpDict.csv``   - down in theory, up in the real data

    Each row is (dual target, gene, theoretical log2_fc, actual log2_fc,
    single-target fold changes FC1/FC2, size of the theoretical list, size of
    the measured list, number of matches for that dual target).

    Afterwards it prints, per dual target, the first gene (if any) that one
    target raises and the other lowers, followed by a spot check of DLK1
    under NR2C2, YY1 and NR2C2-YY1.

    ``analyze`` is run once per dual perturbation and its result reused for
    all four tables; the earlier version re-ran it up to eight times each.

    Usage examples for the building blocks (run them in a cell):
        genes()                          all target genes
        dualGenes()                      all dual-target perturbations
        findDual("ELF1")                 dual perturbations involving ELF1
        geneTarget("YY1")                (up list, down list) for target YY1
        perturbations("YY1")             guides that target YY1
        guideImpact("p-sgELF1-2")        (up list, down list) for one guide
        dualTarget("ELF1-ELK1")          (up list, down list) for a dual target
        compareM("YY1", 't', 'u', "CREB1", 't', 'u')
        compareM("p-sgYY1-3", 'p', 'u', 'p-sgYY1-10', 'p', 'u')
    """
    dualNames = dualGenes()
    analyses = {name: analyze(name) for name in dualNames}

    # (file, index into analyze()'s result, theoretical list index, measured list index)
    tables = (
        ('upMatchDict.csv', 2, 0, 0),
        ('downMatchDict.csv', 5, 1, 1),
        ('tUp_aDownDict.csv', 6, 0, 1),
        ('tDown_aUpDict.csv', 7, 1, 0),
    )
    for fileName, analysisIndex, estimIndex, dualIndex in tables:
        # dual targets without any match are left out of the table
        matches = {name: result[analysisIndex]
                   for name, result in analyses.items()
                   if len(result[analysisIndex]) != 0}
        _writeMatchCsv(fileName, matches, estimIndex, dualIndex)

    # printing out what was written
    headings = (
        ("up regulated matches: ", 'upMatchDict.csv'),
        ("down regulated matches: ", 'downMatchDict.csv'),
        ("theoretically up regulated, actually down regulated: ", 'tUp_aDownDict.csv'),
        ("theoretically down regulated, actually up regulated: ", 'tDown_aUpDict.csv'),
    )
    for heading, fileName in headings:
        print(heading)
        print(pd.read_csv(fileName), "\n\n")

    # any dual targets that have one of their targets up regulate a gene, but the other target down regulates it
    print("checking opposite: ")
    for name in dualNames:
        conflicts = opposite(name)
        if len(conflicts) != 0:
            print(conflicts[0])

    # spot check: how DLK1 responds to each single target
    for target in ("NR2C2", "YY1"):
        upReg, downReg = geneTarget(target)
        for element in upReg:
            if element[0] == "DLK1":
                print("up regulated by " + target + ": ", element[1])
        for element in downReg:
            if element[0] == "DLK1":
                print("down regulated by " + target + ": ", element[1])

    # ... and to the dual target; prints 0 when DLK1 is in neither list
    print("dual expression of DLK1 by NR2C2-YY1: ", end='')
    found = False
    for regulated in dualTarget("NR2C2-YY1"):
        for element in regulated:
            if element[0] == "DLK1":
                print(element[1])
                found = True

    if not found:
        print("0")

    return

main()

up regulated matches: 
  dual-target   gene  e log2_fc  a log2_fc     FC1     FC2  numEstim  numDual  \
0   ELF1-ELK1    CA1     0.2618     1.4162  0.2618     NaN        13       12   
1   ELF1-E2F4   HBE1     0.2824     0.5193     NaN  0.2824        24        9   
2   ELF1-E2F4   MT1X     0.6206     1.0896     NaN  0.6206        24        9   
3    EGR1-YY1   DLK1     0.3366     0.4258     NaN  0.3366        65        6   
4   GABPA-YY1   DLK1     0.3366     0.6108     NaN  0.3366        69       26   
5    YY1-E2F4  KLHL8     0.2663     1.3177  0.2663     NaN        81       16   
6    YY1-E2F4  MS4A3     0.2335     0.9063     NaN  0.2335        81       16   
7    YY1-E2F4   DLK1     0.3366     0.5664  0.3366     NaN        81       16   
8    YY1-E2F4   MT1X     0.6206     2.1446     NaN  0.6206        81       16   

   numSame  
0        1  
1        2  
2        2  
3        1  
4        1  
5        4  
6        4  
7        4  
8        4   


down regulated matches: 
  dual-t