In [182]:
#importing needed packages 
import csv # built in python csv reader
import matplotlib.pyplot as plt
from matplotlib_venn import venn3


In [183]:
# Paths to the CSV files storing the DESeq2 results.
# These are absolute paths; update them to match where the files live in your environment.
pathToGuideData = '/home/data/refined/bc_dcis/EJM_Data_Output/Dixit_Analysis/Dixit_N4_Pseudobulk_Count_DeSeq2_Analysis.Controls_vs_SingleGuide.csv'
pathToGeneData = '/home/data/refined/bc_dcis/EJM_Data_Output/Dixit_Analysis/Dixit_N4_Pseudobulk_Count_DeSeq2_Analysis.JustControls_vs_SingleTargets.csv'
pathToGeneExpandedData = '/home/data/refined/bc_dcis/EJM_Data_Output/Dixit_Analysis/Dixit_N4_Pseudobulk_Count_DeSeq2_Analysis.Controls_vs_SingleTargets.csv' 
pathToDualGeneData = '/home/data/refined/bc_dcis/EJM_Data_Output/Dixit_Analysis/Dixit_N4_Pseudobulk_Count_DeSeq2_Analysis.DualIntergenic_vs_MultiTargets.csv'

In [184]:
# return a list of all target genes specified in the data file

def genes(path=None):
    """Return every distinct target gene named in the gene-level results CSV.

    The label is read from column index 6 of each row. The values 'NA' and
    'perturbation' are ignored (the latter is presumably the header cell of
    that column). Blank rows and rows with fewer than seven columns are
    skipped instead of raising IndexError.

    Args:
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToGeneData`` is used, which is the original behaviour.

    Returns:
        list[str]: the distinct labels, in order of first appearance.

    Side effects:
        Prints the number of labels found followed by the list itself.
    """
    fileToRead = pathToGeneData if path is None else path

    unique = []
    seen = set()  # constant-time membership test; `unique` keeps file order

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6:
                continue  # blank or truncated row: no label column to read
            label = row[6]
            if label in ('NA', 'perturbation') or label in seen:
                continue
            seen.add(label)
            unique.append(label)

    print(len(unique), " items found")
    print(unique)
    return unique

In [185]:
# getter method to list all unique dual perturbations

def dualGenes(path=None):
    """Return every distinct dual perturbation named in the dual-gene results CSV.

    The label is read from column index 6 of each row. The values 'NA' and
    'perturbation' are ignored (the latter is presumably the header cell of
    that column). Blank rows and rows with fewer than seven columns are
    skipped instead of raising IndexError.

    Args:
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToDualGeneData`` is used, which is the original behaviour.

    Returns:
        list[str]: the distinct labels, in order of first appearance.

    Side effects:
        Prints the number of labels found followed by the list itself.
    """
    fileToRead = pathToDualGeneData if path is None else path

    # The result list used to be called `dualGenes`, shadowing this function
    # inside its own body; it has a distinct name now.
    labels = []
    seen = set()  # constant-time membership test; `labels` keeps file order

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6:
                continue  # blank or truncated row: no label column to read
            label = row[6]
            if label in ('NA', 'perturbation') or label in seen:
                continue
            seen.add(label)
            labels.append(label)

    print(len(labels), " items found")
    print(labels)
    return labels


In [186]:
# function so you can specify one gene and see if there is any dual perturbation defined for it
# i.e. what are the dual perturbations that include a perturbation to parameter gene 

def findDual(gene):
    """List the dual perturbations whose name contains ``gene``.

    Calls dualGenes() to obtain every dual perturbation (which prints that
    full list as a side effect), keeps the entries in which ``gene`` occurs
    as a substring, prints the matches and returns them.
    """
    print("all dual-gene perturbations: ")
    matches = [name for name in dualGenes() if gene in name]

    print("dual perturbations including a perturbation of the specified gene: ")
    print(len(matches), " items found")
    print(matches)
    return matches

In [187]:
# given a gene (1) name this function will return a list of which genes get upregulated and downregulated when gene (1) is perturbed 
# this is only considering the target gene not specific perturbation 
 

def geneTarget(targetGene, path=None):
    """Split the rows recorded for one target gene into up- and down-regulated genes.

    Every row whose column 6 equals ``targetGene`` is classified by the sign
    of the number in column 3 (the value the notebook's printed messages call
    log2_fc). Rows with a value of exactly zero are ignored.

    Args:
        targetGene: Label to look for in column 6.
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToGeneData`` is used, which is the original behaviour.

    Returns:
        ``(upReg, downReg)``: two lists of ``(row[1], row[3])`` tuples, where
        the second item is kept as the string read from the file. Returns
        the integer ``0`` when no row lands in either list.
    """
    fileToRead = pathToGeneData if path is None else path

    upReg = []
    downReg = []

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6 or row[6] != targetGene:
                continue  # blank/truncated row, or a different label
            try:
                change = float(row[3])
            except ValueError:
                # Non-numeric entry (e.g. 'NA'): it has no direction, so skip
                # the row rather than aborting the whole scan.
                continue
            if change < 0:
                downReg.append((row[1], row[3]))
            elif change > 0:
                upReg.append((row[1], row[3]))

    if not upReg and not downReg:
        return 0  # sentinel kept for callers that test for "nothing found"

    # Returned as a tuple so a caller can keep both lists from a single call.
    return (upReg, downReg)




In [188]:
# return a list of all perturbations specified in the data file 
# the gene parameter means 'list all perturbations that target [gene]' 

# Legacy module-level view of the most recent perturbations() result. It is
# kept so notebook cells that read this name after a call keep working; new
# code should use the function's return value instead.
listOfPerturbations = []
def perturbations(gene, path=None):
    """Return the distinct perturbation labels that contain ``gene``.

    Column index 6 of every row is inspected; a label is kept when ``gene``
    occurs in it as a substring. Blank rows and rows with fewer than seven
    columns are skipped.

    The result used to be accumulated in the module-level list without ever
    being reset, so a second call returned the first call's labels as well.
    Each call now builds a fresh list; the module-level list is overwritten
    with a copy of that result.

    Args:
        gene: Substring to look for in the label column.
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToGuideData`` is used, which is the original behaviour.

    Returns:
        list[str]: matching labels in order of first appearance.
    """
    fileToRead = pathToGuideData if path is None else path

    found = []
    seen = set()  # constant-time membership test; `found` keeps file order

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6:
                continue  # blank or truncated row: no label column to read
            label = row[6]
            if gene in label and label not in seen:
                seen.add(label)
                found.append(label)

    # Slice assignment refreshes the legacy global in place (no `global`
    # statement needed) while the caller receives its own independent list.
    listOfPerturbations[:] = found
    return found

In [189]:
# given the name of a specific perturbation (guide RNA), this function will return a list of which genes get upregulated and downregulated under that perturbation
# unlike geneTarget, this considers one specific perturbation rather than the target gene as a whole
 
# Legacy module-level views of the most recent guideImpact() result. They are
# kept so notebook cells that read these names after a call keep working; new
# code should use the function's return value instead.
upRegP = []
downRegP = []

def guideImpact(guideRNA, path=None):
    """Split the rows recorded for one perturbation into up- and down-regulated genes.

    Every row whose column 6 equals ``guideRNA`` is classified by the sign of
    the number in column 3 (the value the notebook's printed messages call
    log2_fc). Rows with a value of exactly zero are ignored.

    The results used to be appended to the module-level lists without ever
    resetting them. Repeated calls therefore returned duplicated entries, an
    unknown label returned the previous call's data instead of ``0``, and the
    results of two different calls were the same list objects. Each call now
    builds fresh lists; the module-level lists are overwritten with copies.

    Args:
        guideRNA: Label to look for in column 6.
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToGuideData`` is used, which is the original behaviour.

    Returns:
        ``(up, down)``: two lists of ``(row[1], row[3])`` tuples, where the
        second item is kept as the string read from the file. Returns the
        integer ``0`` when no row lands in either list.
    """
    fileToRead = pathToGuideData if path is None else path

    up = []
    down = []

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6 or row[6] != guideRNA:
                continue  # blank/truncated row, or a different label
            try:
                change = float(row[3])
            except ValueError:
                # Non-numeric entry (e.g. 'NA'): it has no direction, so skip
                # the row rather than aborting the whole scan.
                continue
            if change < 0:
                down.append((row[1], row[3]))
            elif change > 0:
                up.append((row[1], row[3]))

    # Slice assignment refreshes the legacy globals in place (no `global`
    # statement needed) while the caller receives independent lists.
    upRegP[:] = up
    downRegP[:] = down

    if not up and not down:
        return 0  # sentinel kept for callers that test for "nothing found"

    # Returned as a tuple so a caller can keep both lists from a single call.
    return (up, down)




In [190]:
# same function as above but specify 2 target genes
# the parameter is a dual gene perturbation (you can print out all the different dual gene perturbations defined in the file using the dualGenes() method )

def dualTarget(dualPerturb, path=None):
    """Report the genes up- and down-regulated under one dual perturbation.

    Every row whose column 6 equals ``dualPerturb`` is classified by the sign
    of the number in column 3 (log2_fc, per the printed messages). Rows with
    a value of exactly zero are ignored.

    The original guard compared ``dualPerturb`` with itself, so it could
    never fire and an unknown label fell through to two empty lists. Whether
    the label exists is now decided by whether any row matched it.

    Args:
        dualPerturb: Label to look for in column 6; dualGenes() lists the
            labels available in the file.
        path: Optional path to the CSV file. When omitted, the module-level
            ``pathToDualGeneData`` is used, which is the original behaviour.

    Returns:
        ``(upReg, downReg)``: two lists of ``(row[1], row[3])`` tuples, or
        the integer ``1`` when no row carries the requested label.

    Side effects:
        Prints both lists, or the not-found message.
    """
    fileToRead = pathToDualGeneData if path is None else path

    upReg = []
    downReg = []
    found = False  # becomes True once any row carries the requested label

    # newline='' is what the csv module documentation asks for on open()
    with open(fileToRead, 'r', newline='') as file:
        for row in csv.reader(file):
            if len(row) <= 6 or row[6] != dualPerturb:
                continue  # blank/truncated row, or a different label
            found = True
            try:
                change = float(row[3])
            except ValueError:
                # Non-numeric entry (e.g. 'NA'): it has no direction, so skip
                # the row rather than aborting the whole scan.
                continue
            if change < 0:
                downReg.append((row[1], row[3]))
            elif change > 0:
                upReg.append((row[1], row[3]))

    if not found:
        print("The dual-pertubation passed is not found in the data file")
        return 1

    print("The genes that got upregulated as a result of the dual perturbation (as well as the corresponding log2_fc values) include: ")
    print(upReg)
    print("The genes that got downregulated as a result of the dual perturbation (as well as the corresponding log2_fc values) include: ")
    print(downReg)
    return (upReg, downReg)


In [191]:
# function that compares 2 lists 
# creates 3 lists: 1 that contains elements only in the first list
# 1 that contains elements only in the second list
# 1 that contains elements that are in both lists 

def compare(list1, list2):
    """Partition two lists of tuples by whether their first items match.

    Entries are matched on ``entry[0]`` only. The original implementation
    scanned the other list for every entry and took the shared entry from a
    loop variable left over after ``break``; this version produces the same
    results with one dict and one set lookup per entry.

    Args:
        list1: Sequence of tuples with a hashable first item and at least
            two items (e.g. ``(gene, log2_fc)``).
        list2: Same shape as ``list1``.

    Returns:
        A 3-tuple ``(unique_list1, unique_list2, same_list)``:
          * unique_list1 - entries of ``list1`` whose key is absent from
            ``list2``, in original order;
          * unique_list2 - entries of ``list2`` whose key is absent from
            ``list1``, in original order;
          * same_list - one ``(key, value_from_list1, value_from_list2)``
            per entry of ``list2`` whose key also occurs in ``list1``. When
            ``list1`` holds the key more than once, its first occurrence
            supplies the value.
    """
    # First occurrence of each key in list1; setdefault never overwrites, so
    # the earliest entry wins, just as the old first-match scan did.
    first_in_list1 = {}
    for entry in list1:
        first_in_list1.setdefault(entry[0], entry)

    keys_in_list2 = {entry[0] for entry in list2}

    unique_list1 = [entry for entry in list1 if entry[0] not in keys_in_list2]

    unique_list2 = []
    same_list = []
    for entry in list2:
        match = first_in_list1.get(entry[0])
        if match is None:
            unique_list2.append(entry)
        else:
            same_list.append((match[0], match[1], entry[1]))

    return (unique_list1, unique_list2, same_list)


In [192]:
# mutation1 is a specific gene or perturbation
# if mutation1 is a specific perturbation pass type1 as 'p', if it is a target gene pass 't'; if you are interested in the upregulated list of this mutation pass direction1 as 'u' (or 'd' for the downregulated list)
# mutation2 is a specific gene or perturbation
# if mutation2 is a specific perturbation pass type2 as 'p', if it is a target gene pass 't'; if you are interested in the upregulated list of this mutation pass direction2 as 'u' (or 'd' for the downregulated list)

def compareM(mutation1, type1, direction1, mutation2, type2, direction2):
    """Compare the regulated-gene lists of two mutations and print the result.

    For each mutation the requested list is fetched (through guideImpact for
    type 'p', through geneTarget for type 't'), printed, and then handed to
    compare(). The three resulting lists are printed: entries only in the
    first list, entries only in the second, and the shared genes.

    Changes from the earlier version:
      * each lookup runs once per mutation instead of twice, and compare()
        runs once instead of three times;
      * a lookup that reports "nothing found" (returns 0) is treated as two
        empty lists instead of raising TypeError on ``0[0]``;
      * an unrecognised direction is reported as an error instead of quietly
        producing empty results.

    Args:
        mutation1: Name of the first target gene or perturbation.
        type1: 'p' if ``mutation1`` is a specific perturbation, 't' if it is
            a target gene.
        direction1: 'u' to use its upregulated list, 'd' for downregulated.
        mutation2, type2, direction2: Same meaning, for the second mutation.

    Returns:
        ``0`` when a type or direction is invalid; otherwise ``None`` (the
        results are printed, not returned).
    """
    first = _compareM_select(mutation1, type1, direction1)
    if first is None:
        return 0

    second = _compareM_select(mutation2, type2, direction2)
    if second is None:
        return 0

    u1, u2, s = compare(first, second)

    print("\n")
    # The three separator prints use end='' on purpose: the next heading
    # continues on the same output line, as it always has.
    print("******************************************************************************", end='')
    print("******************************************************************************", end='')
    print("******************************************************************************", end='')
    print("The unique elements in the first list are: ")
    print(u1)
    print("\n")
    print("The unique elements in the second list are: ")
    print(u2)
    print("\n")
    print("The elements that are in both lists with the format (gene, log2_FC for first list, log2_FC for second list)")
    print(s)


def _compareM_select(mutation, kind, direction):
    """Fetch, print and return one mutation's 'u' or 'd' list; None on bad input."""
    if kind == 'p':
        label, lookup = "perturbation ", guideImpact
    elif kind == 't':
        label, lookup = "target ", geneTarget
    else:
        print("error: invalid type entered")
        return None

    if direction not in ('u', 'd'):
        print("error: invalid direction entered")
        return None

    result = lookup(mutation)
    if result == 0:
        # Both lookups return 0 when nothing matched; carry on with empty
        # lists so the comparison still runs.
        print("warning: no data found for ", mutation)
        result = ([], [])

    upregulated, downregulated = result
    if direction == 'u':
        chosen, heading = upregulated, " upregulated: "
    else:
        chosen, heading = downregulated, " downregulated: "

    print(label, mutation, heading)
    print(chosen)
    print("\n")
    return chosen

In [193]:
# for comparing a dual target to its individual targets

In [None]:
# for chart generation 

In [194]:
#search a list 

In [195]:
# main function 

def main():
    """Demo driver: print the genes regulated by one target, then run one comparison.

    geneTarget() is called once and its result checked before use, because
    it returns 0 (not a tuple) when the target gene has no rows. The
    commented lines below are kept as usage examples for the other helpers.
    The earlier triple-quoted blocks referred to ``upReg``/``downReg`` names
    that do not exist in this scope and have been removed.
    """

    # this lists out all the target genes specified in the file (the items in this list can be passed to the geneTarget function)
    #genes()

    # specify a target gene and get a list of genes that get upregulated and downregulated when that gene is perturbed
    targetGene1 = "YY1"
    result = geneTarget(targetGene1)
    if result == 0:
        print("The specified perturbation was not found in the data")
    else:
        upReg, downReg = result
        print(upReg)
        print(downReg)

    # this lists all perturbations that are associated with a specific gene
    #print(perturbations('ELF1'))

    # this lists the up and down regulation effects of a specific perturbation
    #print(guideImpact("p-sgELF1-2"))

    # this lists all the dual gene perturbations that are defined in the file specified at the top of the notebook (pathToDualGeneData)
    #dualGenes()

    # this lists all dual gene perturbations defined that include a perturbation to the gene passed as a parameter
    #findDual('NR2C2')

    # this lists all the genes that get upregulated and downregulated as a result of a dual gene perturbation
    #dualTarget("ELF1-CREB1")

    # comparing lists
    # pass, for each of the two mutations: its name, its type (either 'p' or 't') and a direction (either 'u' or 'd')
    # the call below compares the genes downregulated when ELF1 is targeted with the genes upregulated when ELF1 is targeted
    compareM("ELF1", 't', 'd', "ELF1", 't', 'u')
    
main()

[('SPRR2D', '2.5086'), ('RCSD1', '0.2538'), ('FAM89A', '0.2652'), ('TMSB10', '0.2424'), ('H1FX', '0.3314'), ('LINC01088', '0.5153'), ('CXCL14', '0.3507'), ('RNF130', '0.3601'), ('HIST1H4C', '0.2194'), ('GTF2H5', '0.2155'), ('BRI3', '0.2413'), ('MAGED1', '0.735'), ('RPL39', '0.2492'), ('VIM', '0.2018'), ('SNCG', '0.292'), ('TAF10', '0.206'), ('POLD4', '0.2433'), ('ACRBP', '0.4237'), ('C12orf57', '0.2103'), ('CDKN3', '0.2302'), ('DLK1', '0.2343'), ('CBX1', '0.2764'), ('RP11-433M22.2', '4.5965'), ('C18orf56', '0.2216'), ('TYROBP', '0.3731'), ('TRAPPC6A', '0.2497'), ('PRR24', '0.2609'), ('ATF5', '0.2316'), ('C19orf81', '0.212')]
[('CDK11A', '-0.2415'), ('DDOST', '-0.2183'), ('HSPG2', '-0.2701'), ('C1orf63', '-0.2764'), ('SLC2A1', '-0.3383'), ('PIGK', '-0.4892'), ('SLC16A1', '-0.2305'), ('ATP1A1', '-0.2089'), ('TMCO1', '-0.2118'), ('LBR', '-0.2206'), ('LAPTM4A', '-0.2275'), ('EPCAM', '-0.233'), ('CBWD2', '-0.4684'), ('CDCA7', '-0.317'), ('RBM45', '-0.8026'), ('TFPI', '-0.3775'), ('EMC3', '-