In [32]:
%run reFABSwithVectors.ipynb
%run ontologyPackage/ontologySTATanalysis.ipynb

In [48]:
def getGOfromAnalysis(goAnalysis):
    """Collect the items produced by iterating ``goAnalysis`` into a new list.

    Args:
        goAnalysis: Iterable of GO terms. Presumably the analysis mapping
            (GO term -> [odds-ratio, p-value, ...]), in which case iteration
            yields its keys, i.e. the GO terms -- TODO confirm against
            ``conductAnalysis``.

    Returns:
        list: A fresh list with every iterated item, in iteration order.
    """
    return list(goAnalysis)

In [49]:
def updateGOdict(goL):
    """Build a lookup restricted to the GO terms listed in ``goL``.

    Each value is read from the module-level ``go2gene`` mapping, which is
    defined outside this cell. Per the original note, those values are the
    gene Entrez IDs associated with the term.

    Args:
        goL: Iterable of GO terms to keep.

    Returns:
        dict: ``{term: go2gene[term]}`` for every term in ``goL``.

    Note:
        A term that is missing from ``go2gene`` makes the lookup fail; the
        error is propagated unchanged.
    """
    GOdict = {}
    for term in goL:
        GOdict[term] = go2gene[term]
    return GOdict

In [58]:
def firstRun(userSitesL, window, method='TSS', simN=100):
    """Analyse the user's sites, then launch the random-site simulation.

    The function windows the genes, maps the user's sites onto them, runs the
    GO analysis on the mapped genes, estimates how many random sites need to
    be sampled per simulation, and finally hands everything to ``simulation``.

    Args:
        userSitesL: List of lists of sites. Per the original note it must be
            ordered by chromosome number.
        window: Window value forwarded to ``addWindowTSS`` / ``addWindowBODY``.
        method: ``'TSS'`` or ``'BODY'``; selects which windowing helper is used.
        simN: Number of simulations forwarded to ``simulation`` (default 100).

    Returns:
        Whatever ``simulation`` returns, or the string
        ``'Only TSS or BODY method allowed!'`` when ``method`` is neither
        ``'TSS'`` nor ``'BODY'``.

    Raises:
        ZeroDivisionError: If the calibration rounds map fewer genes in total
            than there are rounds, so the truncated average ``nPrime`` is 0.
    """
    if method == 'TSS':
        windowedGenes = addWindowTSS(window)
    elif method == 'BODY':
        windowedGenes = addWindowBODY(window)
    else:
        return 'Only TSS or BODY method allowed!'

    userMapped = geneReadSites(userSitesL, windowedGenes)
    mappedCount = len(userMapped)
    totalSites = sum(len(siteGroup) for siteGroup in userSitesL)

    userAnalysis = conductAnalysis(userMapped)
    goTerms = getGOfromAnalysis(userAnalysis)

    # Calibration: draw a few random site sets of the same size as the input
    # and average how many genes they map to (nPrime).
    calibRounds = 5
    calibSiteSets = nRandSitesSim(totalSites, calibRounds)
    calibMappedTotal = sum(
        len(geneReadSites(siteSet, windowedGenes)) for siteSet in calibSiteSets
    )
    nPrime = int(calibMappedTotal / calibRounds)

    # Sites to sample per simulation:
    # (genes mapped by the input * number of input sites) / nPrime.
    numSitesSamp = int((mappedCount * totalSites) / nPrime)

    # windowedGenes is the gene list carrying the window bounds.
    return simulation(goTerms, windowedGenes, simN, numSitesSamp, userAnalysis)

In [59]:
def compareGOAnalysis(origAnalysis, simAnalysis):
    """Count, per GO term, the simulated results that beat the original.

    For every term in ``simAnalysis`` the counter stored at index 2 of the
    original entry is incremented when the simulated entry has a strictly
    larger value at index 0 and a strictly smaller value at index 1. Per the
    original note, index 0 is the odds ratio and index 1 the p-value.

    Args:
        origAnalysis: Mapping of GO term -> mutable sequence with at least
            three items; it is updated in place.
        simAnalysis: Mapping of GO term -> sequence with at least two items.
            Every term must also be present in ``origAnalysis``.

    Returns:
        The same ``origAnalysis`` object, after the in-place updates.
    """
    for term in simAnalysis:
        simStats = simAnalysis[term]
        origStats = origAnalysis[term]
        higherFirst = simStats[0] > origStats[0]
        if higherFirst and simStats[1] < origStats[1]:
            origStats[2] += 1
    return origAnalysis

In [60]:
def simulation(GOlist, geneL, nSim, nSites, origAnalysis):
    """Run ``nSim`` random-site simulations and tally them against the original.

    For each simulation, a random site set is mapped onto ``geneL``, the GO
    analysis is repeated for the terms in ``GOlist``, and the result is
    compared with the running analysis via ``compareGOAnalysis``.

    Args:
        GOlist: GO terms passed to ``conductAnalysis`` as ``OntologyL``.
        geneL: Gene list forwarded to ``geneReadSites``.
        nSim: Number of simulations to run.
        nSites: Number of random sites requested per simulation.
        origAnalysis: Analysis of the user's data. It is passed straight to
            ``compareGOAnalysis``, so it is not copied here -- callers should
            expect the object they pass in to be updated if that helper
            mutates its first argument.

    Returns:
        The analysis returned by the last ``compareGOAnalysis`` call, or
        ``origAnalysis`` itself when ``nSim`` is 0.
    """
    # All random site sets are generated up front, one per simulation.
    sitesL = nRandSitesSim(nSites, nSim)
    outputAnalysis = origAnalysis

    for sim in range(nSim):
        mapped = geneReadSites(sitesL[sim], geneL)
        randAnalysis = conductAnalysis(mapped, OntologyL=GOlist)
        outputAnalysis = compareGOAnalysis(outputAnalysis, randAnalysis)

    return outputAnalysis