Skip to content

Commit

Permalink
version 2.1.0.1 9/25/16
Browse files Browse the repository at this point in the history
  • Loading branch information
nsalomonis committed Sep 26, 2016
1 parent c7f3042 commit 50436bf
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 69 deletions.
Binary file modified AltDatabase/.DS_Store
Binary file not shown.
@@ -1,2 +1,2 @@
AltResults/AlternativeOutput/Search:PSI.txt
AltResults/AlternativeOutput/Search:PSI
Restriction:.txt
Expand Down
2 changes: 2 additions & 0 deletions ExpressionBuilder.py
Expand Up @@ -112,6 +112,8 @@ def checkExpressionFileFormat(expFile,reportNegatives=False):
except Exception: uid = key
if '' in t[1:]:
values = [0 if x=='' else x for x in t[startIndex:]]
elif 'NA' in t[1:]:
values = [0 if x=='NA' else x for x in t[startIndex:]]
else:
values = t[1:]
try: values = map(lambda x: float(x), values)
Expand Down
91 changes: 59 additions & 32 deletions RNASeq.py
Expand Up @@ -2675,7 +2675,15 @@ def getMaxCounts(fn,cutoff,filterExport=False,filterExportDir=False):
try: uid, coordinates = string.split(key,'=')
except Exception: uid = key
try: maxExp = max(map(lambda x: float(x), t[1:]))
except Exception: maxExp=cutoff+1
except Exception:
if 'NA' in t[1:]:
tn = [0 if x=='NA' else x for x in t[1:]] ### Replace NAs
maxExp = max(map(lambda x: float(x), tn))
elif '' in t[1:]:
tn = [0 if x=='' else x for x in t[1:]] ### Replace blanks
maxExp = max(map(lambda x: float(x), tn))
else:
maxExp=cutoff+1

#gene = string.split(uid,':')[0]
if maxExp > cutoff:
Expand Down Expand Up @@ -3021,18 +3029,30 @@ def optimizeNumberOfGenesForDiscovery(expFile,platform,expressed_uids,fold=2,sam
except Exception: uid = key
try: values = map(lambda x: float(x), t[1:])
except Exception:
values=[]
for value in t[1:]:
try: values.append(float(value))
except Exception: values.append(-9999)
values = numpy.ma.masked_values(values, -9999.)
values = t[1:]
if 'NA' in values:
values = [0 if x=='NA' else x for x in values] ### Replace NAs
values = map(lambda x: float(x), values)
else:
values=[]
for value in t[1:]:
try: values.append(float(value))
except Exception: values.append(-9999)
values = numpy.ma.masked_values(values, -9999.)
#gene = string.split(uid,':')[0]
#if uid == 'ENSMUSG00000041515': print 'IRF8'
if uid in expressed_uids:
#slope_exp_ratio = determinePattern(vs)
#if slope_exp_ratio<2 and slope_exp_ratio>0.5:
if platform == 'RNASeq':
values = map(lambda x: math.log(x+1,2),values)
try: values = map(lambda x: math.log(x+1,2),values)
except Exception:
if 'NA' in values:
values = [0 if x=='NA' else x for x in values] ### Replace NAs
values = map(lambda x: math.log(x+1,2),values)
elif '' in values:
values = [0 if x=='' else x for x in values] ### Replace NAs
values = map(lambda x: math.log(x+1,2),values)
vs = list(values); vs.sort()
if (vs[-1*samplesDiffering]-vs[samplesDiffering-1])>math.log(fold,2): ### Ensures that atleast 4 samples are significantly different in the set
expressed_values[uid] = values
Expand Down Expand Up @@ -4648,6 +4668,12 @@ def runKallisto(species,dataset_name,root_dir,fastq_folder,returnSampleNames=Fal
### If installed globally
retcode = subprocess.call(['kallisto', "index","-i", kallisto_root+species, fasta_file])

reimportExistingKallistoOutput = False

if reimportExistingKallistoOutput:
### Just get the existing Kallisto output folders
fastq_paths = read_directory(output_dir)

kallisto_folders=[]
expMatrix={}
countMatrix={}
Expand All @@ -4656,34 +4682,35 @@ def runKallisto(species,dataset_name,root_dir,fastq_folder,returnSampleNames=Fal
for n in fastq_paths:
output_path = output_dir+n
kallisto_folders.append(output_path)
begin_time = time.time()
print 'Running kallisto on:',n,
p=fastq_paths[n]
b=[" > "+n+'.sam']
#"""
if paired == 'paired':
try:
#retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--pseudobam"]+p+b)
retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path]+p)
else:
if os.name == 'nt':
if reimportExistingKallistoOutput == False:
begin_time = time.time()
print 'Running kallisto on:',n,
p=fastq_paths[n]
b=[" > "+n+'.sam']
#"""
if paired == 'paired':
try:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
#retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--pseudobam"]+p+b)
retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path]+p)
except Exception:
try: retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path]+p)
else:
if os.name == 'nt':
try:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
try: retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
else:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
else:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
if retcode == 0: print 'completed in', int(time.time()-begin_time), 'seconds'
else: print 'kallisto failed due to an unknown error (report to altanalyze.org help).'
if retcode == 0: print 'completed in', int(time.time()-begin_time), 'seconds'
else: print 'kallisto failed due to an unknown error (report to altanalyze.org help).'

#"""
#"""
input_path = output_path+'/abundance.txt'
try:
try: expMatrix,countMatrix=importTPMs(n,input_path,expMatrix,countMatrix)
Expand Down Expand Up @@ -4901,9 +4928,9 @@ def getFASTAFile(species):

filename = '/Users/saljh8/Downloads/counts.GSE81682_HTSeq.txt'
#fastRPKMCalculate(filename);sys.exit()
calculateRPKMsFromGeneCounts(filename,'Mm',AdjustExpression=False);sys.exit()
#calculateRPKMsFromGeneCounts(filename,'Mm',AdjustExpression=False);sys.exit()
#copyICGSfiles('','');sys.exit()
runKallisto('Dr','Etv2 GFP Single Cell','/Volumes/Seagate Exp/Single Cell RNA seq Run 1696 FASTQ','/Volumes/Seagate Exp/');sys.exit()
runKallisto('Hs','BC','/Volumes/salomonis2/GSE45419-Breast-Cancer-cell-line/RestrictedAnalysis/','/Volumes/salomonis2/GSE45419-Breast-Cancer-cell-line/RestrictedAnalysis/');sys.exit()
import multiprocessing as mlp
import UI
species='Mm'; platform = "3'array"; vendor = 'Ensembl'
Expand Down
1 change: 1 addition & 0 deletions R_interface.py
Expand Up @@ -184,6 +184,7 @@ def checkForDuplicateIDs(input_file):
return input_file

def importHopachOutput(filename):
print filename
""" Import the ID order information """
db={} ### Used to store the cluster data
hopach_clusters=[]
Expand Down
103 changes: 79 additions & 24 deletions SashimiPlot.py
Expand Up @@ -173,24 +173,67 @@ def sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=None):
#print traceback.format_exc()
pass

processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename,bamdir,splicing_events,sample_group_db,groups,expandedSearch)
mopup_events=[]
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
mopup_events = getMopUpEvents(splicing_events, processed_events)

### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
#print len(splicing_events),len(processed_events),len(mopup_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
if len(processed_events)>0:
mopup_events = getMopUpEvents(mopup_events, processed_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
return gene_to_symbol

def getMopUpEvents(splicing_events, processed_events):
    """Return the splicing events that still need to be visualized.

    An event is either a single junction ('gene:j1') or a junction pair
    separated by one space ('gene:j1 gene:j2').  A paired event counts as
    already processed when either of its two junctions appears in
    processed_events.

    splicing_events  -- iterable of requested event strings
    processed_events -- events/junctions already submitted to SashimiPlot
    Returns a new list with the remaining events, input order preserved.
    """
    mopup_events = []
    for event in splicing_events:
        add = True
        if event in processed_events:
            add = False
        if ' ' in event:
            try:
                j1, j2 = event.split(' ')
                if j1 in processed_events:
                    add = False
                if j2 in processed_events:
                    add = False
            except Exception:
                # Unexpected format (e.g. more than one space): keep the event
                pass
        if add:
            mopup_events.append(event)
    return mopup_events

### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
#print len(splicing_events),len(processed_events),len(mopup_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
return gene_to_symbol
def reorderEvents(events):
    """Reorder paired splicing-event strings ('junction1 junction2') so the
    junction order matches the exclusion-junction selection used by
    SashimiIndex.  Entries are rewritten in place in the supplied list,
    which is also returned (the list is aliased, not copied).

    Each event half looks like 'gene:E1.2-E3.1' — a one-character-prefixed
    block.region coordinate on each side of '-'.  # assumes this format —
    TODO confirm against the upstream PSI-file writer.
    """
    splicing_events = events  # alias: swaps below mutate the caller's list
    index = 0
    for e in events:
        j1o, j2o = string.split(e, ' ')   # the two gene:junction tokens
        gene, j1 = string.split(j1o, ':')
        gene, j2 = string.split(j2o, ':')
        if '-' in j1 and '-' in j2:
            # Decompose both junctions into start/end block.region numbers,
            # stripping the one-character prefix (e.g. 'E') before splitting.
            j1a, j1b = string.split(j1, '-')
            j2a, j2b = string.split(j2, '-')
            j1a_block, j1a_region = string.split(j1a[1:], '.')
            j2a_block, j2a_region = string.split(j2a[1:], '.')
            j1b_block, j1b_region = string.split(j1b[1:], '.')
            j2b_block, j2b_region = string.split(j2b[1:], '.')
            if int(j1b_block) < int(j2b_block) and int(j1a_block) < int(j2a_block):
                pass ### Occurs for complex cassette exon splicing events but matches SashimiIndex's selection for exclusion
            elif int(j1b_block) > int(j2b_block):
                # End blocks out of order: put the second junction first.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1a_block) < int(j2a_block):
                # NOTE(review): swaps when j1a_block < j2a_block, yet the first
                # branch treats that same ordering as already correct — possibly
                # intended to be '>'; confirm against SashimiIndex ordering.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1b_region) > int(j2b_region):
                # Blocks tie: fall back to comparing end region numbers.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1a_region) < int(j2a_region):
                # NOTE(review): '<' here vs '>' for the j1b_region test above —
                # verify this asymmetry is intentional.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e

        index += 1
    return splicing_events

def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,sample_group_db,groups,expandedSearch):
### Begin exporting parameters and events for SashimiPlot visualization
Expand Down Expand Up @@ -235,15 +278,15 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
else:
sampleIndexBegin = 1
sample_headers = t[sampleIndexBegin:]
if '.bed' not in sample_headers[0]: ### Add .bed if removed manually
sample_headers = map(lambda s: s+'.bed',sample_headers)
if '.bed' not in sample_headers[0]: ### Add .bed if removed manually
sample_headers = map(lambda s: s+'.bed',sample_headers)
index=0
sample_group_index={}
for s in sample_headers:
group = sample_group_db[s]
sample_group_index[index]=group
try: sampleReadDepth[index]=count_sum_array_db[s]
except Exception: sampleReadDepth[index]=count_sum_array_db[s]
except Exception: sampleReadDepth[index]=count_sum_array_db[s]
index+=1
firstLine = False
else:
Expand Down Expand Up @@ -308,13 +351,17 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
group_psi_values['low']=filtered_group_index1
group_psi_values['high']=filtered_group_index2
else:
gn=0
gn=0
for group in groups:
gn+=1
#if gn>4: break
if group in initial_group_psi_values:
initial_group_psi_values[group].sort()
if len(groups)>3:
if len(groups)>7:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:1])
elif len(groups)>5:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:2])
elif len(groups)>3:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:4])
else:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:5])
Expand All @@ -323,8 +370,15 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
except Exception:
print 'Cannot update the settings file. Likely permissions issue.'

try:
reordered = reorderEvents([t[2] + ' ' + t[3]])
reordered = string.split(reordered[0], ' ')
except Exception:
reordered = [t[2] + ' ' + t[3]]
reordered = string.split(reordered[0], ' ')
#print reordered
if 'PSI' in filename:
try: formatted_splice_event=string.replace(t[3],':','__')
try: formatted_splice_event = string.replace(reordered[1], ':', '__')
except Exception: pass
### Submit the query
try: ssp.plot_event(formatted_splice_event,index_dir,setting,outputdir); success = True
Expand All @@ -342,13 +396,14 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
except Exception:
success = False
#print traceback.format_exc()

"""
### Second attempt
if 'PSI' in filename and success==False: ### Only relevant when parsing the junction pairs but not genes
try: formatted_splice_event=string.replace(t[2],':','__')
try: formatted_splice_event=string.replace(reordered[0],':','__')
except Exception: pass
try: ssp.plot_event(formatted_splice_event,index_dir,setting,outputdir); # print 'success'
except Exception: pass
except Exception: pass
"""
return processed_events

def findParentDir(filename):
Expand Down Expand Up @@ -491,11 +546,11 @@ def justConvertFilenames(species,outputdir):
continue

if __name__ == '__main__':
root_dir = 'C:/Users/Nathan Salomonis/Desktop/BAMS/'
root_dir = '/Volumes/SEQ-DATA/BreastCancerTargetted/BAMs/'
events = ['Aldh3a2']
#events = None
eventsToVisualizeFilename = 'C:/Users/Nathan Salomonis/Desktop/BAMS/AltResults/Clustering/top50/Combined-junction-exon-evidence.txt'
events = None
eventsToVisualizeFilename = None
eventsToVisualizeFilename = '/Volumes/SEQ-DATA/BreastCancerTargetted/BAMs/AltResults/AlternativeOutput/Hs_RNASeq_top_alt_junctions-PSI-clust-pairwise.txt'
bamdir = root_dir
remoteSashimiPlot('Mm',root_dir,bamdir,eventsToVisualizeFilename,events=events,show=False)
remoteSashimiPlot('Hs', root_dir, bamdir, eventsToVisualizeFilename, events=events, show=False)
sys.exit()
2 changes: 2 additions & 0 deletions UI.py
Expand Up @@ -1374,6 +1374,8 @@ def displayPathway(self):
else:
try:
self.graphic_link = WikiPathways_webservice.visualizePathwayAssociations(filename,species_code,mod_type,wpid)
if len(self.graphic_link)==0:
force_no_matching_error
self.wp_status = 'Pathway images colored and saved to disk by webservice\n(see image title for location)'
self.label_status_name.set(self.wp_status)
tl = Toplevel()
Expand Down
2 changes: 1 addition & 1 deletion WikiPathways_webservice.py
Expand Up @@ -288,7 +288,7 @@ def getColoredPathway(root_dir,graphID_db,file_type,dataset_name,WPID=None):
#print len(hex_color_list),hex_color_list
#print file_type
if len(graphID_list)==0:
force_no_matching_error
continue
### revision = 0 is the most current version
#file = client.service.getColoredPathway(pwId=wpid,revision=0,graphId=graphID_list,color=hex_color_list,fileType=file_type)

Expand Down

0 comments on commit 50436bf

Please sign in to comment.