Skip to content

Commit

Permalink
version 2.1.0.1 9/25/16
Browse files Browse the repository at this point in the history
  • Loading branch information
nsalomonis committed Sep 26, 2016
1 parent c7f3042 commit 50436bf
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 69 deletions.
Binary file modified AltDatabase/.DS_Store
Binary file not shown.
@@ -1,2 +1,2 @@
AltResults/AlternativeOutput/Search:PSI.txt
AltResults/AlternativeOutput/Search:PSI
Restriction:.txt
Expand Down
2 changes: 2 additions & 0 deletions ExpressionBuilder.py
Expand Up @@ -112,6 +112,8 @@ def checkExpressionFileFormat(expFile,reportNegatives=False):
except Exception: uid = key
if '' in t[1:]:
values = [0 if x=='' else x for x in t[startIndex:]]
elif 'NA' in t[1:]:
values = [0 if x=='NA' else x for x in t[startIndex:]]
else:
values = t[1:]
try: values = map(lambda x: float(x), values)
Expand Down
91 changes: 59 additions & 32 deletions RNASeq.py
Expand Up @@ -2675,7 +2675,15 @@ def getMaxCounts(fn,cutoff,filterExport=False,filterExportDir=False):
try: uid, coordinates = string.split(key,'=')
except Exception: uid = key
try: maxExp = max(map(lambda x: float(x), t[1:]))
except Exception: maxExp=cutoff+1
except Exception:
if 'NA' in t[1:]:
tn = [0 if x=='NA' else x for x in t[1:]] ### Replace NAs
maxExp = max(map(lambda x: float(x), tn))
elif '' in t[1:]:
tn = [0 if x=='' else x for x in t[1:]] ### Replace blanks
maxExp = max(map(lambda x: float(x), tn))
else:
maxExp=cutoff+1

#gene = string.split(uid,':')[0]
if maxExp > cutoff:
Expand Down Expand Up @@ -3021,18 +3029,30 @@ def optimizeNumberOfGenesForDiscovery(expFile,platform,expressed_uids,fold=2,sam
except Exception: uid = key
try: values = map(lambda x: float(x), t[1:])
except Exception:
values=[]
for value in t[1:]:
try: values.append(float(value))
except Exception: values.append(-9999)
values = numpy.ma.masked_values(values, -9999.)
values = t[1:]
if 'NA' in values:
values = [0 if x=='NA' else x for x in values] ### Replace NAs
values = map(lambda x: float(x), values)
else:
values=[]
for value in t[1:]:
try: values.append(float(value))
except Exception: values.append(-9999)
values = numpy.ma.masked_values(values, -9999.)
#gene = string.split(uid,':')[0]
#if uid == 'ENSMUSG00000041515': print 'IRF8'
if uid in expressed_uids:
#slope_exp_ratio = determinePattern(vs)
#if slope_exp_ratio<2 and slope_exp_ratio>0.5:
if platform == 'RNASeq':
values = map(lambda x: math.log(x+1,2),values)
try: values = map(lambda x: math.log(x+1,2),values)
except Exception:
if 'NA' in values:
values = [0 if x=='NA' else x for x in values] ### Replace NAs
values = map(lambda x: math.log(x+1,2),values)
elif '' in values:
values = [0 if x=='' else x for x in values] ### Replace NAs
values = map(lambda x: math.log(x+1,2),values)
vs = list(values); vs.sort()
if (vs[-1*samplesDiffering]-vs[samplesDiffering-1])>math.log(fold,2): ### Ensures that atleast 4 samples are significantly different in the set
expressed_values[uid] = values
Expand Down Expand Up @@ -4648,6 +4668,12 @@ def runKallisto(species,dataset_name,root_dir,fastq_folder,returnSampleNames=Fal
### If installed globally
retcode = subprocess.call(['kallisto', "index","-i", kallisto_root+species, fasta_file])

reimportExistingKallistoOutput = False

if reimportExistingKallistoOutput:
### Just get the existing Kallisto output folders
fastq_paths = read_directory(output_dir)

kallisto_folders=[]
expMatrix={}
countMatrix={}
Expand All @@ -4656,34 +4682,35 @@ def runKallisto(species,dataset_name,root_dir,fastq_folder,returnSampleNames=Fal
for n in fastq_paths:
output_path = output_dir+n
kallisto_folders.append(output_path)
begin_time = time.time()
print 'Running kallisto on:',n,
p=fastq_paths[n]
b=[" > "+n+'.sam']
#"""
if paired == 'paired':
try:
#retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--pseudobam"]+p+b)
retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path]+p)
else:
if os.name == 'nt':
if reimportExistingKallistoOutput == False:
begin_time = time.time()
print 'Running kallisto on:',n,
p=fastq_paths[n]
b=[" > "+n+'.sam']
#"""
if paired == 'paired':
try:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
#retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--pseudobam"]+p+b)
retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path]+p)
except Exception:
try: retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path]+p)
else:
if os.name == 'nt':
try:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
try: retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
else:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
else:
try: retcode = subprocess.call([kallisto_file, "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
except Exception:
retcode = subprocess.call(['kallisto', "quant","-i", indexFile, "-o", output_path,"--single","-l","200","-s","20"]+p)
if retcode == 0: print 'completed in', int(time.time()-begin_time), 'seconds'
else: print 'kallisto failed due to an unknown error (report to altanalyze.org help).'
if retcode == 0: print 'completed in', int(time.time()-begin_time), 'seconds'
else: print 'kallisto failed due to an unknown error (report to altanalyze.org help).'

#"""
#"""
input_path = output_path+'/abundance.txt'
try:
try: expMatrix,countMatrix=importTPMs(n,input_path,expMatrix,countMatrix)
Expand Down Expand Up @@ -4901,9 +4928,9 @@ def getFASTAFile(species):

filename = '/Users/saljh8/Downloads/counts.GSE81682_HTSeq.txt'
#fastRPKMCalculate(filename);sys.exit()
calculateRPKMsFromGeneCounts(filename,'Mm',AdjustExpression=False);sys.exit()
#calculateRPKMsFromGeneCounts(filename,'Mm',AdjustExpression=False);sys.exit()
#copyICGSfiles('','');sys.exit()
runKallisto('Dr','Etv2 GFP Single Cell','/Volumes/Seagate Exp/Single Cell RNA seq Run 1696 FASTQ','/Volumes/Seagate Exp/');sys.exit()
runKallisto('Hs','BC','/Volumes/salomonis2/GSE45419-Breast-Cancer-cell-line/RestrictedAnalysis/','/Volumes/salomonis2/GSE45419-Breast-Cancer-cell-line/RestrictedAnalysis/');sys.exit()
import multiprocessing as mlp
import UI
species='Mm'; platform = "3'array"; vendor = 'Ensembl'
Expand Down
1 change: 1 addition & 0 deletions R_interface.py
Expand Up @@ -184,6 +184,7 @@ def checkForDuplicateIDs(input_file):
return input_file

def importHopachOutput(filename):
print filename
""" Import the ID order information """
db={} ### Used to store the cluster data
hopach_clusters=[]
Expand Down
103 changes: 79 additions & 24 deletions SashimiPlot.py
Expand Up @@ -173,24 +173,67 @@ def sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=None):
#print traceback.format_exc()
pass

processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename,bamdir,splicing_events,sample_group_db,groups,expandedSearch)
mopup_events=[]
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
mopup_events = getMopUpEvents(splicing_events, processed_events)

### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
#print len(splicing_events),len(processed_events),len(mopup_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
if len(processed_events)>0:
mopup_events = getMopUpEvents(mopup_events, processed_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
return gene_to_symbol

def getMopUpEvents(splicing_events, processed_events):
    """Return the splicing events that still need to be visualized.

    An event is either a single junction ('gene:j1') or a junction pair
    separated by one space ('gene:j1 gene:j2').  A paired event counts as
    already processed when either of its two junctions appears in
    processed_events.

    splicing_events  -- iterable of requested event strings
    processed_events -- events/junctions already submitted to SashimiPlot
    Returns a new list with the remaining events, input order preserved.
    """
    mopup_events = []
    for event in splicing_events:
        add = True
        if event in processed_events:
            add = False
        if ' ' in event:
            try:
                j1, j2 = event.split(' ')
                if j1 in processed_events:
                    add = False
                if j2 in processed_events:
                    add = False
            except Exception:
                # Unexpected format (e.g. more than one space): keep the event
                pass
        if add:
            mopup_events.append(event)
    return mopup_events

### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
#print len(splicing_events),len(processed_events),len(mopup_events)
processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
return gene_to_symbol
def reorderEvents(events):
    """Reorder paired splicing-event strings ('junction1 junction2') so the
    junction order matches the exclusion-junction selection used by
    SashimiIndex.  Entries are rewritten in place in the supplied list,
    which is also returned (the list is aliased, not copied).

    Each event half looks like 'gene:E1.2-E3.1' — a one-character-prefixed
    block.region coordinate on each side of '-'.  # assumes this format —
    TODO confirm against the upstream PSI-file writer.
    """
    splicing_events = events  # alias: swaps below mutate the caller's list
    index = 0
    for e in events:
        j1o, j2o = string.split(e, ' ')   # the two gene:junction tokens
        gene, j1 = string.split(j1o, ':')
        gene, j2 = string.split(j2o, ':')
        if '-' in j1 and '-' in j2:
            # Decompose both junctions into start/end block.region numbers,
            # stripping the one-character prefix (e.g. 'E') before splitting.
            j1a, j1b = string.split(j1, '-')
            j2a, j2b = string.split(j2, '-')
            j1a_block, j1a_region = string.split(j1a[1:], '.')
            j2a_block, j2a_region = string.split(j2a[1:], '.')
            j1b_block, j1b_region = string.split(j1b[1:], '.')
            j2b_block, j2b_region = string.split(j2b[1:], '.')
            if int(j1b_block) < int(j2b_block) and int(j1a_block) < int(j2a_block):
                pass ### Occurs for complex cassette exon splicing events but matches SashimiIndex's selection for exclusion
            elif int(j1b_block) > int(j2b_block):
                # End blocks out of order: put the second junction first.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1a_block) < int(j2a_block):
                # NOTE(review): swaps when j1a_block < j2a_block, yet the first
                # branch treats that same ordering as already correct — possibly
                # intended to be '>'; confirm against SashimiIndex ordering.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1b_region) > int(j2b_region):
                # Blocks tie: fall back to comparing end region numbers.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e
            elif int(j1a_region) < int(j2a_region):
                # NOTE(review): '<' here vs '>' for the j1b_region test above —
                # verify this asymmetry is intentional.
                new_e = j2o + ' ' + j1o
                splicing_events[index] = new_e

        index += 1
    return splicing_events

def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,sample_group_db,groups,expandedSearch):
### Begin exporting parameters and events for SashimiPlot visualization
Expand Down Expand Up @@ -235,15 +278,15 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
else:
sampleIndexBegin = 1
sample_headers = t[sampleIndexBegin:]
if '.bed' not in sample_headers[0]: ### Add .bed if removed manually
sample_headers = map(lambda s: s+'.bed',sample_headers)
if '.bed' not in sample_headers[0]: ### Add .bed if removed manually
sample_headers = map(lambda s: s+'.bed',sample_headers)
index=0
sample_group_index={}
for s in sample_headers:
group = sample_group_db[s]
sample_group_index[index]=group
try: sampleReadDepth[index]=count_sum_array_db[s]
except Exception: sampleReadDepth[index]=count_sum_array_db[s]
except Exception: sampleReadDepth[index]=count_sum_array_db[s]
index+=1
firstLine = False
else:
Expand Down Expand Up @@ -308,13 +351,17 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
group_psi_values['low']=filtered_group_index1
group_psi_values['high']=filtered_group_index2
else:
gn=0
gn=0
for group in groups:
gn+=1
#if gn>4: break
if group in initial_group_psi_values:
initial_group_psi_values[group].sort()
if len(groups)>3:
if len(groups)>7:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:1])
elif len(groups)>5:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:2])
elif len(groups)>3:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:4])
else:
filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:5])
Expand All @@ -323,8 +370,15 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
except Exception:
print 'Cannot update the settings file. Likely permissions issue.'

try:
reordered = reorderEvents([t[2] + ' ' + t[3]])
reordered = string.split(reordered[0], ' ')
except Exception:
reordered = [t[2] + ' ' + t[3]]
reordered = string.split(reordered[0], ' ')
#print reordered
if 'PSI' in filename:
try: formatted_splice_event=string.replace(t[3],':','__')
try: formatted_splice_event = string.replace(reordered[1], ':', '__')
except Exception: pass
### Submit the query
try: ssp.plot_event(formatted_splice_event,index_dir,setting,outputdir); success = True
Expand All @@ -342,13 +396,14 @@ def formatAndSubmitSplicingEventsToSashimiPlot(filename,bamdir,splicing_events,s
except Exception:
success = False
#print traceback.format_exc()

"""
### Second attempt
if 'PSI' in filename and success==False: ### Only relevant when parsing the junction pairs but not genes
try: formatted_splice_event=string.replace(t[2],':','__')
try: formatted_splice_event=string.replace(reordered[0],':','__')
except Exception: pass
try: ssp.plot_event(formatted_splice_event,index_dir,setting,outputdir); # print 'success'
except Exception: pass
except Exception: pass
"""
return processed_events

def findParentDir(filename):
Expand Down Expand Up @@ -491,11 +546,11 @@ def justConvertFilenames(species,outputdir):
continue

if __name__ == '__main__':
root_dir = 'C:/Users/Nathan Salomonis/Desktop/BAMS/'
root_dir = '/Volumes/SEQ-DATA/BreastCancerTargetted/BAMs/'
events = ['Aldh3a2']
#events = None
eventsToVisualizeFilename = 'C:/Users/Nathan Salomonis/Desktop/BAMS/AltResults/Clustering/top50/Combined-junction-exon-evidence.txt'
events = None
eventsToVisualizeFilename = None
eventsToVisualizeFilename = '/Volumes/SEQ-DATA/BreastCancerTargetted/BAMs/AltResults/AlternativeOutput/Hs_RNASeq_top_alt_junctions-PSI-clust-pairwise.txt'
bamdir = root_dir
remoteSashimiPlot('Mm',root_dir,bamdir,eventsToVisualizeFilename,events=events,show=False)
remoteSashimiPlot('Hs', root_dir, bamdir, eventsToVisualizeFilename, events=events, show=False)
sys.exit()
2 changes: 2 additions & 0 deletions UI.py
Expand Up @@ -1374,6 +1374,8 @@ def displayPathway(self):
else:
try:
self.graphic_link = WikiPathways_webservice.visualizePathwayAssociations(filename,species_code,mod_type,wpid)
if len(self.graphic_link)==0:
force_no_matching_error
self.wp_status = 'Pathway images colored and saved to disk by webservice\n(see image title for location)'
self.label_status_name.set(self.wp_status)
tl = Toplevel()
Expand Down
2 changes: 1 addition & 1 deletion WikiPathways_webservice.py
Expand Up @@ -288,7 +288,7 @@ def getColoredPathway(root_dir,graphID_db,file_type,dataset_name,WPID=None):
#print len(hex_color_list),hex_color_list
#print file_type
if len(graphID_list)==0:
force_no_matching_error
continue
### revision = 0 is the most current version
#file = client.service.getColoredPathway(pwId=wpid,revision=0,graphId=graphID_list,color=hex_color_list,fileType=file_type)

Expand Down

0 comments on commit 50436bf

Please sign in to comment.