Polishing works in RELION pipeline
mcianfrocco committed Jul 2, 2017
1 parent 8dd8f17 commit 843e347
Showing 2 changed files with 130 additions and 58 deletions.
94 changes: 65 additions & 29 deletions relion/qsub_aws
@@ -46,7 +46,7 @@ def s3_to_ebs(IP,keypair,bucketname,dironebs,rclonepath,keyid,secretid,region,nu
env.host_string='ubuntu@%s' %(IP)
env.key_filename = '%s' %(keypair)
rcloneexe='rclone'
- exec_remote_cmd('%s/%s copy rclonename:%s %s --max-size 1G --quiet --transfers %i' %(homedir,rcloneexe,bucketname.split('s3://')[-1],dironebs,numfilesAtATime))
+ exec_remote_cmd('%s/%s copy rclonename:%s %s --quiet --transfers %i' %(homedir,rcloneexe,bucketname.split('s3://')[-1],dironebs,numfilesAtATime))

fileonly=dironebs.split('/')[-1]
if dironebs.split('.')[-1] == 'mrcs' or dironebs.split('.')[-1] == 'spi':
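Note: dropping --max-size 1G means rclone no longer skips files over 1 GB, which polishing needs since aligned particle stacks routinely exceed that. For reference, a minimal sketch of the command this format string renders; the home directory, bucket, and destination below are illustrative assumptions, not values from the commit:

    # Hypothetical inputs, for illustration only.
    homedir = '/home/ubuntu'                      # assumed
    rcloneexe = 'rclone'
    bucketname = 's3://rln-aws-team1-particles'   # assumed example bucket
    dironebs = '/data/Polish/job050/'             # assumed example destination
    numfilesAtATime = 36                          # e.g. numfiles - 4 on x1.32xlarge
    cmd = '%s/%s copy rclonename:%s %s --quiet --transfers %i' % (
        homedir, rcloneexe, bucketname.split('s3://')[-1], dironebs, numfilesAtATime)
    print(cmd)
    # /home/ubuntu/rclone copy rclonename:rln-aws-team1-particles /data/Polish/job050/ --quiet --transfers 36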
@@ -4075,14 +4075,16 @@ def relion_polish(project):
numParticles=float(subprocess.Popen('cat %s | wc -l' %(starfilename),shell=True, stdout=subprocess.PIPE).stdout.read().strip())

if numParticles > 1000000:
- #instance='x1.32xlarge'
- #mpi=128
- #numfiles=128
- #cost=13.338
- instance='m4.16xlarge'
- mpi=64
- numfiles=64
- cost=3.2
+ instance='x1.32xlarge'
+ mpi=128
+ numfiles=40
+ cost=13.338
+ Refdrives='/dev/xvdb /dev/xvdc'
+ RefnumRaid=2
+ #instance='m4.16xlarge'
+ #mpi=64
+ #numfiles=64
+ #cost=3.2
if numParticles < 1000000:
instance='m4.16xlarge'
mpi=64
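Condensed, the revised sizing logic reads roughly as below: jobs above one million particles now run on x1.32xlarge with fewer concurrent transfers (numfiles 64 -> 40) and two instance-store drives reserved for a striped scratch volume. A sketch mirroring the numbers in the diff, not code from the commit; the numfiles value for the < 1M branch is assumed, since that branch is truncated here:

    # Sketch of the revised instance selection (values copied from the diff).
    def pick_instance(numParticles):
        if numParticles > 1000000:
            # 128 MPI ranks at $13.338/hr; two local drives for RAID-0 scratch
            return dict(instance='x1.32xlarge', mpi=128, numfiles=40, cost=13.338,
                        Refdrives='/dev/xvdb /dev/xvdc', RefnumRaid=2)
        # 64 MPI ranks at $3.20/hr; numfiles=64 assumed for this branch
        return dict(instance='m4.16xlarge', mpi=64, numfiles=64, cost=3.2)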
@@ -4153,8 +4155,28 @@ def relion_polish(project):
inputfilesize=subprocess.Popen('du %s' %(particledir), shell=True, stdout=subprocess.PIPE).stdout.read().split()[-2]
if len(otherPartDir) > 0:
inputfilesize=subprocess.Popen('du %s' %(otherPartDir), shell=True, stdout=subprocess.PIPE).stdout.read().split()[-2]
- sizeneeded='%.0f' %(math.ceil((float(inputfilesize)*4)/1000000))
- actualsize='%.0f' %(math.ceil((float(inputfilesize)/1000000)))
+ if s3_exist is True:
+     cmd='aws s3 ls --summarize --human-readable --recursive %s > %s/s3list.log' %(bucketname,outdir)
+     subprocess.Popen(cmd,shell=True).wait()
+
+     s3size=float(subprocess.Popen('cat %s/s3list.log | grep "Total Size"' %(outdir), shell=True, stdout=subprocess.PIPE).stdout.read().split()[-2].strip())
+     s3size_units=subprocess.Popen('cat %s/s3list.log | grep "Total Size"' %(outdir), shell=True, stdout=subprocess.PIPE).stdout.read().split()[-1].strip()
+
+     ##Put into GB
+     if s3size_units == 'TiB':
+         s3size=s3size*1000
+     if s3size_units == 'MiB':
+         s3size=s3size/1000
+     sizeneeded='%.0f' %(s3size*2)
+     actualsize='%.0f' %(s3size)
+
+     if int(actualsize) > 160000:
+         writeToLog('Error: Cannot create EBS volume large enough to accommodate %s GB. Exiting' %(actualsize), '%s/run.err' %(outdir))
+         sys.exit()
+
+ if s3_exist is False:
+     sizeneeded='%.0f' %(math.ceil((float(inputfilesize)*4)/1000000))
+     actualsize='%.0f' %(math.ceil((float(inputfilesize)/1000000)))
#Upload data to S3\
stack=False
if s3_exist is False:
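When the particles already live in S3, the scratch size now comes from the bucket listing itself: the "Total Size" line of aws s3 ls --summarize --human-readable is parsed, normalized to GB, doubled for headroom (sizeneeded), and the job aborts past the 160000 GB ceiling. A standalone sketch of that normalization, with an assumed helper name:

    # Sketch of the unit handling above; mirrors the diff's TiB/MiB factors.
    def s3_total_gb(total_size_line):
        fields = total_size_line.split()   # e.g. ['Total', 'Size:', '1.2', 'TiB']
        size, units = float(fields[-2]), fields[-1]
        if units == 'TiB':
            size *= 1000
        if units == 'MiB':
            size /= 1000
        return size                        # GiB-scale figures treated as GB

    assert s3_total_gb('Total Size: 1.2 TiB') == 1200.0
    assert s3_total_gb('Total Size: 500.0 MiB') == 0.5

As in the diff, KiB totals fall through unscaled; a bucket that small would not need polishing-scale scratch anyway.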
@@ -4174,16 +4196,16 @@ def relion_polish(project):
if len(otherPartRclone) > 0:
bucketname=rclone_to_s3(otherPartDir,numCPUs*2.4,awsregion,key_ID,secret_ID,bucketname,bucketname,awsdir,project,otherPartRclone)
writeToLog('Finished at %s' %(time.asctime(time.localtime(time.time()))),'%s/run.out' %(outdir))
- if ebs_exist is False:
-     writeToLog('Creating data storage drive ...','%s/run.out' %(outdir))
+ if ebs_exist is False:
+     #writeToLog('Creating data storage drive ...','%s/run.out' %(outdir))
#Create EBS volume
if os.path.exists('%s/awsebs.log' %(outdir)) :
os.remove('%s/awsebs.log' %(outdir))
- cmd='%s/create_volume.py %i %s%s "rln-aws-tmp-%s-%s"'%(awsdir,int(sizeneeded),awsregion,AZ_letter,teamname,particledir)+'> %s/awsebs.log' %(outdir)
- subprocess.Popen(cmd,shell=True).wait()
+ #cmd='%s/create_volume.py %i %s%s "rln-aws-tmp-%s-%s"'%(awsdir,int(sizeneeded),awsregion,AZ_letter,teamname,particledir)+'> %s/awsebs.log' %(outdir)
+ #subprocess.Popen(cmd,shell=True).wait()

#Get volID from logfile
- volID=linecache.getline('%s/awsebs.log' %(outdir),5).split('ID: ')[-1].split()[0]
+ #volID=linecache.getline('%s/awsebs.log' %(outdir),5).split('ID: ')[-1].split()[0]

#Restore volume, returning with it volID for later steps
writeToLog('Launching virtual machine %s...' %(instance),'%s/run.out' %(outdir))
@@ -4195,7 +4217,7 @@ def relion_polish(project):
#Launch instance
if os.path.exists('%s/awslog.log' %(outdir)):
os.remove('%s/awslog.log' %(outdir))
- cmd='%s/launch_AWS_instance.py --relion2 --instance=%s --availZone=%s%s --volume=%s > %s/awslog.log' %(awsdir,instance,awsregion,AZ_letter,volID,outdir)
+ cmd='%s/launch_AWS_instance.py --AMI=ami-5d26b83d --alwaysOn --instance=%s --availZone=%s%s --noEBS > %s/awslog.log' %(awsdir,instance,awsregion,AZ_letter,outdir)
subprocess.Popen(cmd,shell=True).wait()
#Get instance ID, keypair, and username:IP
instanceID=subprocess.Popen('cat %s/awslog.log | grep ID' %(outdir), shell=True, stdout=subprocess.PIPE).stdout.read().split('ID:')[-1]
@@ -4205,7 +4227,11 @@ def relion_polish(project):
#Create directories on AWS
env.host_string='ubuntu@%s' %(userIP)
env.key_filename = '%s' %(keypair)
- if ebs_exist is False:
+ exec_remote_cmd('sudo mdadm --create --verbose /dev/md0 --level=stripe --raid-devices=%i %s' %(RefnumRaid,Refdrives))
+ exec_remote_cmd('sudo mkfs.ext4 -L MY_RAID /dev/md0')
+ exec_remote_cmd('sudo mount LABEL=MY_RAID /data')
+ exec_remote_cmd('sudo chmod 777 /data/')
+ if ebs_exist is False:
writeToLog('Started transferring %sGB at %s' %(actualsize,time.asctime(time.localtime(time.time()))),'%s/run.out' %(outdir))
dirlocation='/data'
if stack is False:
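With RefnumRaid=2 and Refdrives='/dev/xvdb /dev/xvdc' from the sizing block, the four remote commands above render to the following shell; striping the x1.32xlarge instance-store drives into /dev/md0 gives /data the multi-TB scratch that the now commented-out EBS volume used to provide:

    sudo mdadm --create --verbose /dev/md0 --level=stripe --raid-devices=2 /dev/xvdb /dev/xvdc
    sudo mkfs.ext4 -L MY_RAID /dev/md0
    sudo mount LABEL=MY_RAID /data
    sudo chmod 777 /data/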
@@ -4215,11 +4241,18 @@ def relion_polish(project):
dirlocation=dirlocation+'/'+entry
if len(otherPartDir) == 0:
if stack is False:
- s3_to_ebs(userIP,keypair,bucketname,'/data/%s/' %(particledir),'%s/rclone' %(awsdir),key_ID,secret_ID,awsregion,numfiles)
+ s3_to_ebs(userIP,keypair,bucketname,'/data/%s/' %(particledir),'%s/rclone' %(awsdir),key_ID,secret_ID,awsregion,numfiles-4)
if stack is True:
- s3_to_ebs(userIP,keypair,bucketname,'/data/%s' %(particledir),'%s/rclone' %(awsdir),key_ID,secret_ID,awsregion,numfiles)
+ s3_to_ebs(userIP,keypair,bucketname,'/data/%s' %(particledir),'%s/rclone' %(awsdir),key_ID,secret_ID,awsregion,numfiles-4)
if len(otherPartDir) > 0:
s3_to_ebs(userIP,keypair,bucketname,'/data/%s/' %(otherPartDir),'%s/rclone' %(awsdir),key_ID,secret_ID,awsregion,numfiles)

+ cmd='rsync --rsync-path="rsync" --log-file="%s/rsync.log" -avzu -e "ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i %s" %s/*.star ubuntu@%s:/data/%s/ > %s/rsync.log' %(outdir,keypair,particledir,userIP,particledir,outdir)
+ subprocess.Popen(cmd,shell=True).wait()
+
+ cmd='rsync --rsync-path="rsync" --log-file="%s/rsync.log" -avzu -e "ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i %s" %s/*.mrc ubuntu@%s:/data/%s/ > %s/rsync.log' %(outdir,keypair,particledir,userIP,particledir,outdir)
+ subprocess.Popen(cmd,shell=True).wait()

writeToLog('Finished transfer at %s' %(time.asctime( time.localtime(time.time()) )),'%s/run.out' %(outdir))

#Make output directories
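Note: both new rsync calls point --log-file (and their stdout redirect) at the same %s/rsync.log, so the *.mrc pass overwrites the *.star pass's log. One possible consolidation, an untested sketch rather than code from the commit, is a single filtered pass:

    # Untested sketch: one rsync pass for both patterns, logged once.
    # Variables are the same ones already in scope in the diff.
    cmd = ('rsync --rsync-path="rsync" --log-file="%s/rsync.log" -avzu '
           '-e "ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i %s" '
           '--include="*.star" --include="*.mrc" --exclude="*" '
           '%s/ ubuntu@%s:/data/%s/' % (outdir, keypair, particledir, userIP, particledir))
    subprocess.Popen(cmd, shell=True).wait()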
@@ -4269,23 +4302,26 @@ def relion_polish(project):
#Check if there are any errors
if isdone == 0:
if os.path.exists('%s/run.err' %(outdir)):
- if float(subprocess.Popen('cat %s/run.err | wc -l' %(outdir),shell=True, stdout=subprocess.PIPE).stdout.read().strip()) > 0:
+ if float(subprocess.Popen('cat %s/run.err | wc -l' %(outdir),shell=True, stdout=subprocess.PIPE).stdout.read().strip()) > 3:
writeToLog('\nError detected in run.err. Shutting down instance.','%s/run.out' %(outdir))
isdone=1
- time.sleep(10)
+ time.sleep(30)

writeToLog('Job finished!','%s/run.out' %(outdir))
writeToLog('Shutting everything down ...','%s/run.out' %(outdir))
- cmd=subprocess.Popen('aws ec2 terminate-instances --instance-ids %s > %s/tmp4949585940.txt' %(instanceID,outdir),shell=True, stdout=subprocess.PIPE).stdout.read().strip()
+ cmd=subprocess.Popen('aws ec2 terminate-instances --instance-ids %s > %s/tmp4949585940.txt' %(instanceID.strip(),outdir),shell=True, stdout=subprocess.PIPE).stdout.read().strip()
isdone=0
- #while isdone == 0:
- #	status=subprocess.Popen('aws ec2 describe-instances --instance-ids %s --query "Reservations[*].Instances[*].{State:State}" | grep Name'%(instanceID),shell=True, stdout=subprocess.PIPE).stdout.read().strip().split()[-1].split('"')[1]
- #	if status == 'terminated':
- #		isdone=1
- #	time.sleep(10)
+ while isdone == 0:
+     status=subprocess.Popen('aws ec2 describe-instances --instance-ids %s --query "Reservations[*].Instances[*].{State:State}" | grep Name'%(instanceID.strip()),shell=True, stdout=subprocess.PIPE).stdout.read().strip().split()[-1].split('"')[1]
+     if status == 'terminated':
+         isdone=1
+     time.sleep(10)

- now=datetime.datetime.now()
+ if os.path.exists('awslog.log'):
+     os.remove('awslog.log')
+
+ now=datetime.datetime.now()
finday=now.day
finhr=now.hour
finmin=now.minute
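The re-enabled loop polls describe-instances every 10 s until the instance reports terminated; the .strip() additions matter here, since the ID read from awslog.log carries a trailing newline. The AWS CLI's built-in waiter would do the same blocking wait in one call; a sketch, not part of the commit:

    # Sketch: equivalent blocking wait via the CLI's built-in waiter.
    subprocess.Popen('aws ec2 wait instance-terminated --instance-ids %s'
                     % (instanceID.strip()), shell=True).wait()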
Expand Down Expand Up @@ -4315,7 +4351,7 @@ def relion_polish(project):
tmpout.close()
os.remove('.aws_relion_tmp')

- cmd='echo "%s %s %s" >> .aws_relion' %(particledir,bucketname,volID)
+ cmd='echo "%s %s %s" >> .aws_relion' %(particledir,bucketname,'--')
subprocess.Popen(cmd,shell=True).wait()

if len(project) > 0: