
Commit

Merge branch 'master' of github.com:biocore/qiita into moreweb
squirrelo committed Jun 20, 2014
2 parents c519417 + 19f2957 commit 82639c4
Showing 15 changed files with 221 additions and 133 deletions.
48 changes: 45 additions & 3 deletions qiita_db/commands.py
@@ -6,6 +6,9 @@
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from dateutil.parser import parse
from os import listdir
from os.path import join
from functools import partial
try:
# Python 2
@@ -15,7 +18,6 @@
from configparser import ConfigParser

import pandas as pd
from dateutil.parser import parse

from .study import Study, StudyPerson
from .user import User
@@ -24,8 +26,19 @@
from .metadata_template import SampleTemplate


def make_study_from_cmd(owner, title, info):
def load_study_from_cmd(owner, title, info):
r"""Adds a study to the database
Parameters
----------
owner : str
The email address of the owner of the study
title : str
The title of the study
info : file-like object
File-like object containing study information
"""
# Parse the configuration file
config = ConfigParser()
config.readfp(info)
@@ -71,6 +84,35 @@ def make_study_from_cmd(owner, title, info):
Study.create(User(owner), title, efo_ids, infodict)


def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
params_table, params_id,
submitted_to_insdc):
r"""Adds preprocessed data to the database
Parameters
----------
study_id : int
The study id to which the preprocessed data belongs
filedir : str
Directory path of the preprocessed data
filepathtype : str
The filepath_type of the preprocessed data
params_table : str
The name of the table which contains the parameters of the
preprocessing
params_id : int
The id of the parameters in the params_table
submitted_to_insdc : bool
Whether the data has been submitted to INSDC
"""
fp_types_dict = get_filepath_types()
fp_type = fp_types_dict[filepathtype]
filepaths = [(join(filedir, fp), fp_type) for fp in listdir(filedir)]
return PreprocessedData.create(Study(study_id), params_table, params_id,
filepaths,
submitted_to_insdc=submitted_to_insdc)


def sample_template_adder(sample_temp_path, study_id):
r"""Adds a sample template to the database
@@ -79,7 +121,7 @@ def sample_template_adder(sample_temp_path, study_id):
sample_temp_path : str
Path to the sample template file
study_id : int
The study id to wich the sample template belongs to
The study id to which the sample template belongs
"""
sample_temp = pd.DataFrame.from_csv(sample_temp_path, sep='\t',
infer_datetime_format=True)
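The two loaders added above are plain functions and can be driven from a short script; the sketch below is illustrative only (the config path, title and study id are assumptions, while the email, filepath type and parameters table mirror the values used in the tests further down):

from qiita_db.commands import (load_study_from_cmd,
                               load_preprocessed_data_from_cmd)

# Create a study from a config file (hypothetical path and title).
with open('study_config.txt') as info:
    load_study_from_cmd('test@test.com', 'New study', info)

# Attach every file in a directory to study 1 as preprocessed data,
# tagged with the 'preprocessed_sequences' filepath type.
ppd = load_preprocessed_data_from_cmd(
    1, '/path/to/preprocessed_dir', 'preprocessed_sequences',
    'preprocessed_sequence_illumina_params', 1, False)
print(ppd.id)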
60 changes: 10 additions & 50 deletions qiita_db/job.py
@@ -16,17 +12,12 @@
# -----------------------------------------------------------------------------
from __future__ import division
from json import dumps, loads
from os import remove
from os.path import basename, join, commonprefix
from shutil import copy
from os.path import join
from time import strftime
from datetime import date
from tarfile import open as taropen

from .base import QiitaStatusObject
from .util import (insert_filepaths, get_db_files_base_dir, get_work_base_dir,
convert_to_id)
from .exceptions import QiitaDBDuplicateError
from .util import insert_filepaths, convert_to_id
from .sql_connection import SQLConnectionHandler


@@ -177,35 +172,16 @@ def results(self):
-------
list
Filepaths to the result files
Notes
-----
All files are automatically copied into the working directory and
untar-ed if necessary. The filepaths point to these files/folders in
the working directory.
"""
# Copy files to working dir, untar if necessary, then return filepaths
sql = ("SELECT filepath, filepath_type_id FROM qiita.filepath WHERE "
"filepath_id IN (SELECT filepath_id FROM "
"qiita.job_results_filepath WHERE job_id = %s)")
conn_handler = SQLConnectionHandler()
results = conn_handler.execute_fetchall(sql, (self._id, ))
# create new list, untaring as necessary
results_untar = []
outpath = get_work_base_dir()
for fp, fp_type in results:
if fp_type == 7:
# untar to work directory
with taropen(join(get_db_files_base_dir(),
self._table, fp)) as tar:
base = commonprefix(tar.getnames())
tar.extractall(path=outpath)
else:
# copy to work directory
copy(join(get_db_files_base_dir(), self._table, fp), outpath)
base = fp
results_untar.append(join(outpath, base))
return results_untar
results = conn_handler.execute_fetchall(
"SELECT filepath FROM qiita.filepath WHERE filepath_id IN "
"(SELECT filepath_id FROM qiita.job_results_filepath "
"WHERE job_id = %s)",
(self._id, ))
# create new list, with relative paths from db base
return [join("job", fp[0]) for fp in results]

@property
def error_msg(self):
@@ -270,28 +246,12 @@ def add_results(self, results):
[1] http://stackoverflow.com/questions/2032403/
how-to-create-full-compressed-tar-file-using-python
"""
# go though the list and tar any folders if necessary
cleanup = []
addpaths = []
for fp, fp_type in results:
if fp_type == 7:
outpath = join("/tmp", ''.join((basename(fp), ".tar")))
with taropen(outpath, "w") as tar:
tar.add(fp)
addpaths.append((outpath, 7))
cleanup.append(outpath)
else:
addpaths.append((fp, fp_type))

# add filepaths to the job
conn_handler = SQLConnectionHandler()
file_ids = insert_filepaths(addpaths, self._id, self._table,
file_ids = insert_filepaths(results, self._id, self._table,
"filepath", conn_handler)

# associate filepaths with job
sql = ("INSERT INTO qiita.{0}_results_filepath (job_id, filepath_id) "
"VALUES (%s, %s)".format(self._table))
conn_handler.executemany(sql, [(self._id, fid) for fid in file_ids])

# clean up the created tars from the temp directory
map(remove, cleanup)
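With the rewritten results property, a job reports paths relative to the database files base directory (e.g. job/1_job_result.txt) instead of copying and untarring files into the working directory; a minimal sketch of how a caller might resolve them to absolute paths, assuming the get_db_files_base_dir helper that the old imports referenced:

from os.path import join

from qiita_db.job import Job
from qiita_db.util import get_db_files_base_dir

# Resolve the relative result paths against the DB files base directory.
base = get_db_files_base_dir()
absolute_results = [join(base, rel_fp) for rel_fp in Job(1).results]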
2 changes: 1 addition & 1 deletion qiita_db/support_files/initialize.sql
@@ -32,7 +32,7 @@ INSERT INTO qiita.portal_type (portal, description) VALUES ('QIIME', 'QIIME port
INSERT INTO qiita.required_sample_info_status (status) VALUES ('received'), ('in_preparation'), ('running'), ('completed');

-- Populate filepath_type table
INSERT INTO qiita.filepath_type (filepath_type) VALUES ('raw_sequences'), ('raw_barcodes'), ('raw_spectra'), ('preprocessed_sequences'), ('preprocessed_sequences_qual'), ('biom'), ('tar'), ('plain_text');
INSERT INTO qiita.filepath_type (filepath_type) VALUES ('raw_sequences'), ('raw_barcodes'), ('raw_spectra'), ('preprocessed_sequences'), ('preprocessed_sequences_qual'), ('biom'), ('directory'), ('plain_text');

-- Populate checksum_algorithm table
INSERT INTO qiita.checksum_algorithm (name) VALUES ('crc32');
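The 'tar' filepath type is replaced by 'directory'; as the seventh value inserted it keeps id 7, which is why job.py and its tests keep using the literal 7 for directory results. A quick check of the mapping on a populated test database, assuming the primary key column is named filepath_type_id as the qiita.filepath inserts below suggest:

from qiita_db.sql_connection import SQLConnectionHandler

# Fetch the id assigned to the new 'directory' filepath type (expected: 7).
conn_handler = SQLConnectionHandler()
obs = conn_handler.execute_fetchone(
    "SELECT filepath_type_id FROM qiita.filepath_type "
    "WHERE filepath_type = %s", ('directory',))
print(obs[0])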
2 changes: 1 addition & 1 deletion qiita_db/support_files/populate_test_db.sql
@@ -324,7 +324,7 @@ INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algor
INSERT INTO qiita.processed_filepath (processed_data_id, filepath_id) VALUES (1, 7);

-- Insert filepath for job results files
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id) VALUES ('job1result.txt', 8, '852952723', 1), ('job2tar.tar', 7, '852952723', 1);
INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id) VALUES ('1_job_result.txt', 8, '852952723', 1), ('2_test_folder', 7, '852952723', 1);

-- Insert jobs
INSERT INTO qiita.job (data_type_id, job_status_id, command_id, options) VALUES (1, 1, 1, '{"option1":true,"option2":12,"option3":"FCM"}'), (1, 3, 2, 'options2'), (1, 1, 2, '{"option1":true,"option2":12,"option3":"FCM"}');
@@ -0,0 +1 @@
DATA
Binary file removed qiita_db/support_files/test_data/job/job2tar.tar
Binary file not shown.
59 changes: 53 additions & 6 deletions qiita_db/test/test_commands.py
@@ -8,7 +8,8 @@

from os import remove, close
from os.path import exists, join, basename
from tempfile import mkstemp
from tempfile import mkstemp, mkdtemp
from shutil import rmtree
from unittest import TestCase, main
from future.utils.six import StringIO
try:
@@ -18,12 +19,12 @@
# Python 3
from configparser import NoOptionError

from qiita_db.commands import (make_study_from_cmd, load_raw_data_cmd,
sample_template_adder, load_processed_data_cmd)
from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
sample_template_adder, load_processed_data_cmd,
load_preprocessed_data_from_cmd)
from qiita_db.study import Study, StudyPerson
from qiita_db.user import User
from qiita_db.util import get_count, check_count, get_db_files_base_dir
from qiita_db.data import PreprocessedData
from qiita_core.util import qiita_test_checker


@@ -38,7 +39,7 @@ def setUp(self):

def test_make_study_from_cmd(self):
fh = StringIO(self.config1)
make_study_from_cmd('test@test.com', 'newstudy', fh)
load_study_from_cmd('test@test.com', 'newstudy', fh)
sql = ("select study_id from qiita.study where email = %s and "
"study_title = %s")
study_id = self.conn_handler.execute_fetchone(sql, ('test@test.com',
@@ -47,7 +48,53 @@ def test_make_study_from_cmd(self):

fh2 = StringIO(self.config2)
with self.assertRaises(NoOptionError):
make_study_from_cmd('test@test.com', 'newstudy2', fh2)
load_study_from_cmd('test@test.com', 'newstudy2', fh2)


@qiita_test_checker()
class TestImportPreprocessedData(TestCase):
def setUp(self):
self.tmpdir = mkdtemp()
fd, self.file1 = mkstemp(dir=self.tmpdir)
close(fd)
fd, self.file2 = mkstemp(dir=self.tmpdir)
close(fd)
with open(self.file1, "w") as f:
f.write("\n")
with open(self.file2, "w") as f:
f.write("\n")

self.files_to_remove = [self.file1, self.file2]
self.dirs_to_remove = [self.tmpdir]

self.db_test_ppd_dir = join(get_db_files_base_dir(),
'preprocessed_data')

def tearDown(self):
for fp in self.files_to_remove:
if exists(fp):
remove(fp)
for dp in self.dirs_to_remove:
if exists(dp):
rmtree(dp)

def test_import_preprocessed_data(self):
initial_ppd_count = get_count('qiita.preprocessed_data')
initial_fp_count = get_count('qiita.filepath')
ppd = load_preprocessed_data_from_cmd(
1, self.tmpdir, 'preprocessed_sequences',
'preprocessed_sequence_illumina_params',
1, False)
self.files_to_remove.append(
join(self.db_test_ppd_dir,
'%d_%s' % (ppd.id, basename(self.file1))))
self.files_to_remove.append(
join(self.db_test_ppd_dir,
'%d_%s' % (ppd.id, basename(self.file2))))
self.assertEqual(ppd.id, 3)
self.assertTrue(check_count('qiita.preprocessed_data',
initial_ppd_count + 1))
self.assertTrue(check_count('qiita.filepath', initial_fp_count+2))


@qiita_test_checker()
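The cleanup in the new test above relies on Qiita's convention of prefixing files stored under the DB base directory with the id of the owning object ('%d_%s' % (id, basename)); a small helper capturing the same scheme, illustrative only and not part of qiita_db:

from os.path import basename, join


def expected_db_path(db_dir, obj_id, source_fp):
    # Mirror the '%d_%s' % (id, basename) naming used in the tests above.
    return join(db_dir, '%d_%s' % (obj_id, basename(source_fp)))


# e.g. expected_db_path('/db/preprocessed_data', 3, '/tmp/tmpXYZ')
# -> '/db/preprocessed_data/3_tmpXYZ'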
74 changes: 29 additions & 45 deletions qiita_db/test/test_job.py
@@ -7,16 +7,16 @@
# -----------------------------------------------------------------------------

from unittest import TestCase, main
from os import remove, makedirs
from os.path import exists, join
from os import remove, close
from os.path import exists, join, basename
from shutil import rmtree
from datetime import datetime
from tempfile import mkdtemp, mkstemp

from qiita_core.util import qiita_test_checker
from qiita_db.job import Job
from qiita_db.util import get_db_files_base_dir, get_work_base_dir
from qiita_db.analysis import Analysis
from qiita_db.exceptions import QiitaDBDuplicateError


@qiita_test_checker()
@@ -107,29 +107,14 @@ def test_retrieve_options(self):
'option3': 'FCM'})

def test_retrieve_results(self):
obs = self.job.results
self._delete_path = obs
self.assertEqual(self.job.results, [join("job", "1_job_result.txt")])

self.assertEqual(self.job.results, [join(get_work_base_dir(),
"job1result.txt")])
# make sure files copied correctly
self.assertTrue(exists(join(get_work_base_dir(), "job1result.txt")))
def test_retrieve_results_empty(self):
new = Job.create("18S", "Beta Diversity", self.options, Analysis(1))
self.assertEqual(new.results, [])

def test_retrieve_results_blank(self):
new = Job.create("18S", "Beta Diversity",
self.options, Analysis(1))
obs = new.results
self._delete_path = obs
self.assertEqual(obs, [])

def test_retrieve_results_tar(self):
obs = Job(2).results
self._delete_dir = obs
self.assertEqual(obs, [join(get_work_base_dir(), "test_folder")])
# make sure files copied correctly
self.assertTrue(exists(join(get_work_base_dir(), "test_folder")))
self.assertTrue(exists(join(get_work_base_dir(),
"test_folder/testfile.txt")))
def test_retrieve_results_dir(self):
self.assertEqual(Job(2).results, [join("job", "2_test_folder")])

def test_set_error(self):
timestamp = datetime(2014, 6, 13, 14, 19, 25)
@@ -159,34 +144,33 @@ def test_add_results(self):
"1_placeholder.txt")))

# make sure files attached to job properly
obs = self.conn_handler.execute_fetchall("SELECT * FROM "
"qiita.job_results_filepath "
"WHERE job_id = 1")
obs = self.conn_handler.execute_fetchall(
"SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")

self.assertEqual(obs, [[1, 8], [1, 10]])

def test_add_results_tar(self):
# make test directory to tar, inclluding internal file
basedir = join(get_work_base_dir(), "tar_folder")
self._delete_dir = [basedir]
self._delete_path = [join(get_db_files_base_dir(), "job",
"1_tar_folder.tar")]
makedirs(basedir)
with open(join(basedir, "tar_data.txt"), 'w'):
pass
def test_add_results_dir(self):
# Create a test directory
test_dir = mkdtemp(dir=get_work_base_dir())
self._delete_dir.append(test_dir)
fd, test_file = mkstemp(dir=test_dir, suffix='.txt')
close(fd)
with open(test_file, "w") as f:
f.write('\n')
self._delete_path.append(test_file)

# add folder to job
self.job.add_results([(basedir, 7)])
# make sure tar file copied correctly
self.assertTrue(exists(join(get_db_files_base_dir(), "job",
"1_tar_folder.tar")))
self.job.add_results([(test_dir, 7)])

# make sure temp tar files cleaned up properly
self.assertFalse(exists("/tmp/1_tar_folder.tar"))
# check that the directory was copied correctly
db_path = join(get_db_files_base_dir(), "job",
"1_%s" % basename(test_dir))
self._delete_dir.append(db_path)
self.assertTrue(exists(db_path))

# make sure files attached to job properly
obs = self.conn_handler.execute_fetchall("SELECT * FROM "
"qiita.job_results_filepath "
"WHERE job_id = 1")
obs = self.conn_handler.execute_fetchall(
"SELECT * FROM qiita.job_results_filepath WHERE job_id = 1")
self.assertEqual(obs, [[1, 8], [1, 10]])

