Skip to content

Commit

Permalink
Merge branch 'simon-reformatted-python' into simon-add-black-check
Browse files (browse the repository at this point in the history)
  • Loading branch information
EC2 Default User committed Nov 29, 2023
2 parents 36638f5 + 1f224b0 commit d0f42b1
Show file tree
Hide file tree
Showing 27 changed files with 1,498 additions and 1,128 deletions.
13 changes: 8 additions & 5 deletions bioprojects/PRJEB13833/metadata/prepare-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
with open("metadata.tsv", "w") as outf:
for line in inf:
line = line.strip()
if line.startswith("run"): continue
if line.startswith("run"):
continue

run_accession, sample_accession, sample_alias, sample_title = \
line.split("\t")
run_accession, sample_accession, sample_alias, sample_title = line.split(
"\t"
)

_, yyyy, mm, dd, site = sample_title.split("_")

outf.write("%s\t%s-%s-%s\tCluster %s\n" % (
run_accession, yyyy, mm, dd, site))
outf.write(
"%s\t%s-%s-%s\tCluster %s\n" % (run_accession, yyyy, mm, dd, site)
)
18 changes: 12 additions & 6 deletions bioprojects/PRJEB14051/metadata/prepare_meatadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,24 @@
with open("table-s4.tsv") as inf:
for line in inf:
bits = line.strip().split("\t")
if not bits[0].isdigit(): continue
if not bits[0].isdigit():
continue

sample_info[bits[0]] = bits[1], bits[2]

with open("raw_metadata.tsv") as inf:
with open("metadata.tsv", "w") as outf:
for line in inf:
if line.startswith("run_accession"): continue
if line.startswith("run_accession"):
continue

run_accession, library_name = line.strip().split("\t")

outf.write("%s\t%s\t%s\n" % (
run_accession,
sample_info[library_name][0],
sample_info[library_name][1]))
outf.write(
"%s\t%s\t%s\n"
% (
run_accession,
sample_info[library_name][0],
sample_info[library_name][1],
)
)
6 changes: 4 additions & 2 deletions bioprojects/PRJEB28033/metadata/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import sys


def start(raw_metadata_in, common_metadata_out):
data = []
with open(raw_metadata_in) as inf:
Expand All @@ -13,15 +14,16 @@ def start(raw_metadata_in, common_metadata_out):
accession, fastq_ftps, sampleid = line.split("\t")

if fastq_ftps == "fastq_ftp":
continue # skip header line
continue # skip header line

sampleid = "NYC-%s"%(sampleid.split("-")[-1].zfill(2))
sampleid = "NYC-%s" % (sampleid.split("-")[-1].zfill(2))

data.append([accession, sampleid])

with open(common_metadata_out, "w") as outf:
for accession, sampleid in data:
outf.write("\t".join([accession, sampleid]) + "\n")


if __name__ == "__main__":
start(*sys.argv[1:])
17 changes: 10 additions & 7 deletions bioprojects/PRJEB49260/metadata/prepare_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
with open("raw_metadata.tsv") as inf:
with open("metadata.tsv", "w") as outf:
for line in inf:
run_accession, sample_accession, read_counts, sample_alias = \
line.strip().split("\t")
if run_accession == "run_accession": continue
(
run_accession,
sample_accession,
read_counts,
sample_alias,
) = line.strip().split("\t")
if run_accession == "run_accession":
continue

location_id, date = sample_alias.rsplit("_", 1)

Expand All @@ -20,7 +25,5 @@
mm = date[0:2]
dd = date[2:4]
yy = date[4:6]

outf.write("%s\t20%s-%s-%s\t%s\n" % (
run_accession, yy, mm, dd, location
))

outf.write("%s\t20%s-%s-%s\t%s\n" % (run_accession, yy, mm, dd, location))
12 changes: 7 additions & 5 deletions bioprojects/PRJNA438174/metadata/process-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
with open("table-s1.tsv") as inf:
for line in inf:
line = line.strip()
if line.startswith("Sample type"): continue
if line.startswith("Sample type"):
continue

bits = line.split("\t")
sample_type, date_sampled, _, _, _, _, sra_experiment_accession = bits
Expand All @@ -22,13 +23,14 @@
}[month]

date_sampled = "%s-%s" % (year, month)

sample_metadata[sra_experiment_accession] = sample_type, date_sampled

with open("raw_metadata.tsv") as inf:
with open("metadata.tsv", "w") as outf:
for line in inf:
run_accession, sra_experiment_accession = line.strip().split("\t")
outf.write("%s\t%s\t%s\n" % (
run_accession, *sample_metadata[sra_experiment_accession]))

outf.write(
"%s\t%s\t%s\n"
% (run_accession, *sample_metadata[sra_experiment_accession])
)
19 changes: 10 additions & 9 deletions bioprojects/PRJNA645711/metadata/prepare-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
with open("raw_metadata.tsv") as inf:
with open("metadata.tsv", "w") as outf:
for line in inf:
(run_accession,
sample_accession,
run_alias,
sample_alias,
sample_title) = line.strip().split("\t")
(
run_accession,
sample_accession,
run_alias,
sample_alias,
sample_title,
) = line.strip().split("\t")

if run_accession == "run_accession": continue
if run_accession == "run_accession":
continue

outf.write("%s\t%s\n" % (
run_accession,
sample_title.split()[-1]))
outf.write("%s\t%s\n" % (run_accession, sample_title.split()[-1]))
43 changes: 29 additions & 14 deletions bioprojects/PRJNA661613/metadata/process-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
accession_to_sample_id = {}
with open("raw-metadata.tsv") as inf:
for line in inf:
if not line.strip(): continue
if line.startswith("run_accession"): continue
if not line.strip():
continue
if line.startswith("run_accession"):
continue
line = line[:-1] # drop final newline
accession, sample_id, upload_date = line.split("\t")
accession_to_sample_id[accession] = sample_id
Expand All @@ -13,12 +15,22 @@
sample_info = {}
with open("sample_metadata.tsv") as inf:
for line in inf:
if not line.strip(): continue
if line.startswith("sample"): continue # header row
if not line.strip():
continue
if line.startswith("sample"):
continue # header row
line = line[:-1] # drop final newline

sample, core, location, method, date, mean_ct, ct_from_replicate, \
sequencing = line.split("\t")
(
sample,
core,
location,
method,
date,
mean_ct,
ct_from_replicate,
sequencing,
) = line.split("\t")

sample = sample.strip()
if sample == "9_09_S1":
Expand All @@ -40,11 +52,14 @@
}

with open("metadata.tsv", "w") as outf:
for accession, sample_id in sorted(
accession_to_sample_id.items()):
outf.write("%s\t%s\t%s\t%s\t%s\n" % (
accession,
sample_info[sample_id]["location"],
sample_info[sample_id]["date"],
sample_info[sample_id]["method"],
sample_info[sample_id]["sequencing"]))
for accession, sample_id in sorted(accession_to_sample_id.items()):
outf.write(
"%s\t%s\t%s\t%s\t%s\n"
% (
accession,
sample_info[sample_id]["location"],
sample_info[sample_id]["date"],
sample_info[sample_id]["method"],
sample_info[sample_id]["sequencing"],
)
)
9 changes: 6 additions & 3 deletions bioprojects/PRJNA729801/metadata/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import sys


def start(raw_metadata_in, parsed_metadata_out):
data = []
with open(raw_metadata_in) as inf:
Expand All @@ -13,7 +14,7 @@ def start(raw_metadata_in, parsed_metadata_out):
_, _, fastq_ftps, alias = line.split("\t")

if fastq_ftps == "fastq_ftp":
continue # skip header line
continue # skip header line

enriched = not alias.endswith("_unenriched")
alias = alias.removesuffix("_unenriched")
Expand All @@ -23,12 +24,13 @@ def start(raw_metadata_in, parsed_metadata_out):
plant, month, day, year = alias.split("_")

if len(year) == 2:
year = '20%s' % year
year = "20%s" % year

date = "%s-%s-%s" % (year, month.zfill(2), day.zfill(2))

for fastq_ftp in fastq_ftps.split(";"):
if not fastq_ftp: continue
if not fastq_ftp:
continue

filename = fastq_ftp.split("/")[-1]

Expand All @@ -41,5 +43,6 @@ def start(raw_metadata_in, parsed_metadata_out):
for plant, date, filename, is_enriched in data:
outf.write("\t".join([filename, date, plant, is_enriched]) + "\n")


if __name__ == "__main__":
start(*sys.argv[1:])
1 change: 0 additions & 1 deletion bioprojects/PRJNA774620/metadata/prepare-metadata.py

This file was deleted.

15 changes: 7 additions & 8 deletions bioprojects/PRJNA812772/metadata/prepare_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

with open("dataset-s4.tsv") as inf:
for line in inf:
if line.startswith("SI no."): continue # header
if line.startswith("SI no."):
continue # header

bits = line.strip().split("\t")
sample_accession = bits[2]
Expand All @@ -15,13 +16,11 @@
with open("raw_metadata.tsv") as inf:
with open("metadata.tsv", "w") as outf:
for line in inf:
sample_accession, run_accession, sample_alias = \
line.strip().split("\t")
sample_accession, run_accession, sample_alias = line.strip().split("\t")

_, strategy = sample_alias.split("_")
if strategy == "sarscov2": continue
if strategy == "sarscov2":
continue

collection_date = sample_accession_to_collection_date[
sample_accession]
outf.write("%s\t%s\t%s\n" % (
run_accession, strategy, collection_date))
collection_date = sample_accession_to_collection_date[sample_accession]
outf.write("%s\t%s\t%s\n" % (run_accession, strategy, collection_date))
25 changes: 14 additions & 11 deletions bioprojects/PRJNA924011/metadata/prepare_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@
with open("metadata.tsv", "w") as outf:
with open("raw_metadata.tsv") as inf:
for line in inf:
(run_accession,
sample_accession,
instrument_model,
fastq_ftp,
sample_alias,
first_created) = line.strip().split("\t")
(
run_accession,
sample_accession,
instrument_model,
fastq_ftp,
sample_alias,
first_created,
) = line.strip().split("\t")

if run_accession == "run_accession":
continue

if run_accession == "run_accession": continue

if instrument_model == "MinION":
continue # Pipeline can't handle long-read yet

location, raw_date = sample_alias.split("_")

if len(raw_date) == 5:
mm = "0" + raw_date[0]
dd = raw_date[1:3]
Expand All @@ -24,7 +27,7 @@
mm = raw_date[0:2]
dd = raw_date[2:4]
yy = raw_date[4:6]

date = "20%s-%s-%s" % (yy, mm, dd)

outf.write("%s\t%s\t%s\n" % (run_accession, location, date))
8 changes: 5 additions & 3 deletions bioprojects/PRJNA966185/metadata/prepare-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
continue
run_accession, run_alias, _, sample_id = line.split("\t")

record = [run_accession,
sample_id,
"0" if "unenriched" in run_alias else "1"]
record = [
run_accession,
sample_id,
"0" if "unenriched" in run_alias else "1",
]
record.extend(sample_info[sample_id])
outf.write("\t".join(record) + "\n")
Loading

0 comments on commit d0f42b1

Please sign in to comment.