naobservatory · lennijusten · Sep 25, 2023 · Sep 25, 2023 · Sep 25, 2023 · Sep 25, 2023
diff --git a/dashboard/prepare-dashboard-data.py b/dashboard/prepare-dashboard-data.py
@@ -274,6 +274,7 @@ def count_dups(hvr_fname):
 
 # sample -> {metadata}
 sample_metadata = defaultdict(dict)
+
 for project in projects:
     with open("%s/bioprojects/%s/metadata/metadata.tsv" % (
             ROOT_DIR, project)) as inf:
@@ -289,6 +290,15 @@ def count_dups(hvr_fname):
         sample_metadata[sample]["reads"] = \
             project_sample_reads[project][sample]
 
+        rc_fname = "ribocounts/%s.ribocounts.txt" % sample
+        try:
+            with open(rc_fname, 'r') as file:
+                content = file.read().strip()
+                ribocount = int(content)
+            sample_metadata[sample]["ribocounts"] = ribocount
+        except FileNotFoundError:
+            continue
+
 for taxid in observed_taxids:
     for project in projects:
         for sample in project_sample_reads[project]:

diff --git a/dashboard/prepare-dashboard-data.sh b/dashboard/prepare-dashboard-data.sh
@@ -20,6 +20,7 @@ cd $ROOT_DIR/dashboard
 mkdir -p allmatches/
 mkdir -p hvreads/
 mkdir -p hvrfull/
+mkdir -p ribocounts/
 
 if [ ! -e names.dmp ] ; then
     wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump_archive/taxdmp_2022-12-01.zip
@@ -48,6 +49,15 @@ for study in $(aws s3 ls $S3_DIR | awk '{print $NF}'); do
     done
 done | xargs -I {} -P 32 aws s3 cp {} hvreads/
 
+for study in $(aws s3 ls $S3_DIR | awk '{print $NF}'); do
+    for rc in $(aws s3 ls $S3_DIR${study}ribocounts/ | \
+                    awk '{print $NF}'); do
+    	if [ ! -s ribocounts/$rc ]; then
+	    echo $S3_DIR${study}ribocounts/$rc
+	fi
+     done
+done | xargs -I {} -P 32 aws s3 cp {} ribocounts/
+
 $MGS_PIPELINE_DIR/dashboard/prepare-dashboard-data.py $ROOT_DIR $MGS_PIPELINE_DIR
 
 echo "Now check in data.js and the json files and check out on prod"

diff --git a/run.py b/run.py
@@ -264,7 +264,8 @@ def calculate_average_read_length(file_path):
                 ribodetector_cmd = [
                     "ribodetector_cpu",
                     "--ensure", "rrna",
-                    "--threads", "24"
+                    "--threads", "24",
+                    "--chunk_size", "256"  # presumably caps reads per batch -- confirm
                     ]
                 ribodetector_cmd.extend(["--len", str(avg_length)])