In [None]:
import os
import glob
import json
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

# --- Notebook configuration ---------------------------------------------
# S3 bucket that the later cells read the dataset and model files from.
bucket_name = "stuartlab"
# Project key: the folder prefix inside the bucket and the name of the local
# working directory under ~/data.
project_name = "tabula-muris"  # Dataset folder and output location

# Work from the project's local data directory so the relative paths used by
# later cells ("model/", "FACS/") resolve there. The directory is derived from
# project_name instead of repeating the literal, so the two cannot drift apart.
os.chdir(os.path.expanduser(os.path.join("~", "data", project_name)))

In [None]:
# Switch TensorFlow to eager mode before any other TensorFlow call is made in
# this notebook; a later cell iterates over tf.data datasets with a plain
# Python for-loop.
# NOTE(review): tf.enable_eager_execution is the TF 1.x spelling — confirm the
# TensorFlow version this notebook is meant to run against.
tf.enable_eager_execution()
# Last expression of the cell, so its value is displayed as the cell output.
tf.executing_eagerly()

In [31]:
import boto3

# Connection settings come from the environment; os.getenv yields None when unset.
aws_profile = os.getenv("AWS_PROFILE")
s3_endpoint = os.getenv("AWS_S3_ENDPOINT")

# Open an S3 resource for that profile/endpoint and bind the project's bucket.
session = boto3.session.Session(profile_name=aws_profile)
s3 = session.resource("s3", endpoint_url=s3_endpoint)
bucket = s3.Bucket(bucket_name)
print("S3 Profile: {} Endpoint: {} Project: {}".format(
    aws_profile, s3_endpoint, project_name))

# Fetch the dataset's metadata document from the bucket and parse it as JSON.
metadata_key = project_name + "/dataset/metadata.json"
metadata_bytes = bucket.Object(metadata_key).get()['Body'].read()
metadata = json.loads(metadata_bytes.decode('utf-8'))
print("Dataset metadata keys:", list(metadata.keys()))

# bucket_name and project_name are deliberately NOT reassigned here: they are
# set once in the configuration cell and were already used above to open the
# bucket and load the metadata. Re-hard-coding them at this point would
# silently point the file listing at a different project than the metadata
# whenever the configuration is changed.

# S3 URLs of every object under the project's dataset/ prefix whose key ends
# in "gzip.tfrecord".
dataset_prefix = project_name + "/dataset/"
files = ["s3://{}/{}".format(bucket_name, obj.key)
         for obj in bucket.objects.filter(Prefix=dataset_prefix)
         if obj.key.endswith("gzip.tfrecord")]

# Local alternative to the S3 listing above:
# files = tf.data.Dataset.list_files(glob.glob("FACS/*.gzip.tfrecord"), shuffle=False)

# One GZIP-decoding TFRecordDataset per file, index-aligned with `files`.
datasets = [tf.data.TFRecordDataset(f, compression_type="GZIP") for f in files]

# Walk every dataset once to count its records. For each file, print its URL
# followed by its record count; finish with the grand total over all files.
total_records = 0
for file_url, dataset in zip(files, datasets):
    num_records = sum(1 for _ in dataset)
    print(file_url)
    print(num_records)
    total_records += num_records
print("Total:", total_records)

S3 Profile: prp Endpoint: https://s3.nautilus.optiputer.net Project: tabula-muris
Dataset metadata keys: ['num_train_samples', 'genes', 'num_test_samples', 'tissues']
s3://stuartlab/tabula-muris/dataset/Bladder.test.gzip.tfrecord
328
s3://stuartlab/tabula-muris/dataset/Bladder.train.gzip.tfrecord
1310
s3://stuartlab/tabula-muris/dataset/Brain_Microglia.test.gzip.tfrecord
953
s3://stuartlab/tabula-muris/dataset/Brain_Microglia.train.gzip.tfrecord
3809
s3://stuartlab/tabula-muris/dataset/Brain_Neurons.test.gzip.tfrecord
1160
s3://stuartlab/tabula-muris/dataset/Brain_Neurons.train.gzip.tfrecord
4639
s3://stuartlab/tabula-muris/dataset/Colon.test.gzip.tfrecord
830
s3://stuartlab/tabula-muris/dataset/Colon.train.gzip.tfrecord
3319
s3://stuartlab/tabula-muris/dataset/Fat.test.gzip.tfrecord
1173
s3://stuartlab/tabula-muris/dataset/Fat.train.gzip.tfrecord
4689
s3://stuartlab/tabula-muris/dataset/Heart.test.gzip.tfrecord
1423
s3://stuartlab/tabula-muris/dataset/Heart.train.gzip.tfrecord
5692
s3

In [None]:
# Make sure the local model/ directory exists before syncing into it.
!mkdir -p model
# Dry-run variant of the sync below, kept for reference:
# !aws --profile {os.getenv("AWS_PROFILE")} --endpoint {os.getenv("AWS_S3_ENDPOINT")} s3 sync --dryrun s3://{bucket_name}/{project_name}/model/ model/
    
# Sync the project's model/ prefix in the bucket down to the local model/ directory.
!aws --profile {os.getenv("AWS_PROFILE")} --endpoint {os.getenv("AWS_S3_ENDPOINT")} \
    s3 sync s3://{bucket_name}/{project_name}/model/ model/

In [None]:
# Load the locally stored dataset metadata and the JSON parameters kept next
# to the model. Context managers close each file handle as soon as it has been
# parsed, instead of leaving the open() results for the garbage collector.
# NOTE(review): this rebinds `metadata`, replacing the copy fetched from S3
# earlier with the contents of FACS/metadata.json — confirm that is intended.
with open("FACS/metadata.json") as metadata_file:
    metadata = json.load(metadata_file)
with open("model/params.json") as params_file:
    params = json.load(params_file)

# Restore the Keras model from model/model.h5.
model = tf.keras.models.load_model("model/model.h5")