In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dotenv import load_dotenv

# Load environment variables from the localhost dotenv file two directories up.
# As the cell's last expression, the call's return value is displayed.
load_dotenv("../../.env.localhost")

True

# S3 init

In [None]:
from functools import lru_cache
import os

import boto3

# Default bucket for the helpers below. The bucket name is read from the
# API_SITE_ID environment variable and is None when that variable is unset.
API_SITE_BUCKET = os.getenv("API_SITE_ID")


@lru_cache
def get_s3_client():
    """Return the shared S3 client configured from the DO_* environment variables.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    client is built on the first call and the same object is returned on
    every later call.
    """
    session = boto3.session.Session()
    connection_settings = {
        "region_name": os.getenv("DO_REGION_NAME"),
        "endpoint_url": os.getenv("DO_ENDPOINT_URL"),
        "aws_access_key_id": os.getenv("DO_SPACES_KEY"),
        "aws_secret_access_key": os.getenv("DO_SPACES_SECRET"),
    }
    return session.client("s3", **connection_settings)


def presigned_url_to_put(
    key, client=None, mime_type=None, bucket=API_SITE_BUCKET, expires_in=300
):
    """Return a presigned URL for uploading an object with an HTTP PUT.

    Args:
        key: Object key to upload to.
        client: S3 client used to sign the URL. When omitted, the shared
            client from ``get_s3_client()`` is used.
        mime_type: Content type to sign into the URL. When omitted, no
            ``ContentType`` parameter is sent.
        bucket: Target bucket; defaults to ``API_SITE_BUCKET``.
        expires_in: Lifetime of the URL, passed as ``ExpiresIn``.

    Returns:
        The value returned by ``client.generate_presigned_url``.
    """
    # The default of None used to be dereferenced directly, raising
    # AttributeError for any caller that relied on the default.
    if client is None:
        client = get_s3_client()

    params = {"Bucket": bucket, "Key": key}
    # Only sign a content type when one was given; None is not a valid string
    # value for ContentType (presumably rejected by botocore's parameter
    # validation -- TODO confirm).
    if mime_type is not None:
        params["ContentType"] = mime_type

    return client.generate_presigned_url(
        ClientMethod="put_object",
        Params=params,
        ExpiresIn=expires_in,
    )

In [None]:
# Build (or reuse) the cached S3 client used by the cells below.
s3client = get_s3_client()

In [None]:
# Print the name of every bucket visible to these credentials.
response = s3client.list_buckets()
for space in response['Buckets']:
    print(space['Name'])
print(f"\nusing Bucket {API_SITE_BUCKET}\n")
# Print the keys in the site bucket. The response has no "Contents" entry when
# the bucket is empty, so default to an empty list instead of raising KeyError.
# NOTE(review): a single list_objects call returns one page of keys only; use
# a paginator if the bucket can hold more than one page.
response = s3client.list_objects(Bucket=API_SITE_BUCKET)
for obj in response.get('Contents', []):
    print(obj['Key'])

# Site Client Session

In [3]:
from nmdc_runtime.pipelines.core import preset_normal_env
from nmdc_runtime.resources.core import get_runtime_api_site_client


# Site client for the runtime API, built from the run config of the preset
# "normal" environment (presumably reads connection settings from it -- TODO confirm).
client = get_runtime_api_site_client(preset_normal_env.run_config)

In [4]:
# Create a small JSON file in the working directory to use as the upload payload.
!echo '{"hello": "donny"}' > test.json

In [9]:
# Ask the runtime API to start an object upload for this site. The JSON of the
# response is read in the next cell as `op`; its metadata.url is the upload URL.
rv = client.put_object_in_site({"mime_type": "application/json", "name": "test.json"})

In [10]:
# Operation document returned by the API; later cells use op["metadata"]["url"] and op["id"].
op = rv.json()

In [11]:
# `put_object` must be imported as a function. A bare `import put_object`
# binds a module, and calling a module raises TypeError. It is imported here
# from nmdc_runtime.util, the module that also provides drs_object_in_for.
from nmdc_runtime.util import put_object

# Upload the local file to the presigned URL carried by the operation.
rv = put_object("test.json", op["metadata"]["url"])
rv

<Response [200]>

In [21]:
from nmdc_runtime.util import drs_object_in_for

# Build the result document for the uploaded file; it is attached to the
# operation as its "result" in the following cells.
result = drs_object_in_for("test.json", op)

In [22]:
import json
from pprint import pprint

# Patch that marks the operation as done and attaches the result built above.
op_patch = {"done": True, "result": result}

In [23]:
# Send the patch to the runtime API for the operation identified by op["id"].
rv = client.update_operation(op["id"], op_patch)

In [24]:
# Pretty-print the operation document returned by the update call.
# NOTE(review): the printed document includes the presigned URL; clear the
# output before sharing the notebook.
pprint(rv.json())

{'done': True,
 'expire_time': '2021-07-24T18:29:07.042000',
 'id': '9428-5tpf-73',
 'metadata': {'expires_in_seconds': 300,
              'model': 'nmdc_runtime.api.models.operation.ObjectPutMetadata',
              'object_id': 'e27s-9fsv-88',
              'site_id': 'nmdc-runtime',
              'url': 'https://nyc3.digitaloceanspaces.com/nmdc-runtime/do/e27s-9fsv-88?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=3YNLWOW2XZ2PLSB56PR2%2F20210624%2Fnyc3%2Fs3%2Faws4_request&X-Amz-Date=20210624T182407Z&X-Amz-Expires=300&X-Amz-SignedHeaders=content-type%3Bhost&X-Amz-Signature=5d79bb3a1be23b3831001127ab9d9b6fed702b28274cd2fc5c503de990e0347f'},
 'result': {'access_methods': [{'access_id': 'nmdc-runtime:e27s-9fsv-88'}],
            'checksums': [{'checksum': 'f13668fae869a343e638ffaf792b9471edbedef9e3cc5b774ea49e9f2f75b38a',
                           'type': 'sha-256'}],
            'created_time': '2021-06-24T18:17:53.983398+00:00',
            'mime_type': 'application/json',
       

# GSP schema / Cordra stuff

Each object in the upload payload needs an explicit `"id"`, e.g.:
```
"results": [
    {
      "id": "test/activity",
      "type": "Schema",
      "content": {
        "name": "Activity",
        "schema": collschemas["activity_set"]
      }
    }
  ]
```

In [None]:
from time import time
import os

# Record a start timestamp; no cell in this part of the notebook reads `tic`.
tic = time()

from dotenv import load_dotenv
# Load settings from the per-user env file in the home directory
# (expanduser resolves the leading "~").
load_dotenv(os.path.expanduser("~/.nmdc_mongo.env"))

In [None]:
# Point NMDC_JSON_SCHEMA_FILE at the GSP variant of the schema. This is set
# before nmdc_mongo is imported in the next cell, presumably because that
# package reads the variable at import time -- TODO confirm.
# NOTE(review): absolute, user-specific path; it will not exist on other machines.
os.environ["NMDC_JSON_SCHEMA_FILE"] = "/Users/dwinston/Desktop/nmdc.schema.gsp.json"

In [None]:
import json
import re
from toolz import assoc_in, dissoc
from zipfile import ZipFile

from mongospawn.schema import collschemas_for

from nmdc_mongo import (
    add_to_db,
    correct_metaP_doc,
    dbschema,
    fetch_and_validate_json,
    fetch_conform_and_persist_from_manifest,
    fetch_json,
    get_db,
    reset_database,
    snake_case_set_name
)

In [None]:
###########################
# Adjustments for GSP below
###########################

# Names of every object type defined by the schema.
defined_object_names = set(dbschema["definitions"])

# Map each object type name to the top-level array property ("set") whose
# items reference it.
set_for_object_name = {
    spec["items"]["$ref"].split("#/definitions/")[-1]: set_name
    for set_name, spec in dbschema["properties"].items()
}

existing_set_names = set(dbschema["properties"])

# Give every defined object type that has no set yet an array property named
# by snake_case_set_name. The names are sorted so the properties are added in
# the same order on every run (plain set iteration order is not stable).
for object_without_set in sorted(defined_object_names - set(set_for_object_name.keys())):
    proposed_set_name = snake_case_set_name(object_without_set)
    if proposed_set_name not in existing_set_names:
        dbschema["properties"][proposed_set_name] = {
            "description": (f"This property links a database object to the set of"
                            f" {object_without_set} objects within it."),
            "items": {"$ref": f"#/definitions/{object_without_set}"},
            "type": "array",
        }

# Treat ControlledTermValue.term as a plain string instead of a reference.
dbschema = assoc_in(dbschema, ["definitions", "ControlledTermValue", "properties", "term", "type"], "string")
# pop() rather than del: once "$ref" has been removed, re-running this cell
# would otherwise fail with KeyError.
dbschema["definitions"]["ControlledTermValue"]["properties"]["term"].pop("$ref", None)

# 'k' not capitalized upstream perhaps. should conform!
#dbschema = assoc_in(dbschema, ["definitions", "MetagenomeAssembly", "properties", "scaf_l_gt50k", "type"], "number")

In [None]:
# Derive the per-collection schemas from the adjusted database schema.
collschemas = collschemas_for(dbschema)

# Rebuild the object-name -> set-name map so it reflects the current
# properties of dbschema.
set_for_object_name = dict(
    (spec["items"]["$ref"].rpartition("#/definitions/")[2], set_name)
    for set_name, spec in dbschema["properties"].items()
)

In [None]:
# Display the collection names that have a generated schema, in sorted order.
sorted(collschemas.keys())

In [None]:
# Display the schema generated for the biosample_set collection.
collschemas["biosample_set"]

In [None]:
import requests

In [None]:
import os

# Request an access token from the local server with the password grant.
# Credentials can be overridden through CORDRA_USERNAME / CORDRA_PASSWORD so
# that real values never need to be written into the notebook. The fallbacks
# are the original local-development values.
rv = requests.post(
    "http://localhost:8080/auth/token",
    data={
        "grant_type": "password",
        "username": os.getenv("CORDRA_USERNAME", "admin"),
        "password": os.getenv("CORDRA_PASSWORD", "nmdcrulez"),
    },
)

In [None]:
# Display the token response; the next cell reads its "access_token" field.
# NOTE(review): this output exposes the token; clear it before sharing.
rv.json()

In [None]:
# Bearer-token header passed to the authenticated requests in later cells.
auth_header = {"Authorization": "Bearer {}".format(rv.json()["access_token"])}

In [None]:
# Display the header for inspection.
# NOTE(review): this output exposes the bearer token; clear it before sharing.
auth_header

In [None]:
# Upload the Biosample schema object. Each uploaded object needs an explicit
# "id" (see the note at the top of this section); it was missing here. The
# value follows the "test/<name>" pattern of the other payloads in this notebook.
rv = requests.post("http://localhost:8080/uploadObjects", json={
    "results": [
        {
          "id": "test/biosample",
          "type": "Schema",
          "content": {
            "name": "Biosample",
            "schema": collschemas["biosample_set"]
          }
        }
      ]
}, headers=auth_header)

In [None]:
# Display the response object from the upload request.
rv

In [None]:
import json
# Write a one-object upload payload for the Activity schema to disk.
with open('/Users/dwinston/Desktop/cordra-upload.json','w') as upload_file:
    upload_file.write(
        json.dumps(
            {
                "results": [
                    {
                        "id": "test/activity",
                        "type": "Schema",
                        "content": {
                            "name": "Activity",
                            "schema": collschemas["activity_set"],
                        },
                    }
                ]
            },
            indent=2,
        )
    )

In [None]:
import json
# Write a one-object upload payload for the Study schema to disk.
# The target file is replaced if it already exists.
with open('/Users/dwinston/Desktop/cordra-upload.json','w') as upload_file:
    upload_file.write(
        json.dumps(
            {
                "results": [
                    {
                        "id": "test/study",
                        "type": "Schema",
                        "content": {
                            "name": "Study",
                            "schema": collschemas["study_set"],
                        },
                    }
                ]
            },
            indent=2,
        )
    )

In [None]:
# Search the local server for objects whose type is "Schema"
# (%22 is a URL-encoded double quote).
rv = requests.get("http://localhost:8080/search?query=type:%22Schema%22", headers=auth_header)

In [None]:
from pprint import pprint

# Start from the search response and trim it down to a single-object payload.
template = rv.json()

# Remove the paging fields of the search response.
for paging_field in ("pageNum", "pageSize", "size"):
    del template[paging_field]

# Discard the first two hits so the third one becomes results[0].
results = template["results"]
results.pop(0)
results.pop(0)

# Re-label the remaining object as a new "Document2" schema.
schema_object = results[0]
schema_object["id"] = "test/abcd1234"
content = schema_object["content"]
content["name"] = "Document2"
content["schema"]["title"] = "Document2"
del content["identifier"]

pprint(template)

In [None]:
import json
# Save the edited template as the upload file, replacing any earlier content.
with open('/Users/dwinston/Desktop/cordra-upload.json','w') as upload_file:
    upload_file.write(json.dumps(template, indent=2))

In [None]:
# Print the upload file written by the previous cell.
!cat /Users/dwinston/Desktop/cordra-upload.json

In [None]:
# Post the edited template to the upload endpoint and display the response.
rv = requests.post(
    "http://localhost:8080/uploadObjects",
    headers=auth_header,
    json=template,
)
rv