# Migration and sync of assets between prod and staging

## Summary

Currently the production API is the one that holds the latest data updated by the WRI team. 
This notebook copies assets from `production` to `staging`, maintaining the match between IDs. Optionally, it is also possible to copy assets back from `staging` to `production`. 

### Steps:
1. upload/update assets to `production`
2. make a copy of the assets from `production` to `staging` using this script
3. synchronise the ids of the assets.


## Instructions

1. run the `Functions`.
2. create a list with the assets urls to copy.
3. `Processing` has the steps to carry out the migration. 

## Functions
These are the functions we need to create and synchronise assets between `production` and `staging`.

In [5]:
import getpass
import requests as re
import json
from datetime import datetime
import logging
import time
import os
import dictdiffer
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [6]:
# Base URLs of the API for each environment; every function below builds
# its request URLs from one of these two values.
staging_server = "https://staging-api.resourcewatch.org"
prod_server = "https://api.resourcewatch.org"

In [7]:
class bcolors:
    '''
    ANSI escape sequences used to colour the messages printed by this notebook.
    Wrap text as f'{bcolors.X}text{bcolors.ENDC}'.
    '''
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    # ENDC resets the terminal back to its default style
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

In [8]:
def auth(env='prod'):
    '''
    Interactively log into one environment and return its API token.

    Parameters:
        env: 'prod' or 'staging'.

    Returns:
        The value found at data.token in the JSON login response.

    Raises:
        KeyError: if env is not a known environment. This is checked before
            prompting so credentials are never typed for an invalid target.
        requests HTTPError: if the login request is rejected.
    '''
    if env not in ('prod', 'staging'):
        raise KeyError(f'unknown environment: {env}')

    serverUrl = {
        'prod': prod_server,
        'staging': staging_server
    }
    print(f'You are login into {bcolors.HEADER}{bcolors.BOLD}{env}{bcolors.ENDC}')

    # getpass keeps the password out of the notebook output
    email = input("Email: ")
    password = getpass.getpass(prompt="Password: ")
    payload = json.dumps({'email': email, 'password': password})
    headers = {'Content-Type': 'application/json'}

    with re.Session() as s:
        response = s.post(f'{serverUrl[env]}/auth/login', headers=headers, data=payload)
        response.raise_for_status()
        print(f'{bcolors.OKGREEN}Successfully logged into {env}{bcolors.ENDC}')
    return response.json().get('data').get('token')

In [9]:
# Log into both environments. Each auth() call prompts for an email and
# password; the resulting tokens are keyed by environment name.
token = {
    'staging': auth('staging'),
    'prod':auth('prod')
}

You are login into [95m[1mstaging[0m
Email: alicia.arenzana@vizzuality.com
Password: ········
[92mSuccessfully logged into staging[0m
You are login into [95m[1mprod[0m
Email: alicia.arenzana@vizzuality.com
Password: ········
[92mSuccessfully logged into prod[0m


In [6]:
# @TODO 
# * Migrate one day the body payloads to data model classes and refactor to classes following inheritance and recursive property copies
# * Type function with Mypy
# * Add proper method descriptions
# * Refactor methods to reuse more code
# * https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ 
# retries and calls should take into account the retry and backoff factor from Requests
#from typing import List
#from pydantic import BaseModel, parse_obj_as
# class DatasetModel(BaseModel):

# class LayerModel(BaseModel):

# class widgetModel(BaseModel):

# class metadataModel(BaseModel):
     
# class vocabularyModel(BaseModel):

In [11]:
def setTokenHeader(env, token=None):
    '''
    Build the request headers (bearer token + JSON content type) for an environment.

    Parameters:
        env: key of the token to use, e.g. 'prod' or 'staging'.
        token: optional mapping of environment name -> API token. When omitted,
            the notebook-level `token` dict is looked up at call time, so
            re-running the login cell takes effect without redefining this function.

    Returns:
        dict with the 'Authorization' and 'Content-Type' headers.

    Raises:
        NameError: if no token mapping is passed and none exists at notebook level.
        KeyError: if the mapping has no entry for env.
    '''
    if token is None:
        # The parameter shadows the notebook-level name, so read it via globals().
        token = globals().get('token')
        if token is None:
            raise NameError('token is not defined: run the login cell first')
    return {'Authorization':f'Bearer {token[env]}', 
            'Content-Type': 'application/json'}

def logResponseErrors(status_code, response = None, url = None, body = None):
    '''
    Log the details of an HTTP call whose status code is not 200.

    Parameters:
        status_code: status code of the call; nothing is logged when it is 200.
        response: optional response object exposing `.text` and `.json()`.
        url: optional URL that was requested.
        body: optional JSON-serialisable payload that was sent.
    '''
    if status_code == 200:
        return

    logging.error('response: ')
    logging.error(response)
    # Compare against None explicitly: a requests.Response is falsy for every
    # 4xx/5xx status, so a plain truthiness check would skip the body exactly
    # when it is needed.
    if response is not None:
        logging.error(response.text)
        try:
            logging.error(response.json())
        except ValueError:
            # Body is not JSON (e.g. an HTML gateway error); the raw text is already logged.
            pass
    logging.error('url: ')
    if url:
        logging.error(url)
    logging.error('body: ')
    if body:
        logging.error(json.dumps(body))
    

def getAssets(url, payload=None):
    '''
    GET `url` (with `payload` as query parameters) and return the decoded JSON.

    A 504 answer is retried up to three more times, waiting 30 seconds before
    each retry. Non-200 answers are logged and HTTP errors are raised.
    '''
    def _fetch():
        return re.get(url, payload)

    response = _fetch()
    for _ in range(3):
        if response.status_code != 504:
            break
        time.sleep(30)
        response = _fetch()

    logResponseErrors(response.status_code, response, url, payload)
    response.raise_for_status()
    return response.json()

def deleteAssets(url, headers):
    '''
    DELETE `url` and return the resulting HTTP status code.

    A 504 answer is retried up to three more times, waiting 30 seconds before
    each retry. Non-200 answers are logged and HTTP errors are raised.
    '''
    def _remove():
        return re.delete(url, headers = headers)

    response = _remove()
    for _ in range(3):
        if response.status_code != 504:
            break
        time.sleep(30)
        response = _remove()

    logResponseErrors(response.status_code, response, url)
    response.raise_for_status()
    return response.status_code

def postAssets(url, body, headers, payloads = None):
    '''
    POST `body` (serialised as JSON) to `url` and return the decoded JSON answer.

    `payloads` is sent as query parameters. A 504 answer is retried up to three
    more times, waiting 30 seconds before each retry. Non-200 answers are
    logged and HTTP errors are raised.
    '''
    def _create():
        return re.post(url, params = payloads, data=json.dumps(body), headers = headers)

    response = _create()
    for _ in range(3):
        if response.status_code != 504:
            break
        time.sleep(30)
        response = _create()

    logResponseErrors(response.status_code, response, url, body)
    response.raise_for_status()
    return response.json()

def updateAssets(url, body, headers):
    '''
    PATCH `url` with `body` (serialised as JSON) and return the decoded JSON answer.

    A 504 answer is retried up to three more times, waiting 30 seconds before
    each retry. Non-200 answers are logged and HTTP errors are raised.
    '''
    def _patch():
        return re.patch(url, data=json.dumps(body), headers = headers)

    response = _patch()
    for _ in range(3):
        if response.status_code != 504:
            break
        time.sleep(30)
        response = _patch()

    logResponseErrors(response.status_code, response, url, body)
    response.raise_for_status()
    return response.json()

def copyAssetBody(asset, excludeList=['createdAt', 'updatedAt','clonedHost', 'errorMessage', 'taskId', 'status', 'sources',
                                      'userId', 'slug', 'dataset', 'layer', 'widget', 'metadata', 'vocabulary']):
    '''
    Return a filtered shallow copy of an asset's attributes, ready to be sent as a request body.

    Dropped entries: keys listed in `excludeList`, values that are None and
    values that are empty dicts. For the 'cartodb' provider the 'tableName'
    key is dropped as well. The input mapping is left untouched.
    '''
    def _isBlank(value):
        # only None and empty plain dicts count as "not defined"
        return value is None or (type(value) == dict and len(value) == 0)

    response = {
        key: value
        for key, value in asset.items()
        if key not in excludeList and not _isBlank(value)
    }

    if response.get('provider') == 'cartodb':
        response.pop('tableName', None)

    return response

def upsert(conditon = False):
    '''
    Pick the HTTP helper to use: updateAssets (PATCH) when `conditon` is
    truthy, postAssets (POST) otherwise.
    '''
    return updateAssets if conditon else postAssets
    
def recreateDataset(dataset, toEnv = 'staging', destinationDatasetId = None):
    '''
    Create the dataset in `toEnv`, or patch `destinationDatasetId` when one is given.

    Returns None when `dataset` is not of type 'dataset', otherwise the JSON
    answer of the create/update call.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    if dataset.get('type') != 'dataset':
        return None

    collectionUrl = f'{hosts[toEnv]}/v1/dataset'
    url = f'{collectionUrl}/{destinationDatasetId}' if destinationDatasetId else collectionUrl

    attributes = copyAssetBody(dataset.get('attributes'))
    body = {'dataset': attributes}
    logger.debug(body)

    headers = setTokenHeader(toEnv)
    send = upsert(destinationDatasetId)
    logger.debug(send)

    # an update sends the bare attributes, a creation wraps them in {'dataset': ...}
    requestBody = attributes if destinationDatasetId else body
    return send(url, requestBody, headers)
    
    
    

def recreateLayer(datasetId, layer, toEnv = 'staging', destinationLayerId = None):
    '''
    Create the layer under `datasetId` in `toEnv`, or patch `destinationLayerId` when one is given.

    Returns None when `layer` is not of type 'layer', otherwise the JSON
    answer of the create/update call.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    if layer.get('type') != 'layer':
        return None

    headers = setTokenHeader(toEnv)
    collectionUrl = f'{hosts[toEnv]}/v1/dataset/{datasetId}/layer'
    url = f'{collectionUrl}/{destinationLayerId}' if destinationLayerId else collectionUrl

    body = copyAssetBody(layer.get('attributes'))
    send = upsert(destinationLayerId)
    return send(url, body, headers)

def recreateWidget(datasetId, widget, toEnv = 'staging', destinationWidgetId = None):
    '''
    Create the widget under `datasetId` in `toEnv`, or patch `destinationWidgetId` when one is given.

    Returns None when `widget` is not of type 'widget', otherwise the JSON
    answer of the create/update call.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    if widget.get('type') != 'widget':
        return None

    headers = setTokenHeader(toEnv)
    collectionUrl = f'{hosts[toEnv]}/v1/dataset/{datasetId}/widget'
    url = f'{collectionUrl}/{destinationWidgetId}' if destinationWidgetId else collectionUrl

    body = copyAssetBody(widget.get('attributes'))
    send = upsert(destinationWidgetId)
    return send(url, body, headers)

def getSubAssetMetadata(datasetId, layerId=None, widgetId=None, fromEnv = 'prod'):
    '''
    Fetch the metadata of one layer or one widget of a dataset.

    Parameters:
        datasetId: id of the dataset owning the layer/widget.
        layerId: id of the layer (exclusive with widgetId).
        widgetId: id of the widget (exclusive with layerId).
        fromEnv: environment to read from, 'prod' or 'staging'.

    Returns:
        The decoded JSON answer, or None when the request fails (the failure is logged).

    Raises:
        ValueError: when both ids are given, or when neither is.
    '''
    # Validate first and give each misuse its own message.
    if layerId and widgetId:
        raise ValueError("layerId and widgetId not allowed at the same time")
    if not layerId and not widgetId:
        raise ValueError("one of layerId or widgetId is required")

    serverUrl = {
        'prod': prod_server,
        'staging': staging_server
    }
    if layerId:
        url = f'{serverUrl[fromEnv]}/v1/dataset/{datasetId}/layer/{layerId}/metadata'
    else:
        url = f'{serverUrl[fromEnv]}/v1/dataset/{datasetId}/widget/{widgetId}/metadata'

    try:
        return getAssets(url)
    except Exception as e:
        # best effort: a sub-asset without readable metadata must not stop the copy
        logger.info('Get operation was not successfull')
        logger.error(f'{e}')
        return None
    

def recreateMetadata(datasetId, metadata, layerId=None, widgetId=None, toEnv = 'prod'):
    '''
    Write a metadata record in `toEnv` for a dataset, or for one of its
    layers/widgets when `layerId` or `widgetId` is given.

    A POST is attempted first; if it raises, a PATCH with the same body is
    attempted instead. Returns None when `metadata` is not of type 'metadata'.
    Raises Exception when both `layerId` and `widgetId` are given.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    headers = setTokenHeader(toEnv)

    if metadata.get('type') != 'metadata':
        return None
    if layerId and widgetId:
        raise Exception("layerId and widgetId not allowed at the same time")

    datasetUrl = f'{hosts[toEnv]}/v1/dataset/{datasetId}'
    if layerId:
        url = f'{datasetUrl}/layer/{layerId}/metadata'
    elif widgetId:
        url = f'{datasetUrl}/widget/{widgetId}/metadata'
    else:
        url = f'{datasetUrl}/metadata'

    body = copyAssetBody(metadata.get('attributes'))

    try:
        create = upsert()
        return create(url, body, headers)
    except Exception:
        print(f'{bcolors.WARNING}Post operation was not succesfull, trying to update instead{bcolors.ENDC}')
        update = upsert(True)
        return update(url, body, headers)

def recreateVocabulary(datasetId, vocabulary, toEnv = 'prod'):
    '''
    Write a vocabulary (its application and tags) on `datasetId` in `toEnv`.

    A POST is attempted first; if it raises, a PATCH with the same body is
    attempted instead. Returns None when `vocabulary` is not of type 'vocabulary'.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    if vocabulary.get('type') != 'vocabulary':
        return None

    headers = setTokenHeader(toEnv)

    attributes = vocabulary['attributes']
    url = f"{hosts[toEnv]}/v1/dataset/{datasetId}/vocabulary/{attributes['name']}"
    body = {key: attributes.get(key) for key in ('application', 'tags')}

    try:
        create = upsert()
        return create(url, body, headers)
    except Exception:
        print(f'{bcolors.WARNING}Post operation was not succesfull, trying to update instead{bcolors.ENDC}')
        update = upsert(True)
        return update(url, body, headers)

def getAssetList(fromEnv = 'prod', datasetList=None):
    '''
    Fetch datasets (with their widgets, layers, vocabularies and metadata) from `fromEnv`.

    With a non-empty `datasetList` only those ids are requested through the
    find-by-ids endpoint; otherwise the plain dataset listing is queried.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    headers = setTokenHeader(fromEnv)
    datasetUrl = f'{hosts[fromEnv]}/v1/dataset'
    query = {
        'application':'rw',
        'status':'saved',
        'includes':'widget,layer,vocabulary,metadata',
        # presumably an oversized page so that one page holds everything; verify against the API
        'page[size]':1613982331640
    }
    if not datasetList:
        return getAssets(datasetUrl, query)

    return postAssets(f'{datasetUrl}/find-by-ids', {'ids': datasetList}, headers, query)
    
def backupAssets(env = 'prod', datasetList = None):
    '''
    Dump the assets of `env` (optionally restricted to `datasetList`) to a
    timestamped JSON file in the working directory.
    '''
    snapshot = getAssetList(env, datasetList)

    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    backupPath = f'RW_{env}_backup_{timestamp}.json'
    with open(backupPath, 'w') as backupFile:
        json.dump(snapshot, backupFile)

def deleteDataFrom(env='staging', datasetList = None):
    '''
    Delete the datasets in `datasetList` (or every listed dataset when it is
    empty) from `env`, after an interactive confirmation.

    Only the exact answer 'Y' confirms; an empty answer counts as "N".
    HTTP errors on individual deletions are logged and the loop continues.
    '''
    hosts = {
        'prod': prod_server,
        'staging': staging_server
    }
    userConfirmation = input(f'{bcolors.WARNING}Are you sure you want to delete \
        {str(datasetList)  if datasetList else "everything" } in {env}:{bcolors.ENDC} \
        Y/n') or "N"

    if userConfirmation != 'Y':
        print('nothing was deleted')
        return

    headers = setTokenHeader(env)
    for dataset in getAssetList(env, datasetList)['data']:
        datasetUrl = f"{hosts[env]}/v1/dataset/{dataset['id']}"
        try:
            logger.info(f"deleting {datasetUrl}... ")
            deleteAssets(datasetUrl, headers)
        except re.exceptions.HTTPError as err:
            logger.error(err)

def assetIdToBeSync(sync, syncList, assetToSync, fromEnv, toEnv):
    '''
    Look up the destination id already paired with `assetToSync`.

    Returns None when `sync` is falsy. Otherwise returns the `<toEnv>Id` of the
    last `syncList` entry whose type and `<fromEnv>Id` match the asset, or
    False when no entry matches.
    '''
    if not sync:
        return None

    pairedIds = [
        entry.get(f'{toEnv}Id')
        for entry in syncList
        if entry.get('type') == assetToSync.get('type')
        and entry.get(f'{fromEnv}Id') == assetToSync.get('id')
    ]
    return pairedIds[-1] if pairedIds else False
    
def _recordAssetPair(resources, assetType, fromEnv, fromId, toEnv, toId):
    '''Append one {type, <fromEnv>Id, <toEnv>Id} entry to the sync resources list.'''
    resources.append({
        'type': assetType,
        f'{fromEnv}Id': fromId,
        f'{toEnv}Id': toId
    })


def _copySubAssets(assetType, dataset, destDatasetId, assetList, sync, fromEnv, toEnv, resources):
    '''Recreate every layer or widget of `dataset`, plus its metadata, under `destDatasetId` in toEnv.'''
    recreate = recreateLayer if assetType == 'layer' else recreateWidget
    idKwarg = f'{assetType}Id'  # 'layerId' or 'widgetId'

    for subAsset in dataset['attributes'].get(assetType) or []:
        toSubAssetId = assetIdToBeSync(sync, assetList, subAsset, fromEnv, toEnv)
        if toSubAssetId:
            logger.info(f'sync [{fromEnv}]{assetType}: {subAsset.get("id")}')
            logger.info(f'with [{toEnv}]{assetType}: {toSubAssetId}')

        newSubAsset = recreate(destDatasetId, subAsset, toEnv, toSubAssetId)
        newSubAssetId = newSubAsset['data'].get('id')
        _recordAssetPair(resources, assetType, fromEnv, subAsset.get('id'), toEnv, newSubAssetId)

        # sub-asset metadata is not part of the dataset listing, so it is fetched separately
        fromMetadata = getSubAssetMetadata(dataset.get('id'), fromEnv=fromEnv,
                                           **{idKwarg: subAsset.get('id')})
        if fromMetadata:
            for subMetadata in fromMetadata.get('data', {}):
                logger.info(f'creating metadata for {assetType}...')
                newMetadata = recreateMetadata(destDatasetId, subMetadata, toEnv=toEnv,
                                               **{idKwarg: newSubAssetId})
                _recordAssetPair(resources, 'metadata', fromEnv, subMetadata.get('id'),
                                 toEnv, newMetadata['data'])


def _removeStaleAssets(assetType, destDatasetId, resources, toEnv):
    '''Delete the layers or widgets of the destination dataset that were not just copied or synced.'''
    serverUrl = {
        'prod': prod_server,
        'staging': staging_server
    }
    keptIds = {asset[f'{toEnv}Id'] for asset in resources if asset['type'] == assetType}
    destDatasets = getAssetList(toEnv, [destDatasetId]).get('data') or []
    if not destDatasets:
        logger.warning(f'[{toEnv}]dataset {destDatasetId} was not listed; no {assetType} removed')
        return

    headers = setTokenHeader(toEnv)
    for subAsset in destDatasets[0]['attributes'].get(assetType) or []:
        if subAsset.get('id') not in keptIds:
            url = f'{serverUrl[toEnv]}/v1/dataset/{destDatasetId}/{assetType}/{subAsset.get("id")}'
            deleteAssets(url, headers)


def _copyDatasetTree(dataset, assetList, sync, removeAssets, fromEnv, toEnv, resources):
    '''
    Recreate one dataset and everything attached to it in toEnv.

    `resources` is filled in place so that the ids created before a failure
    are still available to the caller.
    '''
    toDatasetId = assetIdToBeSync(sync, assetList, dataset, fromEnv, toEnv)
    if toDatasetId:
        logger.info(f'sync [{fromEnv}]dataset: {dataset.get("id")}')
        logger.info(f'with [{toEnv}]dataset: {toDatasetId}')
    newDataset = recreateDataset(dataset, toEnv, toDatasetId)
    destDatasetId = newDataset['data'].get('id')
    _recordAssetPair(resources, 'dataset', fromEnv, dataset.get('id'), toEnv, destDatasetId)

    for vocabulary in dataset['attributes'].get('vocabulary') or []:
        newVocabulary = recreateVocabulary(destDatasetId, vocabulary, toEnv)
        _recordAssetPair(resources, 'vocabulary', fromEnv, vocabulary.get('id'),
                         toEnv, newVocabulary['data'])

    for assetType in ('layer', 'widget'):
        _copySubAssets(assetType, dataset, destDatasetId, assetList, sync, fromEnv, toEnv, resources)
        if removeAssets:
            # Use the id actually written to: toDatasetId is None/False outside a matched sync.
            _removeStaleAssets(assetType, destDatasetId, resources, toEnv)

    for metadata in dataset['attributes'].get('metadata') or []:
        logger.info('creating metadata')
        newMetadata = recreateMetadata(destDatasetId, metadata, toEnv=toEnv)
        _recordAssetPair(resources, 'metadata', fromEnv, metadata.get('id'),
                         toEnv, newMetadata['data'])


def _writeSyncFile(resources):
    '''
    Persist the id pairs of one dataset and return the sync file name.

    When a sync file already exists and differs, the user chooses between
    overwriting it (Y), keeping it (N) or merging (M).
    '''
    # We are assuming that the first item in the resources is a dataset.
    filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0]["prodId"]}.json'
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    toWrite = resources
    oldfile = None
    if os.path.exists(filename):
        # Read the previous content BEFORE opening the file for writing: opening
        # with 'w' truncates it.
        try:
            with open(filename) as infile:
                oldfile = json.load(infile)
        except ValueError:
            logger.warning(f'{filename} is not valid JSON and will be overwritten')

    if oldfile is not None:
        difference = list(dictdiffer.diff(resources, oldfile))
        if not difference:
            return filename  # file is already up to date

        for diff in difference:
            print(diff)
        pad = ' ' * 32  # keeps the prompt text exactly as it has always been displayed
        userConfirmation = input(
            f'{bcolors.WARNING} Do you want to overwrite or merge {pad}'
            f'{str(oldfile)}  with  {str(resources)}:{bcolors.ENDC} {pad}Y/M/N') or "N"
        if userConfirmation not in ('Y', 'N', 'M'):
            raise NameError(f'User confirmation option not valid: {userConfirmation}')

        if userConfirmation == 'N':
            toWrite = oldfile
        elif userConfirmation == 'M':
            # NOTE(review): patching `resources` with diff(resources, oldfile)
            # appears to rebuild `oldfile`, so M looks equivalent to N — confirm
            # the intended merge semantics.
            toWrite = dictdiffer.patch(difference, resources)

    with open(filename, 'w') as outfile:
        json.dump(toWrite, outfile, sort_keys=True)
    return filename


def copyAssets(assetList, sync=False, removeAssets=False, fromEnv='prod', toEnv='staging'):
    '''
    Creates a new copy or syncs the assets that we set up in the fromEnv into the destination Env.

    Parameters:
        assetList: in copy mode, a list of dataset ids from fromEnv; in sync
            mode, the list of {type, <fromEnv>Id, <toEnv>Id} entries read from a sync file.
        sync: when True, update the destination assets paired in assetList
            instead of creating new ones.
        removeAssets: when True, delete destination layers/widgets that do not
            exist in the source dataset.
        fromEnv, toEnv: source and destination environments ('prod' / 'staging').

    Returns:
        The name of the sync file of the first dataset processed, or None when
        no dataset could be copied. Every dataset returned by the source
        environment is processed and gets its own sync file.

    Raises:
        NameError: when fromEnv and toEnv are the same, or on an invalid answer
            to the overwrite prompt.
        IndexError: when assetList is empty, or in sync mode has no dataset entry.

    Any other failure while copying a dataset is logged; the ids created up to
    that point are still written to the sync file.
    '''
    if fromEnv == toEnv:
        raise NameError(f'fromEnv:{fromEnv} and toEnv:{toEnv} cannot be the same')

    if not assetList:
        raise IndexError('asset list is empty or not defined')

    if sync:
        datasetIds = [asset[f'{fromEnv}Id'] for asset in assetList if asset['type'] == 'dataset']
        if not datasetIds:
            # getAssetList treats an empty list as "every dataset in the env"
            raise IndexError('sync list does not contain any dataset entry')
    else:
        datasetIds = assetList

    dataAssets = getAssetList(fromEnv, datasetIds)
    action = "sync" if sync else "copy"

    # @TODO:
    # Improve loop performance with multiprocessing
    # For sync only patch updated data

    filenames = []
    for dataset in dataAssets['data']:
        resources = []  # sync files are created per dataset
        print(f'{bcolors.OKBLUE}Preparing to {action} from {fromEnv} to {toEnv}...{bcolors.ENDC}')
        try:
            _copyDatasetTree(dataset, assetList, sync, removeAssets, fromEnv, toEnv, resources)
        except (NameError, IndexError) as e:
            logger.error(e)
            raise
        except Exception:
            # best effort: keep what was created so far and record it below
            logger.exception(f'{action} of [{fromEnv}]dataset {dataset.get("id")} did not complete')

        if not resources:
            # the dataset itself could not be created: nothing to record
            continue

        filenames.append(_writeSyncFile(resources))
        print(f'{bcolors.OKGREEN}{action} process finished{bcolors.ENDC}')

    if not filenames:
        logger.warning(f'no dataset was processed during {action} from {fromEnv} to {toEnv}')
        return None
    return filenames[0]
        
def syncAssets(syncList, remove = False, fromEnv='prod', toEnv='staging'):
    '''
    Run copyAssets in sync mode for the entries of a sync file.

    `remove` is forwarded as copyAssets' removeAssets flag; the return value
    is whatever copyAssets returns.
    '''
    return copyAssets(syncList, sync=True, removeAssets=remove, fromEnv=fromEnv, toEnv=toEnv)

# Processing
## Get list of assets that we want to modify or sync

#### List of assets:

* `datasetsProd` will contain the ids of the assets in production that need to be migrated to `staging`. We need to make sure that this list is in sync with the document we have shared with the assets.

### For testing purposes
Dummy assets created in the production environment to test the notebook

In [8]:
# Dummy data to test the notebook: creation of a dummy dataset in production.
# The cells that follow attach a layer, a widget, a vocabulary and metadata to it.
# `headers` and `urlDataset` defined here are reused by those cells.
toEnv = 'prod'
serverUrl = {
        'prod': prod_server,
        'staging': staging_server
    }
headers = setTokenHeader(toEnv)
urlDataset = f'{serverUrl[toEnv]}/v1/dataset'
bodyDataset = {'dataset':{
    'application': ['rw'],
    'name': 'This is a test',
    'connectorType': 'rest',
    'provider': 'cartodb',
    'published': False,
    'overwrite': False,
    'protected':False,
    'env': 'production',
    'connectorUrl': "https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies"
    }
}

# The response carries the id of the new dataset, needed by the following cells.
responseDataset = postAssets(urlDataset, bodyDataset, headers)
responseDataset

{'data': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
  'type': 'dataset',
  'attributes': {'name': 'This is a test',
   'slug': 'This-is-a-test_9',
   'type': None,
   'subtitle': None,
   'application': ['rw'],
   'dataPath': None,
   'attributesPath': None,
   'connectorType': 'rest',
   'provider': 'cartodb',
   'userId': '57a0aa1071e394dd32ffe137',
   'connectorUrl': 'https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies',
   'sources': [],
   'tableName': 'air_temo_anomalies',
   'status': 'pending',
   'published': False,
   'overwrite': False,
   'mainDateField': None,
   'env': 'production',
   'geoInfo': False,
   'protected': False,
   'legend': {'date': [],
    'region': [],
    'country': [],
    'nested': [],
    'integer': [],
    'short': [],
    'byte': [],
    'double': [],
    'float': [],
    'half_float': [],
    'scaled_float': [],
    'boolean': [],
    'binary': [],
    'text': [],
    'keyword': []},
   'clonedHost': {},
   'errorMessage': No

In [9]:
# Attach a dummy layer to the dummy dataset created above.
urlLayer = f'{urlDataset}/{responseDataset["data"].get("id")}/layer'
bodyLayer = {
        'application': ['rw'],
        'name': 'test-121',
        'provider': 'cartodb',
        'default': True,
        'published': False,
        'env': 'production',
        'layerConfig': {
            "body": {}
            },
        'legendConfig': {},
        'interactionConfig': {},
        'applicationConfig': {}
    }
responseLayer = postAssets(urlLayer, bodyLayer, headers)
responseLayer

{'data': {'id': 'c21dd7ab-e729-4811-9433-8333b1d7c9e9',
  'type': 'layer',
  'attributes': {'name': 'test-121',
   'slug': 'test-121_2',
   'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
   'application': ['rw'],
   'iso': [],
   'provider': 'cartodb',
   'userId': '57a0aa1071e394dd32ffe137',
   'default': True,
   'protected': False,
   'published': False,
   'env': 'production',
   'layerConfig': {'body': {}},
   'legendConfig': {},
   'interactionConfig': {},
   'applicationConfig': {},
   'staticImageConfig': {},
   'createdAt': '2021-06-07T09:36:15.327Z',
   'updatedAt': '2021-06-07T09:36:15.327Z'}}}

In [10]:
# Attach a dummy widget to the dummy dataset created above.
urlWidget = f'{urlDataset}/{responseDataset["data"].get("id")}/widget'
bodyWidget = {
        'application': ['rw'],
        'name': 'test-121',
        'default': True,
        'published': False,
        'env': 'production',
        'widgetConfig': {
            "body": {}
            }
    }
responseWidget = postAssets(urlWidget, bodyWidget, headers)
responseWidget

{'data': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',
  'type': 'widget',
  'attributes': {'name': 'test-121',
   'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
   'slug': 'test-121_2',
   'userId': '57a0aa1071e394dd32ffe137',
   'application': ['rw'],
   'verified': False,
   'default': True,
   'protected': False,
   'defaultEditableWidget': False,
   'published': False,
   'freeze': False,
   'env': 'production',
   'widgetConfig': {'body': {}},
   'template': False,
   'createdAt': '2021-06-07T09:36:17.153Z',
   'updatedAt': '2021-06-07T09:36:17.154Z'}}}

In [11]:
# Attach a 'knowledge_graph' vocabulary with a single tag to the dummy dataset.
urlVocabulary = f'{urlDataset}/{responseDataset["data"].get("id")}/vocabulary/knowledge_graph'
bodyVocabulary = {
        'application': 'rw',
        'tags':["geospatial"]
    }
responseVocabulary = postAssets(urlVocabulary, bodyVocabulary, headers)
responseVocabulary

{'data': [{'id': 'knowledge_graph',
   'type': 'vocabulary',
   'attributes': {'tags': ['geospatial'],
    'name': 'knowledge_graph',
    'application': 'rw'}}]}

In [12]:
# Add dataset-level metadata to the dummy dataset.
urlMetadataDataset = f'{urlDataset}/{responseDataset["data"].get("id")}/metadata'
bodyMetadataDataset = {
        'application': 'rw',
        'language':'ENG',
        'name':'this is a dummy dataset',
        'description':'Lorem Ipsum'
    }
responseMetadataDataset = postAssets(urlMetadataDataset, bodyMetadataDataset, headers)
responseMetadataDataset

{'data': [{'id': '60bde8962852be001ba7e42b',
   'type': 'metadata',
   'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
    'application': 'rw',
    'resource': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
     'type': 'dataset'},
    'language': 'eng',
    'name': 'this is a dummy dataset',
    'description': 'Lorem Ipsum',
    'createdAt': '2021-06-07T09:36:22.304Z',
    'updatedAt': '2021-06-07T09:36:22.304Z',
    'status': 'published'}}]}

In [None]:
# Add metadata to the dummy layer (requires responseLayer from the layer cell above).
urlMetadataLayer = f'{urlDataset}/{responseDataset["data"].get("id")}/layer/{responseLayer["data"].get("id")}/metadata'
bodyMetadataLayer = {
        'application': 'rw',
        'language':'ENG',
        'name':'this is a dummy Layer',
        'description':'Lorem Ipsum'
    }
responseMetadataLayer = postAssets(urlMetadataLayer, bodyMetadataLayer, headers)
responseMetadataLayer

In [13]:
# Add metadata to the dummy widget (requires responseWidget from the widget cell above).
urlMetadatawidget = f'{urlDataset}/{responseDataset["data"].get("id")}/widget/{responseWidget["data"].get("id")}/metadata'
bodyMetadatawidget = {
        'application': 'rw',
        'language':'ENG',
        'name':'this is a dummy widget',
        'description':'Lorem Ipsum'
    }
responseMetadatawidget = postAssets(urlMetadatawidget, bodyMetadatawidget, headers)
responseMetadatawidget

{'data': [{'id': '60bde89a3cc064001b3675b9',
   'type': 'metadata',
   'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',
    'application': 'rw',
    'resource': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',
     'type': 'widget'},
    'language': 'eng',
    'name': 'this is a dummy widget',
    'description': 'Lorem Ipsum',
    'createdAt': '2021-06-07T09:36:26.194Z',
    'updatedAt': '2021-06-07T09:36:26.194Z',
    'status': 'published'}}]}

#### List of assets:

* we need to make sure that this list is in sync with the document we have shared with the assets

In [14]:
# In the future this listing could be automated from the shared document
# through the Google Sheets API, both for reading and for writing.
# For now the list holds only the id of the dummy dataset created above.
datasetsProd = [responseDataset['data']['id']]
datasetsProd

['6a3aa408-b3d3-44c6-89b7-93fbfa545489']

### Backup Data in both environments

In [None]:
#backupAssets('prod')
#backupAssets('staging')

### Only do this if you want to clean data in staging. 
* You will need to be logged in

In [None]:
#deleteDataFrom()

### Copy resources from production to staging. 
The running time will depend on the size of the asset.   
Running this cell is only needed to create new assets from `production` to `staging`.
A JSON file with a unique name is created locally. For each asset, the JSON file contains:
- type: this can be a "layer", a "dataset", a "widget", "vocabulary", "metadata"
- prodId: the id of the item in `production`
- stagingId: the id of the item in `staging`

In [18]:
# Enter the API ID(s) of the production dataset(s) to copy/sync here,
# replacing the empty placeholder string.
prod_API_ID = ['']# ex: '79e06dd8-a2ae-45eb-8e99-e73bc87ec946'
# Keep the syncFile list empty: the next cell fills it with the sync file
# names returned by copyAssets.
syncFile = []

In [12]:
# Copy each production dataset to staging and remember the sync file written for it.
for datasetId in prod_API_ID:
    if not datasetId:
        # the '' placeholder left in prod_API_ID is not a dataset id
        continue
    syncfilePath = copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging')
    # copyAssets returns None when nothing was copied; recording the path only
    # once keeps the list free of duplicates when this cell is re-run
    if syncfilePath and syncfilePath not in syncFile:
        syncFile.append(syncfilePath)

# Sync right after the copy so the staging assets match production.
for syncfile in syncFile:
    with open(syncfile) as json_file:
        syncList = json.load(json_file)
    syncAssets(syncList, fromEnv='prod', toEnv='staging')

[94mPreparing to copy from prod to staging...[0m


ERROR:root:response: 
ERROR:root:<Response [400]>
ERROR:root:url: 
ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/find-by-ids
ERROR:root:body: 
ERROR:root:{"ids": [null]}


creating sync file with name: dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json
[92mcopy process finished[0m


INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f
INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94


[94mPreparing to sync from prod to staging...[0m
update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json
[92msync process finished[0m


INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f
INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94


[94mPreparing to sync from prod to staging...[0m


INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db
INFO:root:with [staging]widget: 1804d8e0-0de5-4b9a-8ecd-b55c9ff176fb


update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json
[92msync process finished[0m


### Open sync list of assets, match items with list and update them.

In [19]:
# Sync datasets from production to staging using the sync files written by the copy step.
# When syncFile is empty (for example on a fresh kernel), the file names are
# rebuilt from the ids in prod_API_ID.
if len(syncFile)==0:
    syncFile = [f'dataset_sync_files/RW_prod_staging_match_{datasetId}.json' for datasetId in prod_API_ID]
for syncfile in syncFile:
    with open(syncfile) as json_file:
        syncList = json.load(json_file)

    syncAssets(syncList, fromEnv='prod', toEnv='staging')

INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f
INFO:root:with [staging]dataset: 05f90e71-fef4-445c-82d9-65e77d732494


[94mPreparing to sync from prod to staging...[0m


INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db
INFO:root:with [staging]widget: eedaa69b-7d14-4541-9a0c-1033bcddd072


update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json
[92msync process finished[0m


In [17]:
# Clean-up after testing: delete the dummy dataset from production.
# deleteDataFrom asks for confirmation and only proceeds on the answer 'Y'.
deleteDataFrom('prod', [responseDataset['data']['id']])

[93mAre you sure you want to delete         ['6a3aa408-b3d3-44c6-89b7-93fbfa545489'] in prod:[0m         Y/n Y


INFO:root:deleting https://api.resourcewatch.org/v1/dataset/6a3aa408-b3d3-44c6-89b7-93fbfa545489... 


In [16]:
# Clean-up after testing: delete the staging copy. syncList comes from the
# last sync file loaded above; its first entry is assumed to be the dataset.
deleteDataFrom('staging', [syncList[0]['stagingId']])

INFO:root:deleting https://staging-api.resourcewatch.org/v1/dataset/e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94... 
