# Install dependencies

In [None]:
%%bash
pip install wds-client --upgrade
pip install chess
pip install terra-notebook-utils --user

In [52]:
import chess.pgn
import requests
import os
import json
import wds_client
from terra_notebook_utils import azure_auth

# Define constants

In [53]:
DOMAIN = 'rtitle-azure.bee.envs-terra.bio'
WSM_BASE_URL = f'https://workspace.{DOMAIN}/api/workspaces/v1'
LEO_BASE_URL = f'https://leonardo.{DOMAIN}/api/apps/v2'
HEADERS = {
    "Authorization": f"Bearer {azure_token}",
    "Accept": "application/json"
}
WORKSPACE_ID = os.environ['WORKSPACE_ID']
WORKSPACE_STORAGE_CONTAINER_ID = os.environ['WORKSPACE_STORAGE_CONTAINER_ID']
WORKSPACE_STORAGE_CONTAINER_URL = os.environ['WORKSPACE_STORAGE_CONTAINER_URL']
FILE_NAME = 'lichess_robitto_2023-04-10.pgn'
VERSION = 'v0.2'

# Get the Workspace storage container SAS token

In [54]:
azure_token = azure_auth.get_azure_access_token()

In [55]:
def get_wsm_sas_token(workspaceId, storageContainerId):
    """Get SAS token for workspace storage container."""
    uri = f"{WSM_BASE_URL}/{WORKSPACE_ID}/resources/controlled/azure/storageContainer/{WORKSPACE_STORAGE_CONTAINER_ID}/getSasToken"
    response = requests.post(uri, headers=HEADERS)
    status_code = response.status_code
    if status_code != 200:
        return response.text
    return json.loads(response.text)

response = get_wsm_sas_token(WORKSPACE_ID, WORKSPACE_STORAGE_CONTAINER_ID)
sas_token = response['token']
print(sas_token)

sv=2021-12-02&spr=https&st=2023-04-11T04%3A19%3A49Z&se=2023-04-11T05%3A34%3A49Z&sr=c&sp=racwdl&sig=vyH2EZ2a0eIFRsVbH%2B0TRelpxIhc%2FzVUqlK%2FHQ073Zc%3D


# Prepare Workspace Data Service clients

In [56]:
def get_wds_url(workspaceId):
    """Get url for WDS."""
    uri = f"{LEO_BASE_URL}/{workspaceId}?includeDeleted=false"
    response = requests.get(uri, headers=HEADERS)
    status_code = response.status_code
    if status_code != 200:
        return response.text
    return json.loads(response.text)

response = get_wds_url(WORKSPACE_ID)
wds_url = response[0]['proxyUrls']['wds']
print(wds_url)

https://lz7d4295cb61a13b21caa1d6175b3dfd694259689e189d8abb.servicebus.windows.net/wds-49237e9f-7e0b-4f09-8d19-1ff0d668cd3a/wds


In [57]:
api_client = wds_client.ApiClient(header_name='Authorization', header_value="Bearer " + azure_token)
api_client.configuration.host = wds_url

# set up all the clients
records_client = wds_client.RecordsApi(api_client)
generalInfo_instance = wds_client.GeneralWDSInformationApi(api_client)
schema_instance = wds_client.SchemaApi(api_client)
client_instance = wds_client.InstancesApi(api_client)

# Copy the pgn file to disk

In [58]:
pgn_file = f"{WORKSPACE_STORAGE_CONTAINER_URL}/{FILE_NAME}"
pgn_file_with_sas = f"{WORKSPACE_STORAGE_CONTAINER_URL}/{FILE_NAME}?{sas_token}"
! azcopy copy '{pgn_file_with_sas}' .

INFO: Scanning...
INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support

Job 39fc5ea9-af32-9040-53c2-daaa05a459dd has started
Log file is located at: /home/jupyter/.azcopy/39fc5ea9-af32-9040-53c2-daaa05a459dd.log

INFO: azcopy: A newer version 10.18.0 is available to downloadning...), 
         
100.0 %, 0 Done, 0 Failed, 0 Pending, 0 Skipped, 0 Total (scanning...), 


Job 39fc5ea9-af32-9040-53c2-daaa05a459dd summary
Elapsed Time (Minutes): 0.0334
Number of File Transfers: 1
Number of Folder Property Transfers: 0
Total Number of Transfers: 1
Number of Transfers Completed: 1
Number of Transfers Failed: 0
Number of Transfers Skipped: 0
TotalBytesTransferred: 11946048
Final Job Status: Completed



# Parse PGN file and upload to WDS in batches

In [60]:
pgn = open(FILE_NAME)
cols = [
    'Event', 'Site', 'Date', 'Round', 'White', 'Black', 'Result', 'BlackElo', 'BlackRatingDiff', 'ECO', 'Termination', 'TimeControl', 'UTCDate', 'UTCTime', 'Variant', 'WhiteElo', 'WhiteRatingDiff'
]
batch_size = 1000
batch_upsert = []
i = 0
last_updated = 0
while True:
    offset = pgn.tell()
    headers = chess.pgn.read_headers(pgn)
    if headers is None:
        break
        
    attrs = {}
    attrs['offset'] = offset
    attrs['pgn_file'] = pgn_file
    for c in cols:
        attrs[c] = headers.get(c, "?")
    record = wds_client.models.BatchRecordRequest(str(i), 'string', attrs)
    operation = wds_client.models.BatchOperation('upsert', record)
    batch_upsert.append(operation)
    i = i + 1
    if i % batch_size == 0: 
        print(f"Uploading games {last_updated+1}-{i}...")
        record = records_client.batch_write_records(WORKSPACE_ID, VERSION, "games", batch_upsert)
        print(record)
        batch_upsert.clear()
        last_updated = i
    
print(f"Uploading games {last_updated+1}-{i}...")
record = records_client.batch_write_records(WORKSPACE_ID, VERSION, "games", batch_upsert)
print(record)
batch_upsert.clear()
    

Uploading games 1-1000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 1001-2000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 2001-3000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 3001-4000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 4001-5000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 5001-6000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 6001-7000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 7001-8000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 8001-9000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 9001-10000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 10001-11000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 11001-12000...
{'message': 'Huzzah', 'records_modified': 1000}
Uploading games 12001-13000...
{'message': 'Huzzah', 'records_modified': 1