# In this notebook, we are creating metadata to ingest NBA clips

### List all the files uploaded to the NBA folder

In [11]:
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
"""
from enrichment.metadata_parser.metadata_parser import MetadataParser
from azure.storage.blob import BlobServiceClient
from datetime import datetime

# Connection settings for the source storage account/container.
# The account key is a secret, so it is read from the SOURCE_ACCOUNT_KEY
# environment variable instead of being pasted into (and saved with) the
# notebook. When the variable is unset the key falls back to the empty
# placeholder, which is what this cell used before.
import os

source_account_key = os.environ.get('SOURCE_ACCOUNT_KEY', '')
account_name = 'nbavideofootage'
container = 'videos'
source_connection_string = f'DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={source_account_key};EndpointSuffix=core.windows.net'


In [2]:
# Build the account-level client from the connection string, then narrow it
# down to the single container named by `container`.
source_service = BlobServiceClient.from_connection_string(source_connection_string)
source_container_client = source_service.get_container_client(container=container)

In [None]:
# Names of all blobs whose name starts with 'footage'; the cell's displayed
# value is how many were found.
nba_file_names = [
    blob.name
    for blob in source_container_client.list_blobs(name_starts_with='footage')
]
len(nba_file_names)

### Initialize DataFrame to fill in the information

In [4]:
import pandas as pd
# One row per blob name. The column is named in the constructor rather than
# by assigning `df.columns` afterwards: the two-step form raises a
# length-mismatch ValueError when `nba_file_names` is empty (the frame then
# has zero columns), whereas this form simply yields an empty frame.
df = pd.DataFrame(nba_file_names, columns=['matching_video_name'])

In [5]:
import datetime
import random

# Fake creation dates so that we do not have to query the YouTube API.
def random_date(start, end):
    """Return a random datetime between `start` and `end` (both inclusive).

    The offset added to `start` is a whole number of seconds drawn with
    `random.randint`, so any sub-second part of the interval is ignored.

    Args:
        start: Lower bound; a `datetime.datetime` or `pandas.Timestamp`.
        end: Upper bound, comparable with `start`; should not be earlier.

    Returns:
        `start` plus the random offset.

    Raises:
        ValueError: If `end` is a second or more earlier than `start`
            (raised by `random.randint` for the empty range).
    """
    # Imported locally on purpose: this notebook binds the global name
    # `datetime` both to the class (`from datetime import datetime`) and to
    # the module (`import datetime`), so `datetime.timedelta` would only
    # resolve if the cells happened to run in one particular order.
    from datetime import timedelta

    span_seconds = int((end - start).total_seconds())
    return start + timedelta(seconds=random.randint(0, span_seconds))

## Define content for NBA metadata

In [6]:
# Description: the part of the blob name before the first 'ENT', with
# underscores turned into spaces.
df['video_description'] = df['matching_video_name'].map(
    lambda name: name.partition('ENT')[0].replace('_', ' ')
)

# Values that are the same for every clip.
df['usage_terms'] = 'No Restrictions'
df['video_languages'] = "English"
df['video_languages_code'] = 'en-US'
df['keywords'] = [['Sport', 'Basketball', 'NBA'] for _ in range(len(df))]
df['version'] = 1

# One random placeholder date per row, stored as text; both date columns
# carry the same value.
df['first_creation_date'] = df['version'].map(
    lambda _: random_date(pd.to_datetime('2020-01-01'), pd.to_datetime('2022-07-20'))
)
df['first_creation_date'] = df['first_creation_date'].astype(str)
df['current_version_creation_date'] = df['first_creation_date']

# File name: the blob name up to (not including) its first '.'.
df['file_name'] = df['matching_video_name'].map(lambda name: name.partition('.')[0])
df['data_source'] = 'nba'


In [7]:
# One plain dict per DataFrame row, keyed by column name.
list_of_jsons = df.to_dict(orient='records')

## Convert DataFrame into a list of dictionaries and save those as JSON files

In [10]:
import json

# For every metadata record: run it through MetadataParser, copy the video
# into a "<file_name>/" directory of the same container, and upload the
# parsed metadata as "<file_name>/<file_name>.json" next to it.
for json_file in list_of_jsons:

    parser = MetadataParser()
    parsed = parser.parse_metadata(json_file)

    # Copy the video file to its new location. This only starts a copy;
    # nothing here deletes the original blob.
    video_blob = source_container_client.get_blob_client(parsed['matching_video_name'])
    target_video_blob = source_container_client.get_blob_client(f"{parsed['file_name']}/{parsed['matching_video_name']}")
    target_video_blob.start_copy_from_url(video_blob.url)

    # Upload the JSON to a blob in the same directory.
    new_blob_name = f"{parsed['file_name']}/{parsed['file_name']}.json"
    blob_client = source_container_client.get_blob_client(new_blob_name)
    try:
        blob_client.upload_blob(json.dumps(parsed), overwrite=True)
    except Exception as exc:
        # Still best effort (one failed upload must not stop the remaining
        # files), but report it instead of hiding it. Catching Exception
        # rather than using a bare `except` also lets KeyboardInterrupt
        # stop the loop.
        print(f"Failed to upload {new_blob_name}: {exc!r}")
        continue
