In [None]:
import json
import numpy as np
import pandas as pd
import requests
import re
from datetime import datetime
from dateutil import parser
import pytz

"""
Load the API credentials and set up the token cache.
"""
# Cache of authentication responses, keyed by region (filled by get_token).
tokens = {}
# Credentials are looked up per region when authenticating.
with open('D:\\workspace\\certificates.json', 'r') as f:
    certificates = json.load(f)

"""
Helper functions for calling the vbrick API.
"""

def authenticate(siteurl, region, timeout=60):
    """
    Get authentication info from vbrick.

    Posts the credentials stored under ``certificates[region]`` to
    ``{siteurl}/api/v2/authenticate`` and returns the decoded JSON body.

    Args:
        siteurl: Base URL of the vbrick site (no trailing slash).
        region: Key into the module-level ``certificates`` mapping.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response. get_token reads its 'token' and
        'expiration' entries.

    Raises:
        KeyError: If ``region`` has no entry in ``certificates``.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    token_url = f'{siteurl}/api/v2/authenticate'
    resp = requests.post(token_url, json=certificates[region], timeout=timeout)
    # Fail here with a clear HTTP error instead of returning an error payload
    # that only blows up later when the caller looks for 'token'.
    resp.raise_for_status()
    return resp.json()


def get_token(siteurl, region):
    """
    Return the access token for a region, authenticating when needed.

    The authentication response is cached in ``tokens`` per region. A new
    one is requested through authenticate() when the region has no cached
    entry yet, or when the cached entry's 'expiration' lies in the past.
    """
    utc_now = datetime.now(tz=pytz.timezone('UTC'))

    if region in tokens:
        expires_at = parser.parse(tokens[region]['expiration'])
        needs_refresh = utc_now > expires_at
        notice = 'get token again.'
    else:
        needs_refresh = True
        notice = 'token not exist'

    if needs_refresh:
        print(notice)
        tokens[region] = authenticate(siteurl, region)

    return tokens[region]['token']


def get_attendee_details(siteurl, eventid, token, timeout=120):
    """
    Get the attendee detail list of one event from vbrick.

    Calls ``{siteurl}/api/v2/scheduled-events/{eventid}/post-event-report``
    repeatedly, passing back the 'scrollId' of the previous response, and
    collects every returned session through extract_sessions().

    Args:
        siteurl: Base URL of the vbrick site (no trailing slash).
        eventid: Id of the scheduled event.
        token: Bearer token, as returned by get_token().
        timeout: Seconds to wait for each response before giving up.

    Returns:
        A list of flat dicts, one per session (see extract_sessions).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If a response does not arrive within ``timeout``.
        KeyError: If a response lacks 'sessions' or 'attendeeCount'.
    """
    attendee_url = f'{siteurl}/api/v2/scheduled-events/{eventid}/post-event-report'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'bearer {token}'
    }

    records = []
    params = None  # the first page is requested without a scrollId
    while True:
        resp = requests.get(attendee_url, headers=headers, params=params, timeout=timeout)
        resp.raise_for_status()
        page = resp.json()  # decode the body once per response

        sessions = page['sessions']
        extract_sessions(eventid, sessions, records)

        scrollid = page.get('scrollId')
        # Stop when there is nothing to page through, when no scrollId is
        # given, or when a page comes back empty. The last check keeps the
        # loop from spinning forever if the server keeps returning a
        # scrollId after the final page.
        if page['attendeeCount'] <= 0 or scrollid is None or not sessions:
            break
        params = {'scrollId': scrollid}
    return records


def extract_sessions(eventid, sessions, records):
    """
    Flatten raw session dicts and append them to ``records``.

    For every item in ``sessions`` one new dict is appended to ``records``
    (in place; nothing is returned). Each dict starts with 'eventid' and then
    holds the session fields under snake_case names. A session that lacks
    one of the expected keys raises KeyError.
    """
    # (output key, key in the session dict), listed in output order.
    field_map = (
        ('user_type', 'userType'),
        ('name', 'name'),
        ('email', 'email'),
        ('ip_address', 'ipAddress'),
        ('browser', 'browser'),
        ('device_type', 'deviceType'),
        ('zone', 'zone'),
        ('entered_date', 'enteredDate'),
        ('exited_date', 'exitedDate'),
        ('session_time', 'sessionTime'),
        ('viewing_start_time', 'viewingStartTime'),
        ('viewing_time', 'viewingTime'),
        ('public_cdn_time', 'publicCDNTime'),
        ('ecdn_time', 'eCDNTime'),
        ('attendee_type', 'attendeeType'),
    )
    for raw in sessions:
        flat = {'eventid': eventid}
        flat.update((column, raw[source_key]) for column, source_key in field_map)
        records.append(flat)

In [None]:
"""
read csv file

filter by condition:
- livestreamaddr should not be NaN (pandas reads blank csv fields as NaN)
- webexsiteurl should not be a test site (its value contains 'test')
"""
df = pd.read_csv("D:\\workspace\\meetinginfo_by_meetinguuid.csv")
has_stream = df['livestreamaddr'].notna()
# na=False: a row without webexsiteurl would otherwise put NaN into the mask,
# which breaks the `~` negation. Such rows are treated as non-test sites and
# kept.
is_test_site = df['webexsiteurl'].str.contains('test', na=False)
df = df[has_stream & ~is_test_site]

In [None]:
%%time
"""
Loop over the filtered meetings, fetch the attendee details of each event
and append them to the 'attendee list' sheet of attendee-details.xlsx.

The workbook and the sheet must already exist: the writer is opened in
append mode and the next free row is read from the existing sheet.
"""
print(f'loop begin: total rows is {df.shape[0]}')

# Compiled once instead of on every row.
# NOTE(review): the '.' before 'livestream' is unescaped, so it matches any
# character, not only a literal dot - confirm this is intended.
region_pattern = re.compile('https://(.+?).livestream')

for row in df.itertuples():
    vbricksiteurl = row.vbricksiteurl
    eventid = row.vbrickeventid

    # A missing url is NaN (a float) and a url of another shape does not
    # match; skip such rows instead of aborting the whole run.
    match = region_pattern.search(vbricksiteurl) if isinstance(vbricksiteurl, str) else None
    if match is None:
        print(f'skip event {eventid}: cannot derive region from {vbricksiteurl!r}')
        continue
    region = match.group(1)

    token = get_token(vbricksiteurl, region)
    print(f'fetch event {eventid}, region {region}')
    attendee_list = get_attendee_details(vbricksiteurl, eventid, token)
    if not attendee_list:
        # Nothing to append; avoid loading and re-saving the workbook.
        continue

    attendee_df = pd.DataFrame(attendee_list)
    with pd.ExcelWriter('attendee-details.xlsx', mode='a', if_sheet_exists='overlay') as writer:
        # Rows go below the last used row, without repeating the header.
        attendee_df.to_excel(writer, sheet_name='attendee list', startrow=writer.sheets['attendee list'].max_row, index=False, header=False)