In [43]:
import requests
from requests.auth import HTTPBasicAuth
import os
import time
import csv
import numpy as np
from datetime import datetime, timezone
import pandas as pd

# 1) Function for Fetching xAPI Data from Trax LRS 

In [44]:
def fetch_data(API_KEY,user,password,lim,prev,cur,timeout=60):
    """Fetch xAPI statements from the Trax LRS statements endpoint.

    Parameters
    ----------
    API_KEY : str
        Key segment inserted into the LRS endpoint URL.
    user, password : str
        Credentials for HTTP Basic authentication.
    lim : int
        Value sent as the ``limit`` query parameter.
    prev, cur : str
        Values sent as the ``since`` and ``until`` query parameters.
        Enter them in this date format: '2023-11-05T14:52:24Z'.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``). Without it a stalled server blocks forever.

    Returns
    -------
    dict or None
        The decoded JSON response body, or ``None`` when the request failed
        (the error is printed).
    """
    # Basic authentication parameters for the standard HTTP request
    auth = HTTPBasicAuth(user, password)

    # API-URL endpoint for the HTTP request
    api_url = f'https://lrs.vma.lm.lt/trax/api/{API_KEY}/xapi/std/statements'
    headers = {'X-Experience-API-Version': '1.0.0'}

    # Query parameters constrain which statements are returned.
    # Plain values are passed here: the previous `{lim}` syntax built
    # one-element *sets*, which only worked by accident.
    params = {'limit': lim, 'since': prev, 'until': cur}

    try:
        response = requests.get(
            api_url,
            headers=headers,
            params=params,
            auth=auth,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()

    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        # The exception carries the HTTP response when one was received.
        # For connection errors/timeouts there is none, so the local
        # `response` name would not even exist at this point.
        failed_response = getattr(e, 'response', None)
        if failed_response is not None and failed_response.status_code == 500:
            print(f"Server Error: {failed_response.text}")
        return None

# 2) Functions for cleaning and changing JSON data to Tabular format

In [45]:
# Takes the list of xAPI statements (JSON dictionaries), extracts the fields of interest from each one, and stores them as rows of a 2-D list for further processing

# Sentinel distinguishing "key is absent" from "key is present with value None".
_MISSING = object()


def _dig(record, *path):
    """Follow `path` through nested containers; return _MISSING if any step fails."""
    node = record
    for key in path:
        try:
            node = node[key]
        except (KeyError, TypeError, IndexError):
            return _MISSING
    return node


def _first_present(record, *paths):
    """Return the value at the first of `paths` that exists in `record`, else None."""
    for path in paths:
        value = _dig(record, *path)
        if value is not _MISSING:
            return value
    return None


def _truthy_or_none(record, *path):
    """Return the value at `path`, or None when it is absent or falsy."""
    value = _dig(record, *path)
    if value is _MISSING or not value:
        return None
    return value


def data_cleaning(json_data):
    """Flatten xAPI statements into rows of plain values.

    Each statement becomes one 11-element list, in this order:
    statement id, actor objectType, actor name, actor identifier, verb,
    object objectType, object id, object name, platform, language, timestamp.

    Statements whose ``context.platform`` equals ``'Moodle'`` are read
    differently from all others:

    * actor objectType falls back to ``authority.objectType``;
    * actor identifier is ``actor.account.name``, falling back to ``actor.mbox``;
    * verb is ``verb.id`` (others use ``verb.display['en-US']``);
    * language is the first key of ``object.definition.name``
      (others use ``context.language``).

    The object name is the value stored under the first key of
    ``object.definition.name`` for every platform.

    Fields that are absent from a statement yield ``None`` instead of raising,
    so one sparse statement does not abort the whole batch.

    Parameters
    ----------
    json_data : iterable of dict
        Statement dictionaries.

    Returns
    -------
    list of list
        One row per input statement, in input order.
    """
    rows = []
    for statement in json_data:
        platform = _truthy_or_none(statement, 'context', 'platform')

        # The name is a language map; take whichever entry comes first.
        name_map = _dig(statement, 'object', 'definition', 'name')
        if isinstance(name_map, dict) and name_map:
            name_language = next(iter(name_map))
            object_name = name_map[name_language] or None
        else:
            name_language = None
            object_name = None

        if platform == 'Moodle':
            actor_type = _first_present(
                statement, ('actor', 'objectType'), ('authority', 'objectType'))
            actor_name = _first_present(statement, ('actor', 'name'))
            actor_identifier = _first_present(
                statement, ('actor', 'account', 'name'), ('actor', 'mbox'))
            verb = _truthy_or_none(statement, 'verb', 'id')
            object_type = _first_present(statement, ('object', 'objectType'))
            language = name_language
        else:
            actor_type = _truthy_or_none(statement, 'actor', 'objectType')
            actor_name = _truthy_or_none(statement, 'actor', 'name')
            actor_identifier = _truthy_or_none(statement, 'actor', 'mbox')
            verb = _truthy_or_none(statement, 'verb', 'display', 'en-US')
            object_type = _truthy_or_none(statement, 'object', 'objectType')
            language = _truthy_or_none(statement, 'context', 'language')

        rows.append([
            _truthy_or_none(statement, 'id'),
            actor_type,
            actor_name,
            actor_identifier,
            verb,
            object_type,
            _truthy_or_none(statement, 'object', 'id'),
            object_name,
            platform,
            language,
            _truthy_or_none(statement, 'timestamp'),
        ])
    return rows

In [46]:
# Method for formatting email in 'actor_mbox' 
def splitting_mbox(row):
    """Strip any ``scheme:`` prefix (e.g. ``mailto:``) from ``row['actor_mbox']``.

    Returns the text after the last ':'. Values that are not strings
    (e.g. ``None`` for statements without an actor identifier) are returned
    unchanged instead of raising.
    """
    mbox = row['actor_mbox']
    if not isinstance(mbox, str):
        return mbox
    return mbox.split(':')[-1]

# Method for formatting timestamps
def fix_timestamps(row):
    """Return ``row['timestamp']`` as text in a uniform ``...+HH:MM`` layout.

    * Rows whose platform is ``'Moodle'`` are returned as ``str(timestamp)``.
    * For every other platform, fractional seconds are dropped and a ``Z``
      (or absent) zone designator is written as ``+00:00``. An explicit
      numeric offset is kept as-is, so the instant is not altered.
    * A missing timestamp (``None``/NaN) yields ``None``.
    """
    raw = row['timestamp']
    if raw is None or (isinstance(raw, float) and raw != raw):  # None or NaN
        return None

    stamp = str(raw)
    if row['platform'] == 'Moodle':
        return stamp

    date_part, separator, time_part = stamp.partition('T')
    if not separator:
        # Not in 'dateTtime' layout; keep the historical handling.
        return stamp.split('.')[0] + '+00:00'

    # The zone designator starts at the first 'Z', '+' or '-' of the time part.
    zone_start = next(
        (pos for pos, char in enumerate(time_part) if char in 'Zz+-'),
        len(time_part),
    )
    clock = time_part[:zone_start].split('.')[0]  # drop fractional seconds
    zone = time_part[zone_start:]
    if zone in ('', 'Z', 'z'):
        zone = '+00:00'
    return f'{date_part}T{clock}{zone}'


# Method for formatting the verb column
def split_verb_display(row):
    """Shorten a Moodle verb IRI to its last path segment.

    For rows whose platform is ``'Moodle'`` and whose ``verb_display_en``
    contains ``'http'``, return the text after the final '/' (a trailing
    slash is ignored so the result is never empty because of it).
    All other rows, including rows with a missing (non-string) verb,
    are returned unchanged.
    """
    verb = row['verb_display_en']
    if row['platform'] != 'Moodle' or not isinstance(verb, str):
        return verb
    if 'http' in verb:
        return verb.rstrip('/').split('/')[-1]
    return verb
    
# Method for making the data in the 'platform' column more consistent
def change_CS_uniform_name(row):
    """Normalise the platform label of a row.

    'Moodle' is kept as it is; every other platform value is reported
    as 'Common Space'.
    """
    source_platform = row['platform']
    return source_platform if source_platform == 'Moodle' else 'Common Space'

# 3) Main Method created to execute all functions in sequential order

In [47]:
def main(api_key,user,password,limit,prev,curr):
    """Fetch statements from the LRS and return them as a cleaned DataFrame.

    Parameters
    ----------
    api_key, user, password :
        Passed to ``fetch_data`` for the endpoint URL and authentication.
    limit, prev, curr :
        Passed to ``fetch_data`` as the ``limit``, ``since`` and ``until``
        query parameters.

    Returns
    -------
    pandas.DataFrame
        One row per statement with the columns listed in ``col_names``;
        ``timestamp`` is converted to timezone-aware UTC datetimes.

    Raises
    ------
    RuntimeError
        If ``fetch_data`` could not retrieve any data (it returns ``None``
        after printing the underlying error).
    """
    # GETTING DATA
    data = fetch_data(api_key,user,password,limit,prev,curr)
    if data is None:
        raise RuntimeError(
            'No data received from the LRS; see the error printed by fetch_data.'
        )

    # CLEANING RAW DATA
    xdata = data_cleaning(data['statements'])

    # CREATING COLUMNS AND STORING IN DATAFRAME
    col_names = ['statement_ID','actor_obj','actor_name','actor_mbox','verb_display_en','obj_type','obj_id','obj_def_name_en','platform','language','timestamp']
    new_df = pd.DataFrame(xdata,columns=col_names)

    # Row-wise apply() on an empty frame returns a DataFrame, which cannot be
    # assigned to a single column, so skip the per-row clean-up in that case.
    if new_df.empty:
        new_df['timestamp'] = pd.to_datetime(new_df['timestamp'], utc=True)
        return new_df

    # 2ND ROUND DATA CLEANING
    # fix_timestamps and split_verb_display read the ORIGINAL platform value,
    # so they must run before the platform column is rewritten below.
    new_df['timestamp'] = new_df.apply(fix_timestamps,axis=1)

    new_df['verb_display_en'] = new_df.apply(split_verb_display,axis=1)

    # utc=True gives one uniform datetime dtype even when the statements
    # carry different UTC offsets.
    new_df['timestamp'] = pd.to_datetime(new_df['timestamp'], utc=True)

    new_df['platform'] = new_df.apply(change_CS_uniform_name,axis=1)

    new_df['actor_mbox'] = new_df.apply(splitting_mbox, axis=1)

    return new_df



# 4) Execute code to extract data

In [50]:
if __name__ == '__main__':

    # AUTHENTICATION and API-KEY required
    # Values are read from environment variables so that real secrets never
    # have to be written into the notebook; the placeholders below are used
    # only when a variable is not set.
    user = os.environ.get('TRAX_LRS_USER', 'username_example')
    password = os.environ.get('TRAX_LRS_PASSWORD', 'password_example')
    api_key = os.environ.get('TRAX_LRS_API_KEY', 'api_key_example')

    # PARAMETERS
    # 'prev' and 'curr' stand for the 'since' and 'until' parameters, which
    # specify the time period to collect data for from the LRS
    limit = 10
    prev = '2023-08-05T14:52:24Z'
    curr = '2023-11-05T14:52:24Z'

    # Data is stored as a DataFrame in the df variable and can be used for
    # further analysis
    df = main(api_key,user,password,limit,prev,curr)
    