# Initialisation

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import operator
from tabulate import tabulate
import json
import zipfile
import os
import io as io #Made in Python 3.x Replace io by StringIO module with the io module. For use in Python 2.x use
import shutil
import mutagen.mp3 as mp3 #pip install mutagen
#import StringIO as io
# Sandbox taxonomy-service endpoints. baseURL is not referenced again in the
# visible part of this notebook; listURL is the content-listing endpoint.
baseURL = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/getContent/"
listURL = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/content/list"
url = listURL
# Fetch the listing once and decode the JSON body. Later cells read
# resp["result"]["contents"]. NOTE(review): no timeout or status check here.
resp = requests.get(url).json()

In [2]:
def get_keys(obj, **kwargs):
    """Return the keys of a dictionary as a sorted list.

    Args:
        obj: Object to inspect.
        **kwargs: Pass ``verbose=True`` to print a short message explaining
            why ``None`` was returned. Any other keyword is ignored.

    Returns:
        The sorted list of keys, or ``None`` when ``obj`` is not a dictionary
        or is an empty dictionary. No exception is raised for those cases.
    """
    # kwargs.get replaces the old try/except whose handler body was a bare
    # ";", which is not a valid Python statement.
    verbose = bool(kwargs.get("verbose"))
    if not isinstance(obj, dict):
        if verbose:
            print("This is not a dictionary")
        return None
    if not obj:
        if verbose:
            print("No keys. Empty dictionary")
        return None
    return sorted(obj)

## Extract Parameters from Metadata to be used in Downloads

In [3]:
def get_download_parameters(obj, **kwargs):
    """Extract the download URL and identifier from a content metadata dict.

    Args:
        obj: Metadata entry; expected to be a dict that may contain the keys
            ``"downloadUrl"`` and ``"identifier"``.
        **kwargs: Pass ``verbose=True`` to print which value is missing.

    Returns:
        A ``(url, identifier)`` tuple. Either element is ``None`` when the
        key is absent or ``obj`` is not a dict. A tuple is always returned so
        callers can unpack the result unconditionally.
    """
    verbose = bool(kwargs.get("verbose"))
    url = None
    identifier = None
    if isinstance(obj, dict):
        url = obj.get("downloadUrl")
        identifier = obj.get("identifier")
    if verbose:
        if url is None:
            print("URL does not exist")
        if identifier is None:
            print("Identifier does not exist")
    return url, identifier

## Download a File given URL and Folder Name

In [4]:
def download_file(url, identifier, **kwargs):
    """Download a zip archive and extract it into the folder ``identifier``.

    The folder is created relative to the current working directory.

    Args:
        url: Address of the zip archive to download.
        identifier: Name of the folder to extract into.
        **kwargs: ``verbose=True`` prints the reason for a failure;
            ``timeout`` (seconds, default 60) is forwarded to ``requests.get``.

    Returns:
        ``True`` when the archive was downloaded and extracted, ``False`` when
        an input is ``None``, the request fails, the server answers with an
        error status, or the payload is not a valid zip archive.
    """
    verbose = bool(kwargs.get("verbose"))
    if url is None or identifier is None:
        if verbose:
            print("One of the inputs is None")
        return False
    try:
        response = requests.get(url, timeout=kwargs.get("timeout", 60))
    except requests.RequestException as err:
        if verbose:
            print("Download failed:", err)
        return False
    # Check the status before touching the body: an error page is not a zip.
    if not response.ok:
        if verbose:
            print("Download failed with HTTP status", response.status_code)
        return False
    try:
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(identifier)
    except zipfile.BadZipFile:
        if verbose:
            print("Downloaded file is not a valid zip archive")
        return False
    return True

## Unzip all zip files found in a directory and save their contents into `temp<directory name>`

In [5]:
def unzip_files(directory, **kwargs):
    """Extract every ``.zip`` file found under ``directory``.

    All archives are extracted into the single folder ``"temp" + directory``
    (relative to the current working directory).

    Args:
        directory: Folder to search recursively for zip files.
        **kwargs: Pass ``verbose=True`` to print progress messages.

    Returns:
        ``True`` if at least one archive was extracted, otherwise ``False``.
    """
    verbose = bool(kwargs.get("verbose"))
    destination = "temp" + directory
    extracted = False
    for dirname, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            # Match on the file's extension only. A substring search on the
            # whole path also hits names like "notes.zip.txt" or files that
            # merely live inside a folder called "x.zip".
            if not filename.endswith(".zip"):
                continue
            path = os.path.join(dirname, filename)
            try:
                with zipfile.ZipFile(path, "r") as archive:
                    archive.extractall(destination)
            except zipfile.BadZipFile:
                # One corrupt archive should not abort the remaining ones.
                if verbose:
                    print("Skipped invalid zip file", path)
                continue
            if verbose:
                print("Extracted", path)
            extracted = True
    if verbose and not extracted:
        print("No zip file exists")
    return extracted

## Copy the folders assets, data and items into `temp<directory name>`

In [6]:
def copy_main_folders(directory, prefix, **kwargs):
    """Copy the folders named assets, data and items out of ``directory``.

    When content was uploaded unzipped there are no archives to extract, so
    the three main folders are copied directly. Each folder found anywhere
    under ``directory`` is copied to ``prefix + directory + "/" + name``.

    Args:
        directory: Folder to search recursively.
        prefix: String prepended to ``directory`` to form the target folder.
        **kwargs: Pass ``verbose=True`` to print progress messages.

    Returns:
        ``True`` if at least one folder was copied, otherwise ``False``.
    """
    verbose = bool(kwargs.get("verbose"))
    main_folders = ("assets", "data", "items")
    copied = False
    for root, dirnames, _filenames in os.walk(directory):
        for subdirname in dirnames:
            if subdirname not in main_folders:
                continue
            source = os.path.join(root, subdirname)
            target = prefix + directory + "/" + subdirname
            # os.walk is top-down, so the shallowest folder of a given name is
            # copied first. A deeper folder with the same name would make
            # copytree raise FileExistsError, so it is skipped instead.
            if os.path.exists(target):
                if verbose:
                    print("Skipped", source)
                continue
            shutil.copytree(source, target)
            if verbose:
                print("Copied", source)
            copied = True
    if verbose and not copied:
        print("No folder named assets,data or items")
    return copied

## Combine all of the above to process Downloaded Files into a Suitable Format

In [7]:
def process_entity(obj, **kwargs):
    """Download one content entry and reduce it to its main folders.

    Steps: read the download URL and identifier from ``obj``, download and
    extract the archive into a folder named after the identifier, unpack any
    nested zip files (or copy the main folders directly when there are none)
    into ``"temp" + identifier``, then keep only assets/data/items and move
    them back under the identifier folder.

    Args:
        obj: Metadata dict for one content entry.
        **kwargs: Pass ``verbose=True`` to print progress messages.

    Returns:
        The identifier (also the name of the resulting folder) on success,
        ``None`` when any step fails.
    """
    verb = bool(kwargs.get("verbose"))
    params = get_download_parameters(obj, verbose=verb)
    # The helper may hand back None instead of a tuple; do not unpack blindly.
    if params is None:
        return None
    url, identifier = params
    if url is None or identifier is None:
        return None
    if not download_file(url, identifier, verbose=verb):
        return None
    staging = "temp" + identifier
    if not unzip_files(identifier, verbose=verb):
        if not copy_main_folders(identifier, "temp", verbose=verb):
            # The download folder may not exist (e.g. an empty archive).
            shutil.rmtree(identifier, ignore_errors=True)
            return None
    shutil.rmtree(identifier, ignore_errors=True)
    kept = copy_main_folders(staging, "_")
    shutil.rmtree(staging, ignore_errors=True)
    if not kept:
        # Nothing was copied, so "_temp<identifier>" does not exist and the
        # rename below would raise FileNotFoundError.
        if verb:
            print("No folder named assets,data or items")
        return None
    os.rename("_" + staging, identifier)
    if verb:
        print("Success " + identifier)
    return identifier

## Get a list of all files in a Directory having any one of the Types in the list Typ (Accepts only a list object)

In [8]:
def get_file_type_directory(directory,typ):
    """Collect the paths of all files under ``directory`` with a given extension.

    ``typ`` is a list of extensions without the leading dot. A file is listed
    once for every entry of ``typ`` that its name ends with. Prints a notice
    and returns ``None`` when nothing matches.
    """
    matches = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        for extension in typ
        if name.endswith("." + extension)
    ]
    if not matches:
        print("No json files found")
        return None
    return matches

## Count all files in a Directory having any one of the Types in the list Typ (Accepts only a list object)

In [9]:
def count_file_type_directory(directory,typ):
    """Count the files under ``directory`` for each extension listed in ``typ``.

    Returns a dict mapping every extension (given without the leading dot) to
    the number of file names ending in ``"." + extension``.
    """
    tally = dict.fromkeys(typ, 0)
    for _folder, _subdirs, names in os.walk(directory):
        for name in names:
            for extension in typ:
                if name.endswith("." + extension):
                    tally[extension] = tally[extension] + 1
    return tally

## Count the Length of all Audio Files in a Directory

In [10]:
def count_MP3_length_directory(directory):
    """Sum the reported length of every ``.mp3`` file under ``directory``.

    Files whose MP3 header cannot be found are reported with a printed
    message and left out of the total.
    """
    total = 0
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.endswith(".mp3"):
                continue
            try:
                total += mp3.MP3(os.path.join(folder, name)).info.length
            except mp3.HeaderNotFoundError:
                print("Media read error")
    return total

## Get all Values corresponding to all Keys of a Dictionary

In [11]:
def get_all_values(obj):
    """Flatten a nested dict/list structure into a list of distinct leaf values.

    Dictionaries are visited in sorted key order and lists in their own
    order; anything that is neither a dict nor a list counts as a leaf. Each
    value is kept once, at the position of its first occurrence. Returns
    ``None`` when no leaf value exists.
    """
    def _leaves(node):
        # Depth-first walk yielding every leaf in visiting order.
        if isinstance(node, dict):
            for key in sorted(node):
                yield from _leaves(node[key])
        elif isinstance(node, list):
            for element in node:
                yield from _leaves(element)
        else:
            yield node

    distinct = []
    for leaf in _leaves(obj):
        if leaf not in distinct:
            distinct.append(leaf)
    return distinct if distinct else None

## Extract json data from a List of File Locations

In [12]:
def extract_json_data(json_file_names):
    """Parse the JSON objects stored in a list of files and flatten each one.

    Every file is split into chunks at the end of each line on which the
    running count of ``{`` minus ``}`` is zero. Each chunk is parsed as JSON
    and, if valid, reduced with ``get_all_values``. Chunks that do not parse
    are skipped silently, and lines left over after the last balanced point
    are discarded.

    Args:
        json_file_names: Iterable of paths to UTF-8 text files.

    Returns:
        A list with one ``get_all_values`` result per successfully parsed
        chunk, in file order.
    """
    json_files = []
    for file_name in json_file_names:
        # Read through a context manager so the handle is closed right away.
        with open(file_name, encoding="utf8") as handle:
            lines = handle.readlines()
        depth = 0
        chunk = []
        for line in lines:
            depth += line.count("{") - line.count("}")
            if depth != 0:
                chunk.append(line)
                continue
            # NOTE(review): the line that closes a chunk is replaced by a bare
            # "}" (this drops trailing text such as a comma). As a result an
            # object written entirely on one line never parses. Kept as in the
            # original; confirm this is intended.
            chunk.append("}")
            text = "".join(chunk)
            chunk = []
            # Parse in memory instead of going through temp<N>.json scratch
            # files in the working directory.
            try:
                parsed = json.loads(text)
            except ValueError:
                continue
            json_files.append(get_all_values(parsed))
    return json_files

## Load the List of Concepts

In [13]:
def load_dict(filename):
    """Read a text file and return its lines without the trailing newline.

    Args:
        filename: Path of the file to read (opened with the default encoding).

    Returns:
        A list with one string per line; only ``"\\n"`` is stripped, other
        trailing whitespace is kept.
    """
    # The context manager closes the file; the original left it open.
    with open(filename) as handle:
        return [line.rstrip('\n') for line in handle]

In [14]:
# Known concept identifiers, one per line of conceptList.txt (read from the
# current working directory). filter_assessment_data looks values up in this list.
conceptList=load_dict("conceptList.txt")

In [15]:
def filter_assessment_data(directory, file_types=("json", "json.bk")):
    """Return the known concepts referenced by the items of a content folder.

    Reads the JSON files under ``directory + "/items"``, takes the flattened
    values of the first parsed object and keeps those that start with
    ``"LD"``, ``"LO"`` or ``"Num"`` and also appear in the module-level
    ``conceptList``.

    Args:
        directory: Content folder containing an ``items`` sub-folder.
        file_types: Extensions (without the dot) treated as JSON files. The
            default matches the value the notebook uses, so the function no
            longer depends on a global defined in a later cell.

    Returns:
        A list of concept identifiers; empty when there are no item files,
        nothing could be parsed, or no value matches.
    """
    ls_items = get_file_type_directory(directory + "/items", list(file_types))
    if not ls_items:
        return []
    items = extract_json_data(ls_items)
    # NOTE(review): only the first parsed object is inspected, as before;
    # confirm whether later objects should contribute concepts too.
    if not items or not items[0]:
        return []
    known = set(conceptList)
    return [
        value
        for value in items[0]
        if isinstance(value, str)
        and value.startswith(("LD", "LO", "Num"))
        and value in known
    ]

In [16]:
# File extensions counted as media assets and those treated as JSON content.
# Defined once at module level so code in other cells can refer to them.
media_type=["mp3","ogg","png","gif","jpg"]
json_file_type=["json","json.bk"]

# Process listing entries 50-59. Slicing (rather than indexing) simply yields
# fewer entries when the listing is shorter than 60 items.
for content in resp["result"]['contents'][50:60]:
    identifier=process_entity(content)
    if identifier is None:
        continue
    print("Identifier",identifier)
    #Count the number of files of a particular media type in the item directory
    print("\nNumber of objects of each type:",count_file_type_directory(identifier+"/assets",media_type))
    #Get the length of all mp3 files in a directory
    print("\nTotal length of MP3 files (seconds):",count_MP3_length_directory(identifier+"/assets"))
    #Extract data from json files
    ls_data=get_file_type_directory(identifier+"/data",json_file_type)
    if ls_data is not None:
        data=extract_json_data(ls_data)
        print("\nThe stories:\n")
        for story in data:
            # An entry can be None when a parsed object had no values.
            for value in story or []:
                print("\t",value)
            print("\n")
    #Extract concepts from items folder
    concList=filter_assessment_data(identifier)
    if(len(concList)>0):
        print("\nThe concepts taught:")
        for concept in concList:
            print(concept)
    #Delete folder created
    shutil.rmtree(identifier)
    print("\n")

Identifier ecml_testbook_gridlayout

Number of objects of each type: {'png': 6, 'jpg': 0, 'ogg': 0, 'gif': 0, 'mp3': 0}

Total length of MP3 files (seconds): 0
No json files found
No json files found


Identifier ecml_testbook_image

Number of objects of each type: {'png': 3, 'jpg': 0, 'ogg': 0, 'gif': 0, 'mp3': 0}

Total length of MP3 files (seconds): 0
No json files found
No json files found


Identifier ecml_testbook_shape

Number of objects of each type: {'png': 2, 'jpg': 0, 'ogg': 0, 'gif': 0, 'mp3': 0}

Total length of MP3 files (seconds): 0
No json files found
No json files found


Identifier ecml_testbook_inputPlugin

Number of objects of each type: {'png': 3, 'jpg': 0, 'ogg': 0, 'gif': 0, 'mp3': 0}

Total length of MP3 files (seconds): 0
No json files found
No json files found


Identifier testbook.ecml.text1

Number of objects of each type: {'png': 3, 'jpg': 0, 'ogg': 0, 'gif': 0, 'mp3': 0}

Total length of MP3 files (seconds): 0
No json files found
No json files found


Iden