In [10]:
# Mocks the data for the Pratham use case V1 and recommends content based on content popularity.
import csv
import sys
import collections
import os.path
import requests

# on exit clean-ups
import atexit

# cassandra libs
from cassandra.cluster import Cluster
from cassandra.query import dict_factory


# neo4j libs
from py2neo import Graph
from py2neo import Node, Relationship
from py2neo import authenticate
from random import randint

# neo4j graph connector
# Registers HTTP credentials for the local Neo4j server before opening the graph.
# NOTE(review): the password is hard-coded in source; consider reading it from
# the environment or a config file that is not committed.
authenticate("localhost:7474", "neo4j", "1sTep123")
# Graph() is called without a URI, so it relies on py2neo's default endpoint
# (presumably the same localhost:7474 server - confirm).
graph = Graph()
# delete entire graph
# WARNING: this is destructive - every run starts by wiping the whole graph
# before it is repopulated further down.
graph.delete_all()



# bool flag database connections
# Both start as False; cassandraDbOn is set to True where the Cassandra
# connection is set up below. Nothing visible here ever sets neo4jDbOn to True.
cassandraDbOn=False
neo4jDbOn=False

def dbCleanUP(cassandraDbOn,neo4jDbOn):
    """Release database connections; intended to run as an atexit hook.

    cassandraDbOn -- when true, shut down the module-level Cassandra
                     ``session`` and ``cluster`` if they exist.
    neo4jDbOn     -- accepted for interface compatibility; no Neo4j cleanup
                     is performed.
    """
    if not cassandraDbOn:
        return

    print('cleaning Cassandra state')
    # The hook can fire before the connection was ever opened (for example
    # when connecting failed), so look the handles up defensively instead of
    # assuming the globals are defined.
    for name in ('session', 'cluster'):
        handle = globals().get(name)
        if handle is not None:
            handle.shutdown()

# setup cassandra connection
cluster = Cluster()
try:
    session = cluster.connect('learner_db')
except Exception:
    # Do not leak the cluster's resources when the keyspace cannot be reached.
    cluster.shutdown()
    raise
cassandraDbOn = True

# Register the exit hook only now that `session` and `cluster` both exist,
# and pass the real connection flags instead of hard-coded True values.
atexit.register(dbCleanUP, cassandraDbOn, neo4jDbOn)

# set response schema to Dictionaries
session.row_factory = dict_factory

# process learner-db
# move content summary table
def movecontentsideloadingsummary():
    """Create a Content node for every content id in the summary table.

    Reads the distinct ``content_id`` values from the Cassandra table
    ``content_sideloading_summary`` and, for each one, merges a ``Content``
    node keyed on ``id`` and stores a mocked ``content_popularity`` on it
    (a random integer from 1 to 100).
    """
    graph = Graph()

    rows = session.execute("SELECT DISTINCT content_id from content_sideloading_summary")
    for row in rows:
        cid = row['content_id']
        print("** Content: %s" % cid)

        # merge_one returns a node bound to the graph, which push() needs;
        # this is the same call the other loaders in this file use.
        node = graph.merge_one("Content", "id", cid)

        # Popularity is mocked for now.
        # TODO: replace with the table's total_count once that column is
        # confirmed to be populated.
        popularity = randint(1, 100)
        node.properties['content_popularity'] = popularity
        node.push()

        print('content: %s content_popularity: %s' % (cid, popularity))

# move concept map 
def moveConceptMap():
    """Copy the concept map from the taxonomy service into Neo4j.

    Fetches the domain map, merges one ``Concept`` node (keyed on ``id``) per
    concept that has an ``identifier``, copies its ``subject`` and
    ``objectType`` when present, and assigns a mocked ``tags`` value of the
    form ``ASERlevel_<1-5>``. Then creates one relationship per relation
    entry that has ``startNodeId``, ``endNodeId`` and ``relationType``.

    Raises KeyError if the response lacks ``result``, ``concepts`` or
    ``relations``.
    """
    # neo4j graph connector
    graph = Graph()

    url="http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/domain/map"
    result = requests.get(url).json()["result"]

    # move all concepts
    for conceptDict in result["concepts"]:
        if 'identifier' not in conceptDict:
            continue

        # create/find node
        node = graph.merge_one("Concept", "id", conceptDict['identifier'])

        for key in ('subject', 'objectType'):
            if key in conceptDict:
                node.properties[key] = conceptDict[key]
        node.properties['tags'] = "ASERlevel_" + str(randint(1, 5))
        # One push per node sends all property changes in a single round trip.
        node.push()

    # move all relations
    # Read outside the concept loop so an empty concept list cannot leave the
    # relation list undefined.
    requiredKeys = ('startNodeId', 'endNodeId', 'relationType')
    for relationDict in result["relations"]:
        if not all(key in relationDict for key in requiredKeys):
            continue

        node1 = graph.merge_one("Concept", "id", relationDict['startNodeId'])
        node2 = graph.merge_one("Concept", "id", relationDict['endNodeId'])
        graph.create(Relationship(node1, relationDict['relationType'], node2))

def moveContentModel():
    """Copy the content list from the taxonomy service into Neo4j.

    For every listed content that has an ``identifier``, merges a ``Content``
    node keyed on ``id`` and fetches the content's detail record. Entries
    whose detail request does not return HTTP 200 keep their bare node and
    are otherwise skipped. For the rest, ``Subject`` is copied when present,
    and mocked ``tags`` (``ASERlevel_<1-5>``) and ``content_popularity``
    (1-100) values are stored.

    Raises KeyError if a response lacks the expected ``result`` structure.
    """
    baseURL = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/getContent/"
    listURL = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/content/list"

    # neo4j graph connector
    graph = Graph()

    contentList = requests.get(listURL).json()["result"]["contents"]
    for contentListDict in contentList:
        # skip entries that carry no identifier
        if 'identifier' not in contentListDict:
            continue
        identifier = contentListDict['identifier']

        # create a node for this Content
        node = graph.merge_one("Content", "id", identifier)

        detailResp = requests.get(baseURL + identifier)
        if detailResp.status_code != 200:
            continue

        contentDict = detailResp.json()["result"]["content"]

        # TODO: contentDict['concepts'] is meant to become Content->Concept
        # relationships; it is not written to the graph yet.

        if 'Subject' in contentDict:
            node.properties['Subject'] = contentDict['Subject']

        node.properties['tags'] = "ASERlevel_" + str(randint(1, 5))
        # content popularity is mocked and stored as a node property
        node.properties['content_popularity'] = randint(1, 100)
        # One push per node sends all property changes in a single round trip.
        node.push()

# Run each loader in order, printing a divider before it and a confirmation
# after it: first the concept map, then the content model.
for loader, doneMessage in ((moveConceptMap, 'Concept Map populated'),
                            (moveContentModel, 'Content Model populated')):
    print('................')
    loader()
    print(doneMessage)
print('................')
# The sideloading-summary step is currently disabled; popularity values come
# from moveContentModel instead.
#movecontentsideloadingsummary();
print('Content popularity score updated')

................
Concept Map populated
................
Content Model populated
................
Content popularity score updated


In [60]:
# Recommendation inputs: the subject to match and the learner's ASER level tag.
sub="numeracy"
level="ASERlevel_2"
#content for exact match of level
# Content nodes whose Subject matches `sub` case-insensitively (the '(?i)'
# regex flag), that have a tags property, and for which `level` IN b.tags
# holds; ordered by popularity, highest first.
# NOTE(review): the Cypher text is built by string concatenation. That is fine
# for the hard-coded values above, but switch to query parameters if sub or
# level ever come from user input.
contentRec1= graph.cypher.execute("MATCH (b:Content) WHERE (lower(b.Subject)=~ '(?i)"+sub+"') AND ( has(b.tags)) AND ('"+level+"' IN (b.tags))  Return b.id AS Content, b.tags AS tag, b.content_popularity AS popularity ORDER BY popularity DESC")
# content for match at domain level-optional match
# Subject-only match, ordered by tag (descending) and then popularity.
# NOTE(review): the OPTIONAL MATCH clause does not appear to remove rows, so
# content at every ASER level is returned - confirm that this is intended.
contentRec= graph.cypher.execute("MATCH (b:Content) WHERE lower(b.Subject)=~ '(?i)"+sub+"' OPTIONAL MATCH (b) WHERE ( has(b.tags)) AND ('"+level+"' IN (b.tags))  Return b.id AS Content, b.tags AS tag, b.content_popularity AS popularity ORDER BY tag DESC,popularity DESC")

# Print both result tables.
print('Recomended content for ASER level match:')
print(contentRec1)
print('Recomended content for \'subject\' level match :')
print(contentRec)


Recomended content for ASER level match:
   | Content                      | tag         | popularity
---+------------------------------+-------------+------------
 1 | org.ekstep.num.scrn.basic    | ASERlevel_2 |         40
 2 | org.ekstep.ordinal.worksheet | ASERlevel_2 |         18
 3 | org.ekstep.math.magic        | ASERlevel_2 |         16
 4 | numeracy_365                 | ASERlevel_2 |         13

Recomended content for 'subject' level match :
    | Content                       | tag         | popularity
----+-------------------------------+-------------+------------
  1 | org.ekstep.hindi.num.activity | ASERlevel_5 |         92
  2 | numeracy_366                  | ASERlevel_5 |         87
  3 | org.ekstep.addobj.worksheet   | ASERlevel_5 |         48
  4 | org.ekstep.eng.num.activity   | ASERlevel_5 |         47
  5 | org.ekstep.delta              | ASERlevel_5 |         41
  6 | org.ekstep.time.worksheet     | ASERlevel_5 |         23
  7 | numeracy_374                  | A