In [105]:
from modules.ONS import queries
reload(queries)

index = "ons1515656903908"

models = []
for i in range(9):
    modelName = "ons_model_%d" % (i+1)
    models.append(modelName)
    
weights = [1.0]*9

size = 10

searchTerms = ["rpi", "gender pay gap", "cpi", "gdp", "inflation", "crime", "unemployment", 
              "population", "immigration", "mental health", "london", "london population", 
              "retail price index", "life expectancy", "obesity", "religion", "migration", 
              "poverty", "social media", "employment"]

qidDict = {}
for i in range(len(searchTerms)):
    qidDict[i+1] = searchTerms[i]

In [95]:
from elasticsearch import Elasticsearch

esUrl = "http://localhost:9200"
esClient = Elasticsearch(esUrl, timeout=1000)

from pymongo import MongoClient, ASCENDING, DESCENDING
mongoClient = MongoClient('localhost', 27017)

db = mongoClient.local
collection = db.judgements

def mergeDicts(x, y):
    z = x.copy()   # start with x's keys and values
    z.update(y)    # modifies z with y's keys and values & returns None
    return z

def termQuery(term):
    return {"term": term}

def timeQuery(dateTime):
    return {"timeStamp": dateTime}

# Get date of most recent entries
doc = collection.find().sort([("timeStamp", DESCENDING)]).limit(1).next()
timeStamp = doc["timeStamp"]
    
timeStampQuery = timeQuery(timeStamp)

In [96]:
import numpy as np

MAX_SCORE = 4.0

def idealJudgement(num):
    i = 0
    incremenet = (1.0 / (float(num) - 1.0)) * num
    
    iJ = np.zeros(num)
    val = len(iJ)
    while (val > 0):
        iJ[i] = (val / float(num)) * MAX_SCORE
        i += 1
        val -= incremenet
        
    return iJ

def idealDiscountedCumulativeGain(num):
    idealGain = idealJudgement(num)
    iDCG = np.zeros(num)
    
    total = 0.0
    for i in range(num):
        total += idealGain[i] / float(i+1)
        iDCG[i] = total
    return iDCG

class Judgements(object):
    def __init__(self, qid, judgements):
        self.qid = qid
        self.judgements = judgements
        
    def dcg(self):
        total = 0.0
        
        dcg = []
        
        for i in range(len(self.judgements)):
            judgement = self.judgements[i]
            total += judgement["judgement"] / float(judgement["rank"])
            dcg.append(total)
            
        return np.array(dcg)
    
    def ndcg(self):
        
        dcg = self.dcg()
        idcg = idealDiscountedCumulativeGain(len(dcg))
        
        ndcg = np.zeros(len(dcg))
        
        for i in range(len(ndcg)):
            ndcg[i] = min(1.0, dcg[i] / idcg[i])
        return ndcg
    
    def __iter__(self):
        return self.judgements.__iter__()
    
    def __getitem__(self, i):
        return self.judgements[i]
    
    def __len__(self):
        return len(self.judgements)
    
    def remove(self, item):
        self.judgements.remove(item)

In [117]:
import copy, json

def processTerm(qid, searchTerm, modelWeights, pages=10):
    mongoQuery = mergeDicts(termQuery(searchTerm), timeStampQuery)
    cursor = collection.find(mongoQuery)
    judgementCount = cursor.count()
    
    if (judgementCount > 0):
        judgements = cursor.next()
        modelJudgements = copy.deepcopy(judgements)

        keep = []
        judgementList = modelJudgements["judgements"]["judgementList"]
        
        # Reset ranks
        for judgement in judgementList:
            judgement["rank"] = -1
        
        rescoreQueries = queries.getRescoreQueriesForModels(searchTerm, models, modelWeights)
        for page in range(1, pages+1):
            fromParam = (page - 1) * size
            esQuery = queries.getBaseQuery(searchTerm, rescoreQueries, fromParam, size)
            hits = esClient.search(index=index, body=esQuery)
            
#             print "Got %d hits for page %d" % (len(hits["hits"]["hits"]), page)
            
            searchResults = []
            for hit in hits["hits"]["hits"]:
                searchResults.append( hit["_id"] )
            
            # Check for matches
            for judgement in judgementList:
                url = judgement["attrs"]["uri"]
                if (url in searchResults):
                    newRank = int(((page - 1) * 10.0) + searchResults.index(url) + 1)
                    judgement["rank"] = newRank
                    j = copy.deepcopy(judgement)
                    j["rank"] = newRank
                    keep.append(j)
                    
        if (len(keep) > 1):
            judgementList = Judgements(qid, keep)
            ndcg = judgementList.ndcg()
            print searchTerm, ndcg.mean()
        

In [118]:
# processTerm(1, "gender pay gap", weights)
for qid in qidDict:
    processTerm(qid, qidDict[qid], weights)

1
rpi 0.751859317226
1
gender pay gap 0.381042769502
1
cpi 0.894613719614
1
gdp 0.804628666513
1
inflation 0.325583012225
1
crime 0.631960822418
1
unemployment 0.214898344191
1
population 0.100648159892
1
immigration 0.788106496236
1
mental health 0.699182670488
1
1
london population 1.0
1
retail price index 0.614907603821
1
life expectancy 0.0334934224896
0
1
religion 0.568614989855
1
migration 0.699754836336
1
poverty 0.497377146226
1
social media 0.96152245441
1
employment 0.585448240629
