Skip to content

Commit

Permalink
cleanup / quick hack for solr stress test
Browse files Browse the repository at this point in the history
  • Loading branch information
Guenter Hipler committed Feb 17, 2016
1 parent 514a6ea commit 8cadbe2
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 0 deletions.
File renamed without changes.
23 changes: 23 additions & 0 deletions misc/solrPerformanceTest/MongoWrapper.py
@@ -0,0 +1,23 @@

#from pymongo.connection import Connection
from pymongo import MongoClient
#from bson.binary import Binary



class MongoWrapper():
    """Thin wrapper around a MongoClient holding the solr/queries collection.

    Used by the Solr stress-test scripts to read and write harvested
    query documents.
    """

    def __init__(self):
        # Hard-coded test host (quick hack); 'admin' is the auth database.
        self.client = MongoClient("mongodb://sb-db4.swissbib.unibas.ch:29017/admin")
        self.solrDB = self.client["solr"]
        self.collection = self.solrDB["queries"]

    def getCollection(self):
        """Return the 'queries' collection used to store harvested requests."""
        return self.collection

    def closeConnections(self):
        """Close the underlying MongoClient if one was created."""
        # PEP 8 idiom: 'is not None' instead of 'not ... is None'
        if self.client is not None:
            self.client.close()
20 changes: 20 additions & 0 deletions misc/solrPerformanceTest/README
@@ -0,0 +1,20 @@

Attention: this is just a quick hack to pick up real user requests found in the logs of the productive Solr servers

principal idea (with two steps):
A. readLogs5x.py
- scan the list of logfiles
- use only the params query part
- make some transformations on this part for things we don't want or can't use
(in our case:
-- sru.swissbib.ch is Java based and uses the native binary protocols. These are reflected in the logs and we can't make
http based requests in this test scenario
-- filter for facet-only requests (at the moment hardcoded)

- the final requests we want to use for our test scenario are stored within a schema-less document storage (in our case MongoDB)
This makes it easy to store additional results (querytime and number of hits) even in parallel


B. RunQueries.py
- based on the stored (MongoDB) requests we can make stress tests against Solr servers (hard-coded!)

62 changes: 62 additions & 0 deletions misc/solrPerformanceTest/RunQueries.py
@@ -0,0 +1,62 @@

from MongoWrapper import MongoWrapper
import requests
import json
from StringIO import StringIO
import time
from datetime import datetime, timedelta


class RunQueries():
    """Replay harvested Solr queries from MongoDB against a Solr server.

    Every stored query document is sent to the (hard-coded) Solr host;
    the measured QTime and number of hits are written back into the same
    document under per-run field names suffixed with a UTC timestamp.
    """

    # hard-coded target server of the stress test (quick hack, as before)
    SOLR_URL = "http://sb-s20.swissbib.unibas.ch:8080/solr/sb-biblio/select"

    def __init__(self):
        # Per-run field-name suffix, e.g. 'solr5QTime2016-02-17T093012Z'.
        # strftime zero-pads hour/minute/second (the original hand-built
        # concatenation did not), keeping suffixes fixed-width and sortable.
        self.currentTime = datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")

        self.mongoWrapper = MongoWrapper()

    def startRunning(self):
        """Run every stored query once and record QTime/hits in MongoDB."""
        for doc in self.mongoWrapper.getCollection().find({}):
            try:
                query = doc["query"]

                result = requests.get(self.SOLR_URL, params=query.encode("utf-8"))
                # parse the JSON response directly; no StringIO detour needed
                myJson = json.loads(result.content)
                queryTime = myJson["responseHeader"]["QTime"]
                numberHits = myJson["response"]["numFound"]

                doc["solr5QTime" + self.currentTime] = int(queryTime)
                doc["solr5hits" + self.currentTime] = int(numberHits)

                # save() updates the fetched document in place
                self.mongoWrapper.getCollection().save(doc)

            except Exception as ex:
                # best-effort: report and continue with the next stored query
                print(ex)




if __name__ == '__main__':
    # Entry point: replay every stored query once against the Solr server.
    RunQueries().startRunning()




48 changes: 48 additions & 0 deletions misc/solrPerformanceTest/notes.txt
@@ -0,0 +1,48 @@
db.queries.find({solr5QTime: {$exists: true}}).count()


db.queries.createIndex( { 'solr5QTime2016-02-10T13523Z': 1 } )
db.queries.find({'solr5QTime2016-02-16T105326Z': {$exists: true}}).count()

solr5QTime2016-02-16T105326Z


db.queries.find( { 'solr5QTime2016-02-16T105326Z': { $gt: 100 } } )

db.queries.find( { 'solr5QTime2016-02-16T105326Z': { $gt: 100 }} )


modifiers : {no_cursor_timeout: True}

queries insgesamt:
101196
kleiner 100
96000




grösser 1000
706



grösser 5000
24

grösser 10000
4


$PYTHON_SOLR/python readLogs5x.py -d/usr/local/swissbib/Staff/gh/solrProdLogs > read.logs.log 2>&1 &


1) Abfragen insgesamt: 76656
less than 100 73270
> 500 511
> 1000 325
> 1500 232
> 2500 197
> 4000 30
longest 5724

158 changes: 158 additions & 0 deletions misc/solrPerformanceTest/readLogs5x.py
@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
import sys
#import pymongo.connection
#from pymongo.connection import Connection
#from bson.binary import Binary

from MongoWrapper import MongoWrapper



__author__ = 'swissbib'


import re

class ParseLogLine():
    """Extract relevant Solr queries from log lines and store them in MongoDB.

    shortquery / longquery are kept for interface compatibility with the
    caller; the file-based output is disabled in favour of MongoDB storage.
    """

    def __init__(self, shortquery, longquery):
        self.numberLinesWritten = 0
        # raw strings for regex patterns (same behavior, PEP 8 best practice)
        self.qLine = re.compile(r"params={(.*?)}")
        # Solr 5 log lines append ' hits=...' right after the params block
        self.qLineSolr5 = re.compile(r"params={(.*?)} hits")
        self.qOnly = re.compile(r"&q=(.*?)&")
        self.qOnlyList = []

        self.numberHits = re.compile(r"hits=(\d+)")
        # e.g. QTime=1
        self.qTime = re.compile(r"QTime=(\d+)")

        self.mongoWrapper = MongoWrapper()

        # Requests produced by monitoring / JMeter / admin UI — skip them.
        self.jMeterQueries = ["%2Bthe+%2Bart+%2Bof+%2Bcomputer+%2Bprogramming",
                              "q=Faust","event=newSearcher",
                              "start=0&q=%2Bsublocal_B1:[*+TO+*]%0a%0a%2B(%0asubtop_swd:[*+TO+*]++OR+%0asubpers_swd",
                              "Basler+Zeitschrift+für+Geschichte+und+Altertumskunde",
                              "start=0&q=%2Bsublocal_BY:[*+TO+*]%0a%0a%2B(%0asubtop_idsbb",
                              "start=0&q=%2B(%0asubtop_rero:[*+TO+*]++OR+%0asubpers_rero:[*+TO+*]",
                              "start=0&q=%2Bsublocal_BP",
                              "start=0&q=%2Bsublocal_BU",
                              "start=0&q=%2Bsublocal_BW",
                              "start=0&q=%2Bsublocal_G1",
                              "start=0&q=%2Bsublocal_G3",
                              "start=0&q=%2Bsublocal_G5",
                              "start=0&q=%2Bsublocal_G7",
                              "start=0&q=%2Bsublocal_G9",
                              "start=0&q=%2Bsublocal_GC",
                              "start=0&q=%2Bsublocal_GF",
                              "file=schema.xml&contentType=text/xml",
                              "file=admin-extra",
                              "file=solrconfig.xml",
                              "command=details",
                              "numTerms=0&show=index",
                              "q=title_long:The+art+of+computer+programming&rows=2",
                              "touchpoint/perma.do"]
        # Only requests containing one of these fragments are kept.
        self.relevantQueries = ["facet=true"]

    def __del__(self):
        # Close the MongoDB connection when the parser goes away.
        # getattr guards against a partially-constructed instance
        # (original raised AttributeError if __init__ failed early).
        if getattr(self, "mongoWrapper", None) is not None:
            self.mongoWrapper.closeConnections()

    def evaluate(self, line):
        """Inspect one log line; store its query in MongoDB if relevant.

        Kept: lines whose params block contains a 'relevant' fragment,
        is not a known artificial/admin request, and reports more than
        one hit.
        """
        qValue = self.qLineSolr5.search(line)
        if not qValue:
            return
        query = qValue.group(1)

        # skip artificial / administrative requests
        for item in self.jMeterQueries:
            if query.find(item) != -1:
                return

        wanted = False
        for item in self.relevantQueries:
            if query.find(item) != -1:
                wanted = True
        if not wanted:
            return

        # rewrite the binary javabin protocol to json so the query can be
        # replayed later over plain HTTP (see RunQueries)
        q1 = query.replace("&wt=javabin", "&wt=json")
        q2 = q1.replace("&version=2", "")

        nHits = self.numberHits.search(line)
        qT = self.qTime.search(line)
        if nHits and qT:
            hits = int(nHits.group(1))
            time = int(qT.group(1))
            if hits > 1:
                self.numberLinesWritten += 1
                newrecord = {
                    "query": q2,
                    "time": time,
                    "hits": hits
                }
                try:
                    self.mongoWrapper.getCollection().insert(newrecord)
                except Exception as pythonBaseException:
                    # best-effort: report the failed insert and carry on
                    print(pythonBaseException)

    def getNumberOfWrittenQueries(self):
        """Number of queries written to MongoDB by this parser instance."""
        return self.numberLinesWritten




if __name__ == '__main__':

    import os
    from argparse import ArgumentParser

    # -d: directory containing the productive Solr logfiles to scan;
    # -l / -s: file-based outputs, currently unused but kept as options.
    oParser = ArgumentParser()
    oParser.add_argument("-d", "--dir", dest="directory")
    oParser.add_argument("-l", "--longqueryFile", dest="wholequeryFile", default="./longquery.txt")
    oParser.add_argument("-s", "--shortqueryFile", dest="queryFile", default="./shortquery.txt")

    args = oParser.parse_args()

    tDir = args.directory
    longqueryFile = args.wholequeryFile
    shortqueryFile = args.queryFile

    numberOfLines = 0
    os.chdir(tDir)
    for fname in os.listdir(tDir):
        # skip sub-directories etc. (original crashed on non-regular files)
        if not os.path.isfile(fname):
            continue
        sys.stdout.write("".join(["\n\n", "-----", fname, "-----", "\n"]))
        pLog = ParseLogLine(shortqueryFile, longqueryFile)
        # 'with' guarantees the logfile is closed (original leaked the handle)
        with open(fname, "r") as iF:
            for line in iF:
                pLog.evaluate(line)

        numberOfLines += pLog.getNumberOfWrittenQueries()

    sys.stdout.write("".join(["\n\n", "-----", "number of queries: ", str(numberOfLines), "-----", "\n"]))


0 comments on commit 8cadbe2

Please sign in to comment.