Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cleanup / quick hack for solr stress test
- Loading branch information
Guenter Hipler
committed
Feb 17, 2016
1 parent
514a6ea
commit 8cadbe2
Showing
6 changed files
with
311 additions
and
0 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
|
||
#from pymongo.connection import Connection | ||
from pymongo import MongoClient | ||
#from bson.binary import Binary | ||
|
||
|
||
|
||
class MongoWrapper(object):
    """Hold the MongoDB client and the collection used for the Solr test queries.

    The defaults reproduce the previously hard-coded connection, so
    ``MongoWrapper()`` behaves as before; pass other values to point the
    tooling at a different server, database or collection.
    """

    DEFAULT_URI = "mongodb://sb-db4.swissbib.unibas.ch:29017/admin"

    def __init__(self, uri=DEFAULT_URI, database="solr", collection="queries"):
        """Open the client and look up the target collection.

        uri        -- MongoDB connection URI handed to ``MongoClient``.
        database   -- name of the database holding the queries.
        collection -- name of the collection inside that database.
        """
        self.client = MongoClient(uri)
        self.solrDB = self.client[database]
        self.collection = self.solrDB[collection]

    def getCollection(self):
        """Return the collection object selected in the constructor."""
        return self.collection

    def closeConnections(self):
        """Close the client if there is one.

        ``getattr`` keeps this safe on an instance whose constructor failed
        before ``client`` was assigned.
        """
        client = getattr(self, "client", None)
        if client is not None:
            client.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
Attention: this is just a quick hack to pick up real user requests found in the logs of the production Solr servers | ||
|
||
principal idea (with two steps): | ||
A. readLogs5x.py | ||
- scan the list of logfiles | ||
- use only the params query part | ||
- make some transformations on this part for things we don't want or can't use | ||
(in our case: | ||
-- sru.swissbib.ch is Java based and uses the native binary protocols. These are reflected in the logs and we can't make | ||
HTTP-based requests in this test scenario | ||
-- filter for facet-only requests (hard-coded at the moment) | ||
|
||
- the final requests we want to use for our test scenario are stored in a schema-less document store (in our case MongoDB) | ||
This makes it easy to store additional results (query time and number of hits), even in parallel | ||
|
||
|
||
B. RunQueryies.py | ||
- based on the requests stored in MongoDB we can run stress tests against Solr servers (hard-coded!) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
|
||
from MongoWrapper import MongoWrapper | ||
import requests | ||
import json | ||
from StringIO import StringIO | ||
import time | ||
from datetime import datetime, timedelta | ||
|
||
|
||
class RunQueries(object):
    """Replay the stored queries against a Solr server and record the results.

    Every document of the MongoDB collection is sent to Solr once; the
    reported query time and hit count are written back into the same document
    under field names that carry the start time of this run.
    """

    DEFAULT_SOLR_URL = "http://sb-s20.swissbib.unibas.ch:8080/solr/sb-biblio/select"

    def __init__(self, solrUrl=DEFAULT_SOLR_URL):
        """Prepare a run.

        solrUrl -- select handler the queries are sent to; defaults to the
                   server that used to be hard-coded.
        """
        self.solrUrl = solrUrl
        self.currentTime = self._formatRunStamp(datetime.utcnow())
        self.mongoWrapper = MongoWrapper()

    @staticmethod
    def _formatRunStamp(moment):
        """Return *moment* as ``YYYY-MM-DDTHHMMSSZ``.

        The components are zero-padded on purpose: without padding 01:35:23
        and 13:05:23 would both be rendered as ``13523``.
        """
        return moment.strftime("%Y-%m-%dT%H%M%SZ")

    def startRunning(self):
        """Send every stored query to Solr and save QTime / numFound per document.

        Errors for a single document are printed and the loop continues with
        the next one (best effort, as before).
        """
        collection = self.mongoWrapper.getCollection()
        qTimeField = "solr5QTime" + self.currentTime
        hitsField = "solr5hits" + self.currentTime

        for doc in collection.find({}):
            try:
                response = requests.get(self.solrUrl,
                                        params=doc["query"].encode("utf-8"))
                payload = json.loads(response.content)

                doc[qTimeField] = int(payload["responseHeader"]["QTime"])
                doc[hitsField] = int(payload["response"]["numFound"])

                # NOTE(review): save() writes the whole document back;
                # presumably parallel runs could overwrite each other's
                # fields -- confirm before running several instances.
                collection.save(doc)
            except Exception as ex:
                print(ex)
|
||
|
||
|
||
|
||
if __name__ == '__main__':

    # Script entry point: replay all stored queries once, then release the
    # MongoDB connection even if the run is interrupted by an error.
    runner = RunQueries()
    try:
        runner.startRunning()
    finally:
        runner.mongoWrapper.closeConnections()
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
db.queries.find({solr5QTime: {$exists: true}}).count() | ||
|
||
|
||
db.queries.createIndex( { 'solr5QTime2016-02-10T13523Z': 1 } ) | ||
db.queries.find({'solr5QTime2016-02-16T105326Z': {$exists: true}}).count() | ||
|
||
solr5QTime2016-02-16T105326Z | ||
|
||
|
||
db.queries.find( { 'solr5QTime2016-02-16T105326Z': { $gt: 100 } } ) | ||
|
||
db.queries.find( { 'solr5QTime2016-02-16T105326Z': { $gt: 100 }} ) | ||
|
||
|
||
modifiers : {no_cursor_timeout: True} | ||
|
||
queries insgesamt: | ||
101196 | ||
kleiner 100 | ||
96000 | ||
|
||
|
||
|
||
|
||
grösser 1000 | ||
706 | ||
|
||
|
||
|
||
grösser 5000 | ||
24 | ||
|
||
grösser 10000 | ||
4 | ||
|
||
|
||
$PYTHON_SOLR/python readLogs5x.py -d/usr/local/swissbib/Staff/gh/solrProdLogs > read.logs.log 2>&1 & | ||
|
||
|
||
1) Abfragen insgesamt: 76656 | ||
less than 100 73270 | ||
> 500 511 | ||
> 1000 325 | ||
> 1500 232 | ||
> 2500 197 | ||
> 4000 30 | ||
longest 5724 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# -*- coding: utf-8 -*- | ||
import sys | ||
#import pymongo.connection | ||
#from pymongo.connection import Connection | ||
#from bson.binary import Binary | ||
|
||
from MongoWrapper import MongoWrapper | ||
|
||
|
||
|
||
__author__ = 'swissbib' | ||
|
||
|
||
import re | ||
|
||
class ParseLogLine(object):
    """Pick replayable queries out of Solr log lines and store them in MongoDB.

    A line is kept when its ``params={...} hits`` part matches none of the
    excluded substrings (``jMeterQueries``), contains at least one of the
    wanted substrings (``relevantQueries``) and reports more than one hit.
    """

    def __init__(self, shortquery, longquery):
        """Set up the patterns and filters and open the MongoDB connection.

        shortquery, longquery -- kept for backward compatibility; they are
                                 not used by this class.
        """
        self._setupParsing()
        self.mongoWrapper = MongoWrapper()

    def _setupParsing(self):
        """Initialise counters, regular expressions and filter lists (no I/O)."""
        self.numberLinesWritten = 0
        self.qLine = re.compile(r"params={(.*?)}")
        self.qLineSolr5 = re.compile(r"params={(.*?)} hits")
        self.qOnly = re.compile(r"&q=(.*?)&")
        self.qOnlyList = []

        self.numberHits = re.compile(r"hits=(\d+)")
        self.qTime = re.compile(r"QTime=(\d+)")

        # A query containing any of these substrings is skipped.
        self.jMeterQueries = ["%2Bthe+%2Bart+%2Bof+%2Bcomputer+%2Bprogramming",
                              "q=Faust", "event=newSearcher",
                              "start=0&q=%2Bsublocal_B1:[*+TO+*]%0a%0a%2B(%0asubtop_swd:[*+TO+*]++OR+%0asubpers_swd",
                              "Basler+Zeitschrift+für+Geschichte+und+Altertumskunde",
                              "start=0&q=%2Bsublocal_BY:[*+TO+*]%0a%0a%2B(%0asubtop_idsbb",
                              "start=0&q=%2B(%0asubtop_rero:[*+TO+*]++OR+%0asubpers_rero:[*+TO+*]",
                              "start=0&q=%2Bsublocal_BP",
                              "start=0&q=%2Bsublocal_BU",
                              "start=0&q=%2Bsublocal_BW",
                              "start=0&q=%2Bsublocal_G1",
                              "start=0&q=%2Bsublocal_G3",
                              "start=0&q=%2Bsublocal_G5",
                              "start=0&q=%2Bsublocal_G7",
                              "start=0&q=%2Bsublocal_G9",
                              "start=0&q=%2Bsublocal_GC",
                              "start=0&q=%2Bsublocal_GF",
                              "file=schema.xml&contentType=text/xml",
                              "file=admin-extra",
                              "file=solrconfig.xml",
                              "command=details",
                              "numTerms=0&show=index",
                              "q=title_long:The+art+of+computer+programming&rows=2",
                              "touchpoint/perma.do"]
        # A query must contain at least one of these substrings to be kept.
        self.relevantQueries = ["facet=true"]

    def __del__(self):
        """Close the MongoDB connection when the parser is discarded."""
        # getattr: __init__ may have failed before mongoWrapper was assigned.
        wrapper = getattr(self, "mongoWrapper", None)
        if wrapper is not None:
            wrapper.closeConnections()

    def _buildRecord(self, line):
        """Return the record to store for *line*, or None if the line is skipped."""
        found = self.qLineSolr5.search(line)
        if not found:
            return None

        query = found.group(1)
        if any(item in query for item in self.jMeterQueries):
            return None
        if not any(item in query for item in self.relevantQueries):
            return None

        hitsMatch = self.numberHits.search(line)
        timeMatch = self.qTime.search(line)
        if not (hitsMatch and timeMatch):
            return None

        hits = int(hitsMatch.group(1))
        if hits <= 1:
            return None

        # Ask for JSON instead of the binary javabin format and drop the
        # version parameter that accompanies it.
        query = query.replace("&wt=javabin", "&wt=json").replace("&version=2", "")
        return {
            "query": query,
            "time": int(timeMatch.group(1)),
            "hits": hits
        }

    def evaluate(self, line):
        """Store the query found in *line*, if the line qualifies.

        Insert errors are printed and otherwise ignored; only successful
        inserts are counted.
        """
        newrecord = self._buildRecord(line)
        if newrecord is None:
            return

        try:
            self.mongoWrapper.getCollection().insert(newrecord)
        except Exception as pythonBaseException:
            print(pythonBaseException)
        else:
            self.numberLinesWritten += 1

    def getNumberOfWrittenQueries(self):
        """Return how many records this parser has inserted so far."""
        return self.numberLinesWritten
|
||
|
||
|
||
|
||
if __name__ == '__main__':

    import os
    from argparse import ArgumentParser

    # Command line: -d names the directory holding the Solr log files.
    # -l / -s are still accepted and passed on, although the parser does not
    # use them.
    oParser = ArgumentParser()
    oParser.add_argument("-d", "--dir", dest="directory", required=True)
    oParser.add_argument("-l", "--longqueryFile", dest="wholequeryFile", default="./longquery.txt")
    oParser.add_argument("-s", "--shortqueryFile", dest="queryFile", default="./shortquery.txt")

    args = oParser.parse_args()

    # Resolve the directory once so that a relative -d keeps working; the
    # files are opened by full path instead of changing the working directory.
    tDir = os.path.abspath(args.directory)

    longqueryFile = args.wholequeryFile
    shortqueryFile = args.queryFile

    # One parser (and therefore one MongoDB connection) for all log files.
    pLog = ParseLogLine(shortqueryFile, longqueryFile)

    for fname in os.listdir(tDir):
        logPath = os.path.join(tDir, fname)
        if not os.path.isfile(logPath):
            # sub-directories cannot be read as log files
            continue

        sys.stdout.write("".join(["\n\n", "-----", fname, "-----", "\n"]))
        with open(logPath, "r") as iF:
            for line in iF:
                pLog.evaluate(line)

    numberOfLines = pLog.getNumberOfWrittenQueries()

    sys.stdout.write("".join(["\n\n", "-----", "number of queries: ", str(numberOfLines), "-----", "\n"]))
|
||
|