Permalink
Browse files

Started on documentation

  • Loading branch information...
1 parent e0b5e9f commit e3d942583c035e816fae815f83a30385244ec8a6 Krzysztof Piatkowski committed Jun 14, 2012
View
5 .hgignore
@@ -9,5 +9,6 @@ syntax:glob
target
/target$
.DS_Store
-minlogudtraekservice/src/main/resources/schema/*.wsdl
-jmeter*
+jmeter*
+README.pdf
+*\#*
View
19 README.md
@@ -0,0 +1,19 @@
+"Minlog" service
+====================
+Den nyeste udgave af dette dokument kan findes på
+
+Installationsvejledning
+-----------------------
+
+
+Driftsvejledning
+
+Design og Arkitektur beskrivelse
+
+Guide til anvendere
+
+Guide til udviklere
+
+Test vejledning
+
+Testrapport til sammenligning
View
0 README.txt
No changes.
View
49 documentation/pom.xml
@@ -0,0 +1,49 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<parent>
		<artifactId>minlog</artifactId>
		<groupId>dk.minlog</groupId>
		<version>1.0-SNAPSHOT</version>
		<relativePath>..</relativePath>
	</parent>
	<artifactId>documentation</artifactId>
	<dependencies>
		<!-- markdown -> html conversion -->
		<dependency>
			<groupId>org.markdownj</groupId>
			<artifactId>markdownj</artifactId>
			<version>0.3.0-1.0.2b4</version>
		</dependency>
		<!-- lenient html parsing -->
		<dependency>
			<groupId>org.ccil.cowan.tagsoup</groupId>
			<artifactId>tagsoup</artifactId>
			<version>1.2</version>
		</dependency>
		<!-- Flying Saucer: xhtml -> pdf rendering -->
		<dependency>
			<groupId>org.xhtmlrenderer</groupId>
			<artifactId>core-renderer</artifactId>
			<version>R8</version>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<plugin>
				<groupId>org.codehaus.mojo</groupId>
				<artifactId>exec-maven-plugin</artifactId>
				<version>1.2.1</version>
				<executions>
					<execution>
						<phase>test</phase>
						<goals>
							<goal>java</goal>
						</goals>
						<configuration>
							<!-- BUG FIX: removed trailing space after the class name;
							     "DocumentCompiler " (with a space) cannot be resolved
							     as a class by exec:java -->
							<mainClass>DocumentCompiler</mainClass>
						</configuration>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>

</project>
View
26 documentation/src/main/java/DocumentCompiler.java
@@ -0,0 +1,26 @@
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.Scanner;
+
+import org.xhtmlrenderer.pdf.ITextRenderer;
+
+import com.petebevin.markdown.MarkdownProcessor;
+
+public class DocumentCompiler {
+ public static void main(String[] args) throws Exception{
+ String source = "../README.md";
+ String target = "../README.pdf";
+ String sourceFile = new Scanner( new File(source), "UTF-8").useDelimiter("\\A").next();
+
+ // Convert from markdown to html
+ MarkdownProcessor mp = new MarkdownProcessor();
+ String html = "<html><body>" + mp.markdown(sourceFile) + "</body></html>";
+
+ // Use Document to create pdf
+ ITextRenderer renderer = new ITextRenderer();
+ renderer.setDocumentFromString(html);
+ renderer.layout();
+ renderer.createPDF(new BufferedOutputStream(new FileOutputStream(new File(target))));
+ }
+}
View
1 pom.xml
@@ -10,6 +10,7 @@
<module>minlogudtraekservice</module>
<module>functional-tests</module>
<module>test-deps</module>
+ <module>documentation</module>
</modules>
<properties>
View
82 splunk/Splunk2MySQL Flytteværktøj.txt
@@ -0,0 +1,82 @@
+!! Splunk til MySQL Flytte værktøj !!
+
+Scriptet er lavet til at Mappe data fra Splunk til Skema i MySQL. Mapning er som følger:
+
+Splunk MySQL
+------------------------------------------------------------------
+PersonCivilRegistration `cprNrBorger`
+UserIdentification `bruger`
+UserIdentificationOnBehalfOf `ansvarlig`
+HealthcareProfessionalOrganization		`orgUsingID`
+SourceSystemIdentifier `systemName`
+Activity `handling`
+SessionId `sessionId`
+"2012....." `tidspunkt`
+
+Med følgende Input til Splunk
+
+2012-06-07T15:26:48.388Z PersonCivilRegistrationIdentifier="100000001" UserIdentifier="0101001000" UserIdentifierOnBehalfOf="0101001000" HealthcareProfessionalOrganization="SOR:12345678" SourceSystemIdentifier="System name" Activity="Sundhedsperson med cpr-nummer 0101001001 har lavet opslag p data." SessionId="urn:uuid:bcf637c1-3032-4cb3-a353-ac36faca3503"
+
+!! Installation af Splunk til MySQL Flytteværktøj !!
+
+Requirements:
+
+For at kunne køre softwaren skal følgende være installeret
+
+* Python 2.7+
+* Splunk SDK for Python - https://github.com/splunk/splunk-sdk-python
+* Python MySQL API (pymysql) - https://github.com/petehunt/PyMySQL/
+
+Opsætning:
+
+* Installer Python 2.7+
+* Hent Splunk SDK herfra: https://github.com/splunk/splunk-sdk-python
+* Installer Splunk SDK på server med
+
+ python setup.py install
+
+* Hent MySQL API fra https://github.com/petehunt/PyMySQL/
+* Installer MySQL API på serveren med
+
+ python setup.py install
+
+!! Kørsel af Værktøj !!
+
+Scriptet er beregnet på at blive kørt af udenforstående automatisk kørsel, så som cron eller daemontools.
+
+!! Filer scriptet skriver i !!
+
+STATUS_FILE="Splunk2MySQL.status" - Denne fil indeholder den seneste Index Time som scriptet er nået til at flytte data for.
+
+SPLUNK_DATA_FILE="currentData.csv" - Fil som scriptet skriver med de seneste data hentet fra Splunk og som indsættes i MySQL
+
+LOG_FILE="Splunk2MySQL.log" - Scriptets logfil - Default loglevel er INFO - men for høj debug af actions kan dette rettes til DEBUG ved at rette linjen:
+
+ Fra:
+ logging.basicConfig(filename=LOG_FILE,level=logging.INFO,format='%(asctime)s, %(funcName)s, %(levelname)s, %(message)s', datefmt='%m/%d/%Y %H:%M:%S')
+
+ Til:
+ logging.basicConfig(filename=LOG_FILE,level=logging.DEBUG,format='%(asctime)s, %(funcName)s, %(levelname)s, %(message)s', datefmt='%m/%d/%Y %H:%M:%S')
+
+
+!! Scripts opbygning !!
+
+Scriptet gør følgende:
+
+* Connecter til Splunk
+* Validere Splunk søgningen
+* Udføre søgning i Splunk udfra hvor langt man er nået - hvis det er første gang scriptet køre vil det tage op til nu minus 30 sec. Ellers hentes alt der er indexeret siden sidste kørsel til nu minus 30 sec.
+* Gemmer data i CSV fil
+* Åbner forbindelse til MySQL
+* Læser CSV fil og indsætter data i MySQL
+* Skriver den højeste index tid i STATUS_FILE
+
+Hvis noget går galt vil dette blive skrevet ud til LOG_FILE og derfor er det vigtigt at denne overvåges for ERROR entries.
+
+!! Antagelser !!
+
+* Scriptet antager at man henter data udfra tidspunktet de er indexeret på i Splunk, da man ikke kan være sikker på at datoen i logs vil være fortløbende - nogle servere kan f.eks. være offline og først aflevere senere og således er den eneste sikre ting at hente data udfra hvornår de er indexeret i Splunk. Derfor bruges denne metode. Tests af scriptet ser ud til at virke i denne henseende. Desuden er der et delay på (default) 30 sekunder der sikrer at man ikke henter data op til det sekund som scriptet kører, da der kan nå at komme mere data i det sekund i Splunk, som skal sikres medtaget. Derfor er det nu minus 30 sekunder (default) der hentes for i det man kalder scriptet.
+
+!! Usikkerhed !!
+
+* Med den antagelse at man henter data ud af Splunk udfra Indextime og at man dermed får alt data med over i MySQL når scriptet køre. Der er dog ikke udviklet nogen verifikation af at alle data med sikkerhed er flyttet. Det ville være muligt at forsøge validere at der indenfor tidsrammen indextime-x til indextime-y er så mange entries i Splunk og så kigge på om scriptet også har flyttet så mange entries. Dette er nuværende ikke del af scriptet.
View
250 splunk/Splunk2MySQL.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python
+#
+#
+#
+
+import sys, os
+import csv
+import time
+from time import localtime,strftime,sleep
+from datetime import datetime
+import pymysql
+import logging
+import uuid
+
+from splunklib.binding import HTTPError
+import splunklib.client as client
+
#
# Variables
#

# Splunk Connection Details
# NOTE(review): credentials are hard-coded in plain text -- consider moving
# them to a permission-restricted config file.
SPLUNK_SERVER="localhost"
SPLUNK_PORT="8089"
SPLUNK_USER="admin"
SPLUNK_PASSWORD="abekat"
SPLUNK_SCHEMA="http" # http or https

# MySQL Connection Details
MYSQL_SERVER="localhost"
MYSQL_PORT=3306
MYSQL_USER="jvl"
MYSQL_PASSWORD="abekat"
MYSQL_DATABASE="minlog"
# The Insert statement to use for the MySQL database.
# NOTE(review): values are %-interpolated into the SQL text, not bound as
# parameters -- see writeToMySQL for the implications.
MYSQL_INSERT_STATEMENT="INSERT INTO LogEntry (regKode, cprNrBorger, bruger, ansvarlig, orgUsingID, systemName, handling, sessionid, tidspunkt) VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s');"

# Splunk Search Details
# Fields that will be selected from Splunk and returned.
SPLUNK_RETURN_FIELDS="_indextime, _time, PersonCivilRegistrationIdentifier, UserIdentifier, UserIdentifierOnBehalfOf, HealthcareProfessionalOrganization, SourceSystemIdentifier, Activity, SessionId"
# The number of seconds to go back for the _indextime search, to make sure
# that all events that will be indexed that second have been written.
SPLUNK_SEARCH_INDEX_TIME_DELAY_SEC=30
# Calculate max _indextime. NOTE(review): evaluated once at import time, so
# the script is assumed to run once per invocation (e.g. from cron), not as
# a long-lived process.
SPLUNK_SEARCH_INDEX_TIME=int(time.time()-SPLUNK_SEARCH_INDEX_TIME_DELAY_SEC)
# Construct the Splunk Search; %s is filled in later with the last moved
# _indextime (see searchSplunk).
SPLUNK_SEARCH='search index=main sourcetype=minlog (_indextime < ' + str(SPLUNK_SEARCH_INDEX_TIME) + ' AND _indextime > %s) | fields ' + SPLUNK_RETURN_FIELDS + ' | sort by _indextime asc'

# Helper files Details
STATUS_FILE="Splunk2MySQL.status" # File with the latest _indextime moved
SPLUNK_DATA_FILE="currentData.csv" # File to write data to when fetched from Splunk
LOG_FILE="Splunk2MySQL.log"

#
# Setup Logging
#
# Logging is set up with a 24-hour timestamp format and defaults to the INFO
# level; switch to logging.DEBUG for verbose per-action tracing.
#
logging.basicConfig(filename=LOG_FILE,level=logging.INFO,format='%(asctime)s, %(funcName)s, %(levelname)s, %(message)s', datefmt='%m/%d/%Y %H:%M:%S')
+
+#
+# Get a connection to Splunk and return the service object
+#
def connectSplunk():
    """Open a connection to Splunk and return the service object.

    BUG FIX: on failure the exception is now re-raised after logging.
    Previously it was swallowed and the function implicitly returned None,
    so callers crashed later with a confusing AttributeError instead of
    seeing the real connection error.
    """
    try:
        logging.debug('Connecting to SplunkServer=\"'+str(SPLUNK_SERVER)+'\"')
        service = client.connect(host=SPLUNK_SERVER, port=SPLUNK_PORT, schema=SPLUNK_SCHEMA, username=SPLUNK_USER, password=SPLUNK_PASSWORD)
        logging.debug('Returning service for SplunkServer=\"'+str(SPLUNK_SERVER)+'\"')
        return service
    except Exception as e:
        logging.error('Unable to connect to SplunkServer=\"'+str(SPLUNK_SERVER)+'\" ExceptionType=\"'+str(type(e))+'\" Message=\"'+str(e)+'\"')
        raise
+
+#
+# Debug function to call to debug Splunk connection
+#
+def testSplunkConn(service):
+ content = service.info
+ for key in sorted(content.keys()):
+ value = content[key]
+ if isinstance(value, list):
+ print "%s:" % key
+ for item in value: print " %s" % item
+ else:
+ print "%s: %s" % (key, value)
+
+ print "Settings:"
+ content = service.settings.content
+ for key in sorted(content.keys()):
+ value = content[key]
+ print " %s: %s" % (key, value)
+
+#
+# Debug function to call to debug the Splunk Search configured
+#
def testSplunkSearch(service):
    # Debug helper: ask the Splunk server to parse the configured search
    # without executing it. Returns 0 when the search parses, 1 otherwise.
    logging.debug('Parsing Splunk Search=\"'+str(SPLUNK_SEARCH)+'\"')
    try:
        service.parse(SPLUNK_SEARCH, parse_only=True)
    except HTTPError as e:
        logging.error('Unable to parse SplunkSearch=\"'+str(SPLUNK_SEARCH)+'\" ExceptionType=\"'+str(type(e))+ '\" Message=\"'+str(str(e))+'\"')
        return 1
    logging.debug('Done Parsing for SplunkSearch=\"'+str(SPLUNK_SEARCH)+'\"')
    return 0
+
+#
+# Call the Splunk search and write SPLUNK_DATA_FILE
+#
def searchSplunk(service):
    # Run the configured search from the last moved _indextime, wait for the
    # job to finish, and dump the results as CSV into SPLUNK_DATA_FILE.
    # Returns the result count (as reported by the job) for logging.
    LAST_INDEX_TIME=readStatusFile()
    SEARCH=SPLUNK_SEARCH %(LAST_INDEX_TIME)
    logging.debug('Submitting Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    job = service.jobs.create(SEARCH)
    logging.debug('Done Submitting Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')

    logging.debug('Checking Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    # Poll the job every 2 seconds until Splunk reports it done.
    # NOTE(review): job.refresh()(...) relies on the splunk-sdk-python Entity
    # call syntax returning the named content fields -- confirm against the
    # SDK version in use.
    while True:
        stats = job.refresh()(
            'isDone',
            'doneProgress',
            'scanCount',
            'eventCount',
            'resultCount')
        # progress/scanned/matched/results are computed but only 'isDone' is
        # actually used to decide when to stop waiting.
        progress = float(stats['doneProgress'])*100
        scanned = int(stats['scanCount'])
        matched = int(stats['eventCount'])
        results = int(stats['resultCount'])
        if stats['isDone'] == '1':
            break
        sleep(2)
    logging.debug('Done Checking Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    logging.debug('Reading Results for Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    # Fetch all results (count=0 = no limit) as CSV, restricted to the
    # configured return fields.
    results = job.results(output_mode="csv", field_list=SPLUNK_RETURN_FIELDS, count=0)
    COUNT=job.content.resultCount
    logging.info('Start Moving COUNT=\"'+job.content.resultCount+'\" from Splunk to MySQL')
    logging.debug('Done Reading Results for Search JOB=\"'+str(SEARCH)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    logging.debug('Writing CSV for Search JOB=\"'+str(SEARCH)+'\" to CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    # Stream the result body to disk in 1 KiB chunks; the file is
    # overwritten on every run.
    csvFile = open(SPLUNK_DATA_FILE,mode='w')
    while True:
        content = results.read(1024)
        if len(content) == 0: break
        csvFile.write(content)
    csvFile.close()
    job.cancel()
    logging.debug('Done Writing CSV for Search JOB=\"'+str(SEARCH)+'\" to CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
    return COUNT
+
+#
+# Insert data into MySQL and write new status to STATUS_FILE
+#
def writeToMySQL(COUNT):
    """Insert the rows from SPLUNK_DATA_FILE into MySQL and, on success,
    advance STATUS_FILE to the highest _indextime inserted.

    COUNT is the result count from searchSplunk and is only used for logging.
    Exits the process (exit code 2) if the MySQL connection fails.
    """
    # BUG FIX: pre-bind the resources so the finally block cannot raise
    # NameError (masking the real error) when connect()/open() itself fails.
    conn = None
    cur = None
    csvFile = None
    try:
        logging.debug('Connecting to MySQLServer=\"'+str(MYSQL_SERVER)+'\"')
        conn = pymysql.connect(host=MYSQL_SERVER, port=MYSQL_PORT, user=MYSQL_USER, passwd=MYSQL_PASSWORD, db=MYSQL_DATABASE, charset='utf8')
        logging.debug('Done Connecting to MySQLServer=\"'+str(MYSQL_SERVER)+'\"')
        cur = conn.cursor()

        INDEX_TIME=readStatusFile()
        OLD_INDEX_TIME=INDEX_TIME

        logging.debug('Opening CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
        csvFile=file(SPLUNK_DATA_FILE, mode="rb")
        logging.debug('Done Opening CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')

        csvFile.readline() # Skip first entry in file, this is the CSV headers

        logging.debug('Reading CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
        csvReader= csv.reader(csvFile)
        for data in csvReader:
            try:
                _indextime, _time, PersonCivilRegistrationIdentifier, UserIdentifier, UserIdentifierOnBehalfOf, HealthcareProfessionalOrganization, SourceSystemIdentifier, Activity, SessionId = data
                # NOTE(review): values are %-interpolated straight into the SQL
                # text -- a quote in e.g. Activity breaks the statement and this
                # is an SQL-injection risk. Prefer cur.execute(sql, params) with
                # a parameterized statement.
                SQL_STATEMENT = MYSQL_INSERT_STATEMENT %(uuid.uuid4(),PersonCivilRegistrationIdentifier,UserIdentifier,UserIdentifierOnBehalfOf,HealthcareProfessionalOrganization,SourceSystemIdentifier,Activity,SessionId,_time)
                INDEX_TIME=_indextime
                logging.debug('Insert data into MySQL PersonCivilRegistrationIdentifier=\"'+str(PersonCivilRegistrationIdentifier)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
                cur.execute(SQL_STATEMENT)
                logging.debug('Done Insert data into MySQL PersonCivilRegistrationIdentifier=\"'+str(PersonCivilRegistrationIdentifier)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
            except Exception as e:
                # Best-effort per-row handling: log, roll back, and continue
                # with the remaining rows (preserves original behavior).
                logging.error('Unable Write to MySQLServer=\"'+str(MYSQL_SERVER)+'\" ExceptionType=\"'+str(type(e))+ '\" Message=\"'+str(str(e))+'\"')
                logging.error('Rolling Back Database Changes')
                conn.rollback()
                logging.error('Done Rolling Back Database Changes')
        logging.debug('Done Reading CSVFILE=\"'+str(SPLUNK_DATA_FILE)+'\" LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
        logging.info('Inserted data into MySQL OLD_LAST_INDEX_TIME=\"'+str(OLD_INDEX_TIME)+'\" NEW_LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
        logging.debug('Closing Connection to MySQLServer=\"'+str(MYSQL_SERVER)+'\" OLD_LAST_INDEX_TIME=\"'+str(OLD_INDEX_TIME)+'\" NEW_LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')
        conn.commit()
        # BUG FIX: only record the new index time AFTER the commit succeeded.
        # Previously the status file was written before commit, so a failed
        # commit would silently skip those events on every later run.
        writeStatusFile(INDEX_TIME)
        logging.info('Moved COUNT=\"'+COUNT+'\" from Splunk to MySQL')
        logging.debug('Done Connection to MySQLServer=\"'+str(MYSQL_SERVER)+'\" OLD_LAST_INDEX_TIME=\"'+str(OLD_INDEX_TIME)+'\" NEW_LAST_INDEX_TIME=\"'+str(INDEX_TIME)+'\"')

    except Exception as e:
        logging.error('Unable To connect to MySQLServer=\"'+str(MYSQL_SERVER)+'\" ExceptionType=\"'+str(type(e))+ '\" Message=\"'+str(str(e))+'\"')
        sys.exit(2)

    finally:
        logging.debug('Closing Connection to MySQLServer=\"'+str(MYSQL_SERVER)+'\"')
        # BUG FIX: guard each close; also close the CSV file on the error
        # path (it was previously leaked when an exception escaped the loop).
        if csvFile is not None:
            csvFile.close()
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
        logging.debug('Done Closing Connection to MySQLServer=\"'+str(MYSQL_SERVER)+'\"')
+
+#
+# Read STATUS_FILE and return LAST_INDEX_TIME
+#
def readStatusFile():
    """Return the last moved _indextime from STATUS_FILE, or 0 when the
    status file does not exist (first run).

    Exits the process (exit code 2) if the file exists but cannot be read
    or does not contain a parseable integer.
    """
    # Open the Status file that tells where we are in the indexing time
    LAST_INDEX_TIME = 0;
    if os.path.isfile(STATUS_FILE):
        logging.debug('LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" Existed')
        logging.debug('Reading LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\"')
        # BUG FIX: pre-bind the handle so the finally block cannot raise
        # NameError (masking the SystemExit) when open() itself fails.
        LAST_INDEX_TIME_FILE = None
        try:
            LAST_INDEX_TIME_FILE = open(STATUS_FILE,'r')
            LAST_INDEX_TIME = long(LAST_INDEX_TIME_FILE.readline())

        # BUG FIX: also catch ValueError -- a corrupt status file previously
        # escaped as an unhandled exception instead of a clean error + exit.
        except (IOError, ValueError) as e:
            logging.error('Unable To Read LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" ExceptionType=\"'+str(type(e))+ '\" Message=\"'+str(str(e))+'\"')
            sys.exit(2)
        finally:
            logging.debug('Done Reading LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\"')
            if LAST_INDEX_TIME_FILE is not None:
                LAST_INDEX_TIME_FILE.close()
    else:
        logging.info('STATUS_FILE=\"'+str(STATUS_FILE)+'\" did not exist! Starting from zero')
    return LAST_INDEX_TIME
+
+#
+# Write new LAST_INDEX_TIME to STATUS_FILE
+#
def writeStatusFile(LAST_INDEX_TIME):
    """Persist LAST_INDEX_TIME to STATUS_FILE (skipped when <= 0, so a run
    that moved nothing never clobbers the existing status).

    Exits the process (exit code 2) on write failure.
    """
    if LAST_INDEX_TIME > 0:
        # BUG FIX: pre-bind the handle so the error/finally paths cannot
        # raise NameError when open() itself fails.
        LAST_INDEX_TIME_FILE = None
        try:
            logging.debug('Opening LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" for writing and reading OLD_LAST_INDEX_TIME')
            OLD_LAST_INDEX_TIME=readStatusFile()
            LAST_INDEX_TIME_FILE = open(STATUS_FILE,'w')
            logging.debug('Writing to LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" OLD_LAST_INDEX_TIME=\"'+str(OLD_LAST_INDEX_TIME)+'\" NEW_LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
            LAST_INDEX_TIME_FILE.write(str(LAST_INDEX_TIME))
            logging.debug('Done Writing to LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" OLD_LAST_INDEX_TIME=\"'+str(OLD_LAST_INDEX_TIME)+'\" NEW_LAST_INDEX_TIME=\"'+str(LAST_INDEX_TIME)+'\"')
            logging.debug('Done Opening LAST_INDEX_TIME_FILE=\"'+str(STATUS_FILE)+'\" for writing and reading OLD_LAST_INDEX_TIME')
        except IOError as e:
            # BUG FIX: the message previously concatenated the file OBJECT
            # (or an unbound name) instead of the path, so the error handler
            # itself could raise; log the path instead.
            logging.error('Error writing LAST_INDEX_TIME=\"' + str(LAST_INDEX_TIME) + '\" to LAST_INDEX_TIME_FILE=\"' + STATUS_FILE + '\"')
            sys.exit(2)
        finally:
            # BUG FIX: close in finally so the handle is not leaked when the
            # write fails part-way through.
            if LAST_INDEX_TIME_FILE is not None:
                LAST_INDEX_TIME_FILE.close()
+#
+# Define Main Flow
+#
#
# Define Main Flow
#
def main(argv):
    """One full run: connect to Splunk, validate and run the search, then
    move the fetched CSV rows into MySQL."""
    try:
        logging.info('Starting Run at TIME=\"'+str(datetime.now().isoformat(' '))+'\"')
        service = connectSplunk()
        testSplunkSearch(service)
        COUNT = searchSplunk(service)
        writeToMySQL(COUNT)
    except Exception as e:
        # BUG FIX: log at ERROR, not DEBUG. At the configured INFO level a
        # failed run was previously invisible in the log, contradicting the
        # ops instruction to monitor LOG_FILE for ERROR entries.
        logging.error('Exception under Run - ExceptionType=\"'+str(type(e))+ '\" Message=\"'+str(str(e))+'\"')
    finally:
        logging.info('Finished Run at TIME=\"'+str(datetime.now().isoformat(' '))+'\"')

if __name__ == "__main__":
    main(sys.argv[1:])

0 comments on commit e3d9425

Please sign in to comment.