Permalink
Browse files

urlscanner

  • Loading branch information...
torhve committed Apr 26, 2012
1 parent 9df3e73 commit a5d62ca9954856de2fa2bf7fab600b167f24c23b
Showing with 40 additions and 0 deletions.
  1. +40 −0 logurlscanner.py
View
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import sys, sqlite3, os, time
+
+import re
+octet = r'(?:2(?:[0-4]\d|5[0-5])|1\d\d|\d{1,2})'  # one IPv4 octet: 200-255, 100-199, or any one or two digits
+ipAddr = r'%s(?:\.%s){3}' % (octet, octet)  # dotted quad: four octets separated by '.'
+# Base the domain regex on RFC 1034 and RFC 1738.
+label = r'[0-9a-z][-0-9a-z]*[0-9a-z]?'  # one host label: an alphanumeric, then alphanumerics or '-'
+domain = r'%s(?:\.%s)*\.[a-z][-0-9a-z]*[a-z]?' % (label, label)  # one or more labels, then a final part that starts with a letter
+urlRe = re.compile(r'(\w+://(?:%s|%s)(?::\d+)?(?:/[^\])>\s]*)?)' % (domain, ipAddr), re.I)  # scheme://host[:port][/path]; the path stops at ']', ')', '>' or whitespace; matched case-insensitively
+class urldb(object):
+
+ def __init__(self):
+ filename = os.path.join('/home/xt/.weechat', 'urlserver.sqlite3')
+ self.conn = sqlite3.connect(filename)
+ self.cursor = self.conn.cursor()
+
+ def insert(self, time, nick, buffer_name, url, message, prefix):
+ execute = self.cursor.execute("insert into urls values (NULL, ?, ?, ?, ?, ?, ?)" ,(time, nick, buffer_name, url, message, prefix))
+
+ def close(self):
+ self.conn.commit()
+ self.cursor.close()
+ self.conn.close()
+
+if __name__ == '__main__':
+ db = urldb()
+
+ log = sys.argv[1]
+ for line in file(log, 'r'):
+ splitted = line.decode('UTF-8').split('\t')
+ wtime = splitted[0]
+ nick = splitted[1]
+ message = '\t'.join(splitted[2:])
+ for url in urlRe.findall(message):
+ wtime = time.mktime(time.strptime('2009-05-12 17:38:46', '%Y-%m-%d %H:%M:%S'))
+ db.insert(wtime, nick, sys.argv[1], url, message, nick)
+
+ db.close()

0 comments on commit a5d62ca

Please sign in to comment.