Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

hash_value & hash_email methods, uses a common random secret and MD5 for consistent transformation;

multiple databases in the same file;
multiple files at the command line;
example .yml for developer.mozilla.org
  • Loading branch information...
commit 67774c0c95a34551420c2b6afa2c9d9ba41d861d 1 parent 610c2d2
@lmorchard authored
Showing with 160 additions and 7 deletions.
  1. +43 −5 anonymize.py
  2. +2 −2 anonymize.yml
  3. +115 −0 developer_mozilla_org.yml
View
48 anonymize.py
@@ -1,9 +1,12 @@
#!/usr/bin/env python
# This assumes an id on each field.
import logging
+import hashlib
+import random
log = logging.getLogger('anonymize')
+common_hash_secret = "%016x" % (random.getrandbits(128))
def get_truncates(config):
@@ -31,6 +34,8 @@ def get_deletes(config):
listify = lambda x: x if isinstance(x, list) else [x]
def get_updates(config):
+ global common_hash_secret
+
database = config.get('database', {})
tables = database.get('tables', [])
sql = []
@@ -53,6 +58,14 @@ def get_updates(config):
elif operation == 'random_username':
for field in listify(details):
updates.append("%s = CONCAT('_user_', id)" % field)
+ elif operation == 'hash_value':
+ for field in listify(details):
+ updates.append("%(field)s = MD5(CONCAT(@common_hash_secret, %(field)s))"
+ % dict(field=field))
+ elif operation == 'hash_email':
+ for field in listify(details):
+ updates.append("%(field)s = CONCAT(MD5(CONCAT(@common_hash_secret, %(field)s)), '@mozilla.com')"
+ % dict(field=field))
elif operation == 'delete':
continue
else:
@@ -63,19 +76,44 @@ def get_updates(config):
def anonymize(config):
+ database = config.get('database', {})
+
+ if 'name' in database:
+ print "USE %s;" % database['name']
+
+ print "SET FOREIGN_KEY_CHECKS=0;"
+
sql = []
sql.extend(get_truncates(config))
sql.extend(get_deletes(config))
sql.extend(get_updates(config))
- print 'SET FOREIGN_KEY_CHECKS=0;'
for stmt in sql:
print stmt + ';'
- print 'SET FOREIGN_KEY_CHECKS=1;'
+ print "SET FOREIGN_KEY_CHECKS=1;"
+ print
if __name__ == '__main__':
+
import yaml
import sys
- f = sys.argv[1] if len(sys.argv) > 1 else 'anonymize.yml'
- cfg = yaml.load(open(f))
- anonymize(cfg)
+
+ if len(sys.argv) > 1:
+ files = sys.argv[1:]
+ else:
+ files = [ 'anonymize.yml' ]
+
+ for f in files:
+ print "--"
+ print "-- %s" %f
+ print "--"
+ print "SET @common_hash_secret=rand();"
+ print ""
+ cfg = yaml.load(open(f))
+ if 'databases' not in cfg:
+ anonymize(cfg)
+ else:
+ databases = cfg.get('databases')
+ for name, sub_cfg in databases.items():
+ print "USE %s;" % name
+ anonymize({'database': sub_cfg})
View
4 anonymize.yml
@@ -1,5 +1,5 @@
---- This is a sample anonymize.yml file that's used for the Firefox Add-ons
---- database.
+# This is a sample anonymize.yml file that's used for the Firefox Add-ons
+# database.
database:
truncate:
View
115 developer_mozilla_org.yml
@@ -0,0 +1,115 @@
+# Anonymization rules for MDN wiki_mdc_deki
+
+databases:
+
+ lmo_wiki_mdc_deki:
+
+ truncate:
+ - objectcache
+ - querycache
+ - requestlog
+ - requeststats
+
+ tables:
+ attachments_backup:
+ hash_value:
+ - at_user_text
+ - at_removed_by_text
+ logins:
+ random_ip:
+ - login_ip_address
+ users:
+ hash_value:
+ - user_name
+ - user_real_name
+ hash_email:
+ - user_email
+ nullify:
+ - user_password
+ - user_newpassword
+ - user_token
+ - user_external_name
+
+ lmo_developer_mozilla_org_django:
+
+ truncate:
+ - auth_message
+ - django_admin_log
+ - django_session
+ - threadedcomments_freethreadedcomment
+ - threadedcomments_testmodel
+
+ tables:
+ actioncounters_actioncounterunique:
+ random_ip:
+ - ip
+ nullify:
+ - user_agent
+ - session_key
+ auth_user:
+ hash_value:
+ - username
+ random_email:
+ - email
+ random_int:
+ - first_name
+ - last_name
+ nullify:
+ - password
+ contentflagging_contentflag:
+ random_ip:
+ - ip
+ nullify:
+ - user_agent
+ - session_key
+ user_profiles:
+ random_int:
+ - location
+ - homepage
+
+ lmo_developer_mozilla_org_phpbb:
+
+ truncate:
+ - phpbb_sessions
+ - phpbb_sessions_keys
+
+ tables:
+ phpbb_banlist:
+ nullify:
+ - ban_ip
+ phpbb_log:
+ random_ip:
+ - log_ip
+ phpbb_moderator_cache:
+ hash_value:
+ - username
+ phpbb_posts:
+ nullify:
+ - post_username
+ random_ip:
+ - poster_ip
+ phpbb_privmsgs:
+ random_ip:
+ - author_ip
+ phpbb_profile_fields_data:
+ hash_value:
+ - pf_irc_nickname
+ phpbb_topics:
+ hash_value:
+ - topic_first_poster_name
+ - topic_last_poster_name
+ phpbb_users:
+ hash_value:
+ - username
+ - username_clean
+ hash_email:
+ - user_email
+ nullify:
+ - user_ip
+ - user_password
+ - user_passchg
+ - user_email_hash
+ - user_last_confirm_key
+ - user_lastpage
+ - user_form_salt
+
Please sign in to comment.
Something went wrong with that request. Please try again.