Skip to content

Commit

Permalink
added support for composite keys as primary key
Browse files Browse the repository at this point in the history
  • Loading branch information
sunitparekh committed Aug 17, 2012
1 parent dd798b5 commit aab50f7
Show file tree
Hide file tree
Showing 14 changed files with 77 additions and 35 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ database 'DatabaseName' do
# User -> table name (case sensitive)
table 'User' do
# id, DateOfBirth, FirstName, LastName, UserName, Password -> table column names (case sensitive)
primary_key 'id'
primary_key 'id' # composite key is also supported
anonymize 'DateOfBirth','FirstName','LastName' # uses default anonymization based on data types
anonymize('UserName').using FieldStrategy::StringTemplate.new('user#{row_number}')
anonymize('Password') { |field| "password" }
Expand All @@ -39,6 +39,11 @@ Run using:

$ ruby my_dsl.rb

## Examples

1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/whitelist_dsl.rb)
2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/blacklist_dsl.rb)

#### Share feedback
Please use GitHub [issues](https://github.com/sunitparekh/data-anonymization/issues) to share feedback, suggest features, and report issues.

Expand Down Expand Up @@ -374,12 +379,6 @@ database 'Chinook' do
end
```

## Examples

1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/whitelist_dsl.rb)
2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/blacklist_dsl.rb)


## Logging

How do I switch off the progress bar?
Expand All @@ -402,6 +401,7 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
1. Added a progress bar using the 'powerbar' gem, which also shows the ETA for each table.
2. Added more strategies
3. Fixed default anonymization strategies for boolean and integer values
4. Added support for composite primary key

#### 0.1.2 (August 14, 2012)

Expand Down
1 change: 1 addition & 0 deletions data-anonymization.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Gem::Specification.new do |gem|
gem.require_paths = ["lib"]

gem.add_dependency('activerecord', '~> 3.2.8')
gem.add_dependency('composite_primary_keys', '~> 5.0.8')
gem.add_dependency('activesupport', '~> 3.2.8')
gem.add_dependency('rgeo', '~> 0.3.15')
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
Expand Down
1 change: 1 addition & 0 deletions lib/data-anonymization.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require "utils/random_float"
require "utils/random_string"
require "utils/geojson_parser"
require "utils/progress_bar"
require "utils/resource"
require "core/database"
require "core/field"
Expand Down
30 changes: 18 additions & 12 deletions lib/strategy/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ def process_fields &block
self
end

def primary_key field
@primary_key = field
def primary_key *fields
@primary_keys = fields
end

# Reports whether +field+ names one of the primary key columns stored in
# @primary_keys. The comparison is case-insensitive.
#
# field - column name to test (String or Symbol).
#
# Returns true when the name matches a configured key, false otherwise.
# Also returns false when no primary key has been configured, rather than
# raising on a nil @primary_keys.
def is_primary_key? field
  wanted = field.to_s.downcase
  # Array() turns a nil @primary_keys into an empty list; to_s lets keys
  # declared as symbols match string column names.
  Array(@primary_keys).any? { |key| key.to_s.downcase == wanted }
end


# Marks each of the given columns as whitelisted by storing a fresh
# Whitelist field strategy under the lower-cased column name in @fields.
#
# fields - one or more column names.
def whitelist *fields
  fields.each do |column_name|
    @fields[column_name.downcase] = DataAnon::Strategy::Field::Whitelist.new
  end
end
Expand All @@ -45,25 +50,26 @@ def self.using field_strategy
end

def dest_table
@dest_table ||= Utils::DestinationTable.create @name, @primary_key
@dest_table ||= Utils::DestinationTable.create @name, @primary_keys
end

def source_table
@source_table ||= Utils::SourceTable.create @name, @primary_key
@source_table ||= Utils::SourceTable.create @name, @primary_keys
end

def process
progress_bar = PowerBar.new unless ENV['show_progress'] && ENV['show_progress'] == 'false'
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
total = source_table.count
index = 1
progress_bar.show(:msg => "Table: #{@name}", :done => index, :total => total) if progress_bar
source_table.find_each(:batch_size => 100) do |record|
process_record index, record
index += 1
progress_bar.show(:msg => "Table: #{@name}", :done => index, :total => total) if (index % 1000 == 0) && progress_bar
if total > 0
index = 1
progress_bar = DataAnon::Utils::ProgressBar.new @name, total
source_table.all.each do |record|
process_record index, record
index += 1
progress_bar.show(index)
end
progress_bar.close
end
progress_bar.close if progress_bar
end

end
Expand Down
3 changes: 2 additions & 1 deletion lib/strategy/blacklist.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ def process_record index, record
@fields.each do |field, strategy|
database_field_name = record.attributes.select { |k,v| k.downcase == field }.keys[0]
field_value = record.attributes[database_field_name]
unless field_value.nil? || database_field_name.downcase == @primary_key.downcase
unless field_value.nil? || is_primary_key?(database_field_name)
field = DataAnon::Core::Field.new(database_field_name, field_value, index, record)
record[database_field_name] = strategy.anonymize(field)
end
end
record.save!
end

end
end
end
2 changes: 1 addition & 1 deletion lib/strategy/field/string/select_from_database.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class SelectFromDatabase
include Utils::Logging

def initialize table_name, field_name
source = Utils::SourceTable.create table_name
source = Utils::SourceTable.create table_name, []
@values = source.select(field_name).uniq.collect { |record| record[field_name]}
logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "

Expand Down
6 changes: 4 additions & 2 deletions lib/strategy/whitelist.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ class Whitelist < DataAnon::Strategy::Base
def process_record(index, record)
dest_record_map = {}
record.attributes.each do |field_name, field_value|
unless field_value.nil? || field_name.downcase == @primary_key.downcase
unless field_value.nil? || is_primary_key?(field_name)
field = DataAnon::Core::Field.new(field_name, field_value, index, record)
field_strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
dest_record_map[field_name] = field_strategy.anonymize(field)
end
end
dest_record = dest_table.new dest_record_map
dest_record[@primary_key] = record[@primary_key]
@primary_keys.each do |key|
dest_record[key] = record[key]
end
dest_record.save!
end

Expand Down
14 changes: 8 additions & 6 deletions lib/utils/database.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'active_record'
require 'composite_primary_keys'
require 'logger'

module DataAnon
Expand All @@ -23,10 +24,11 @@ class DestinationDatabase < ActiveRecord::Base

class BaseTable

def self.create_table table_name, primary_key, database
def self.create_table database, table_name, primary_keys
Class.new(database) do
self.table_name = table_name
self.primary_key = primary_key
self.primary_keys = primary_keys if primary_keys.length > 1
self.primary_key = primary_keys[0] if primary_keys.length == 1
self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
end
end
Expand All @@ -35,16 +37,16 @@ def self.create_table table_name, primary_key, database

class SourceTable < BaseTable

def self.create table_name, primary_key = nil
create_table table_name, primary_key, SourceDatabase
def self.create table_name, primary_key
create_table SourceDatabase, table_name, primary_key
end

end

class DestinationTable < BaseTable

def self.create table_name, primary_key = nil
create_table table_name, primary_key, DestinationDatabase
def self.create table_name, primary_key
create_table DestinationDatabase, table_name, primary_key
end

end
Expand Down
29 changes: 29 additions & 0 deletions lib/utils/progress_bar.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module DataAnon
  module Utils

    # Progress reporter for one table, delegating the drawing to PowerBar.
    #
    # No PowerBar is created when the table has no rows or when the
    # 'show_progress' environment variable is set to 'false'; in that case
    # #show and #close do nothing.
    class ProgressBar

      # table_name - name shown in the progress message.
      # total      - number of records expected for the table.
      def initialize table_name, total
        @table_name = table_name
        @total = total
        @progress_bar = (total > 0 && show_progress) ? PowerBar.new : nil
      end

      # Returns false only when ENV['show_progress'] is exactly 'false'.
      def show_progress
        !(ENV['show_progress'] == 'false')
      end

      # Redraws the bar for record number +index+. Redraws happen only on
      # every 1000th record, on the record equal to the total, and on the
      # first record.
      def show index
        return nil if @progress_bar.nil?

        redraw = (index % 1000).zero? || index == @total || index == 1
        return nil unless redraw

        @progress_bar.show(:msg => "Table: #{@table_name} (#{index}/#{@total})", :done => index, :total => @total)
      end

      # Closes the underlying bar, if one was created.
      def close
        @progress_bar.close unless @progress_bar.nil?
      end

    end

  end
end
2 changes: 1 addition & 1 deletion lib/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module DataAnonymization
VERSION = "0.2.0.rc2"
VERSION = "0.2.0.rc3"
end
2 changes: 1 addition & 1 deletion spec/acceptance/rdbms_blacklist_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
end

DataAnon::Utils::SourceDatabase.establish_connection connection_spec
source = DataAnon::Utils::SourceTable.create 'customers', 'cust_id'
source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
new_rec['email'].should == 'test+1@gmail.com'

Expand Down
2 changes: 1 addition & 1 deletion spec/acceptance/rdbms_whitelist_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
end

DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
dest_table = DataAnon::Utils::DestinationTable.create 'customers', 'cust_id'
dest_table = DataAnon::Utils::DestinationTable.create 'customers', ['cust_id']
new_rec = dest_table.find(CustomerSample::SAMPLE_DATA[:cust_id])
new_rec.first_name.should_not be("Sunit")
new_rec.last_name.should_not be("Parekh")
Expand Down
2 changes: 1 addition & 1 deletion spec/support/customer_sample.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def self.create_schema connection_spec

def self.insert_record connection_spec, data_hash = SAMPLE_DATA
DataAnon::Utils::TempDatabase.establish_connection connection_spec
source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
cust = source.new data_hash
cust.cust_id = data_hash[:cust_id]
cust.save!
Expand Down
4 changes: 2 additions & 2 deletions spec/utils/database_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
end

it "should test the connection to source database" do
album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
album = DataAnon::Utils::SourceTable.create "Album", ["AlbumId"]
album.count.should > 0
album.all.length > 0
end

it "should test the connection to destination database" do
album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
album = DataAnon::Utils::DestinationTable.create "Album", ["AlbumId"]
album.count.should == 0
album.all.length == 0

Expand Down

0 comments on commit aab50f7

Please sign in to comment.