Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

added support for composite keys as primary key

  • Loading branch information...
commit aab50f7e06f2cc0ddd38bac25b190bc80cba6a43 1 parent dd798b5
@sunitparekh authored
View
14 README.md
@@ -24,7 +24,7 @@ database 'DatabaseName' do
# User -> table name (case sensitive)
table 'User' do
# id, DateOfBirth, FirstName, LastName, UserName, Password -> table column names (case sensitive)
- primary_key 'id'
+ primary_key 'id' # composite key is also supported
anonymize 'DateOfBirth','FirstName','LastName' # uses default anonymization based on data types
anonymize('UserName').using FieldStrategy::StringTemplate.new('user#{row_number}')
anonymize('Password') { |field| "password" }
@@ -39,6 +39,11 @@ Run using:
$ ruby my_dsl.rb
+## Examples
+
+1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/whitelist_dsl.rb)
+2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/blacklist_dsl.rb)
+
#### Share feedback
Please use Github [issues](https://github.com/sunitparekh/data-anonymization/issues) to share feedback, feature suggestions and report issues.
@@ -374,12 +379,6 @@ database 'Chinook' do
end
```
-## Examples
-
-1. [Whitelist](https://github.com/sunitparekh/data-anonymization/blob/master/whitelist_dsl.rb)
-2. [Blacklist](https://github.com/sunitparekh/data-anonymization/blob/master/blacklist_dsl.rb)
-
-
## Logging
How do I switch off the progress bar?
@@ -402,6 +401,7 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
1. Added the progress bar using 'powerbar' gem. Which also shows the ETA for each table.
2. Added More strategies
3. Fixed default anonymization strategies for boolean and integer values
+4. Added support for composite primary key
#### 0.1.2 (August 14, 2012)
View
1  data-anonymization.gemspec
@@ -18,6 +18,7 @@ Gem::Specification.new do |gem|
gem.require_paths = ["lib"]
gem.add_dependency('activerecord', '~> 3.2.8')
+ gem.add_dependency('composite_primary_keys', '~> 5.0.8')
gem.add_dependency('activesupport', '~> 3.2.8')
gem.add_dependency('rgeo', '~> 0.3.15')
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
View
1  lib/data-anonymization.rb
@@ -5,6 +5,7 @@
require "utils/random_float"
require "utils/random_string"
require "utils/geojson_parser"
+require "utils/progress_bar"
require "utils/resource"
require "core/database"
require "core/field"
View
30 lib/strategy/base.rb
@@ -16,10 +16,15 @@ def process_fields &block
self
end
- def primary_key field
- @primary_key = field
+ def primary_key *fields
+ @primary_keys = fields
end
+ def is_primary_key? field
+ @primary_keys.select { |key| field.downcase == key.downcase }.length > 0
+ end
+
+
def whitelist *fields
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
end
@@ -45,25 +50,26 @@ def self.using field_strategy
end
def dest_table
- @dest_table ||= Utils::DestinationTable.create @name, @primary_key
+ @dest_table ||= Utils::DestinationTable.create @name, @primary_keys
end
def source_table
- @source_table ||= Utils::SourceTable.create @name, @primary_key
+ @source_table ||= Utils::SourceTable.create @name, @primary_keys
end
def process
- progress_bar = PowerBar.new unless ENV['show_progress'] && ENV['show_progress'] == 'false'
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
total = source_table.count
- index = 1
- progress_bar.show(:msg => "Table: #{@name}", :done => index, :total => total) if progress_bar
- source_table.find_each(:batch_size => 100) do |record|
- process_record index, record
- index += 1
- progress_bar.show(:msg => "Table: #{@name}", :done => index, :total => total) if (index % 1000 == 0) && progress_bar
+ if total > 0
+ index = 1
+ progress_bar = DataAnon::Utils::ProgressBar.new @name, total
+ source_table.all.each do |record|
+ process_record index, record
+ index += 1
+ progress_bar.show(index)
+ end
+ progress_bar.close
end
- progress_bar.close if progress_bar
end
end
View
3  lib/strategy/blacklist.rb
@@ -6,13 +6,14 @@ def process_record index, record
@fields.each do |field, strategy|
database_field_name = record.attributes.select { |k,v| k.downcase == field }.keys[0]
field_value = record.attributes[database_field_name]
- unless field_value.nil? || database_field_name.downcase == @primary_key.downcase
+ unless field_value.nil? || is_primary_key?(database_field_name)
field = DataAnon::Core::Field.new(database_field_name, field_value, index, record)
record[database_field_name] = strategy.anonymize(field)
end
end
record.save!
end
+
end
end
end
View
2  lib/strategy/field/string/select_from_database.rb
@@ -6,7 +6,7 @@ class SelectFromDatabase
include Utils::Logging
def initialize table_name, field_name
- source = Utils::SourceTable.create table_name
+ source = Utils::SourceTable.create table_name, []
@values = source.select(field_name).uniq.collect { |record| record[field_name]}
logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
View
6 lib/strategy/whitelist.rb
@@ -5,14 +5,16 @@ class Whitelist < DataAnon::Strategy::Base
def process_record(index, record)
dest_record_map = {}
record.attributes.each do |field_name, field_value|
- unless field_value.nil? || field_name.downcase == @primary_key.downcase
+ unless field_value.nil? || is_primary_key?(field_name)
field = DataAnon::Core::Field.new(field_name, field_value, index, record)
field_strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
dest_record_map[field_name] = field_strategy.anonymize(field)
end
end
dest_record = dest_table.new dest_record_map
- dest_record[@primary_key] = record[@primary_key]
+ @primary_keys.each do |key|
+ dest_record[key] = record[key]
+ end
dest_record.save!
end
View
14 lib/utils/database.rb
@@ -1,4 +1,5 @@
require 'active_record'
+require 'composite_primary_keys'
require 'logger'
module DataAnon
@@ -23,10 +24,11 @@ class DestinationDatabase < ActiveRecord::Base
class BaseTable
- def self.create_table table_name, primary_key, database
+ def self.create_table database, table_name, primary_keys
Class.new(database) do
self.table_name = table_name
- self.primary_key = primary_key
+ self.primary_keys = primary_keys if primary_keys.length > 1
+ self.primary_key = primary_keys[0] if primary_keys.length == 1
self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
end
end
@@ -35,16 +37,16 @@ def self.create_table table_name, primary_key, database
class SourceTable < BaseTable
- def self.create table_name, primary_key = nil
- create_table table_name, primary_key, SourceDatabase
+ def self.create table_name, primary_key
+ create_table SourceDatabase, table_name, primary_key
end
end
class DestinationTable < BaseTable
- def self.create table_name, primary_key = nil
- create_table table_name, primary_key, DestinationDatabase
+ def self.create table_name, primary_key
+ create_table DestinationDatabase, table_name, primary_key
end
end
View
29 lib/utils/progress_bar.rb
@@ -0,0 +1,29 @@
+module DataAnon
+ module Utils
+
+ class ProgressBar
+
+ def initialize table_name, total
+ @total = total
+ @table_name = table_name
+ @progress_bar = PowerBar.new if total > 0 && show_progress
+ end
+
+ def show_progress
+ ENV['show_progress'] != 'false'
+ end
+
+ def show index
+ if @progress_bar && ((index % 1000 == 0) || (index == @total) || (index == 1))
+ @progress_bar.show(:msg => "Table: #{@table_name} (#{index}/#{@total})", :done => index, :total => @total)
+ end
+ end
+
+ def close
+ @progress_bar.close if @progress_bar
+ end
+
+ end
+
+ end
+end
View
2  lib/version.rb
@@ -1,3 +1,3 @@
module DataAnonymization
- VERSION = "0.2.0.rc2"
+ VERSION = "0.2.0.rc3"
end
View
2  spec/acceptance/rdbms_blacklist_spec.rb
@@ -22,7 +22,7 @@
end
DataAnon::Utils::SourceDatabase.establish_connection connection_spec
- source = DataAnon::Utils::SourceTable.create 'customers', 'cust_id'
+ source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
new_rec['email'].should == 'test+1@gmail.com'
View
2  spec/acceptance/rdbms_whitelist_spec.rb
@@ -32,7 +32,7 @@
end
DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
- dest_table = DataAnon::Utils::DestinationTable.create 'customers', 'cust_id'
+ dest_table = DataAnon::Utils::DestinationTable.create 'customers', ['cust_id']
new_rec = dest_table.find(CustomerSample::SAMPLE_DATA[:cust_id])
new_rec.first_name.should_not be("Sunit")
new_rec.last_name.should_not be("Parekh")
View
2  spec/support/customer_sample.rb
@@ -34,7 +34,7 @@ def self.create_schema connection_spec
def self.insert_record connection_spec, data_hash = SAMPLE_DATA
DataAnon::Utils::TempDatabase.establish_connection connection_spec
- source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
+ source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
cust = source.new data_hash
cust.cust_id = data_hash[:cust_id]
cust.save!
View
4 spec/utils/database_spec.rb
@@ -11,13 +11,13 @@
end
it "should test the connection to source database" do
- album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
+ album = DataAnon::Utils::SourceTable.create "Album", ["AlbumId"]
album.count.should > 0
album.all.length > 0
end
it "should test the connection to destination database" do
- album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
+ album = DataAnon::Utils::DestinationTable.create "Album", ["AlbumId"]
album.count.should == 0
album.all.length == 0
Please sign in to comment.
Something went wrong with that request. Please try again.