# Differences between source, shadow, and new tables
## (e.g. data integrity issue/foreign key missing errors)

In [1]:
import datajoint as dj

# Source schemas.
ratinfo = dj.create_virtual_module('ratinfo', 'bl_ratinfo')
bdata = dj.create_virtual_module('bdata', 'bl_bdata')

# Shadow schemas.
shadow_lab = dj.create_virtual_module('shadow_lab', 'bl_shadow_lab')
shadow_subject = dj.create_virtual_module('shadow_subject', 'bl_shadow_subject')
shadow_action = dj.create_virtual_module('shadow_action', 'bl_shadow_action')

# New schemas.
new_lab = dj.create_virtual_module('new_lab', 'bl_new_lab')
new_subject = dj.create_virtual_module('new_subject', 'bl_new_subject')
new_action = dj.create_virtual_module('new_action', 'bl_new_action')

Connecting kg7524@datajoint01.pni.princeton.edu:3306


---
## subject.Rats: source table - shadow table

In [211]:
# ratinfo.Rats entries (`internalID` renamed to `rats_old_id`) with no matching entry in shadow_subject.Rats.
ratinfo.Rats.proj(rats_old_id='internalID') - shadow_subject.Rats

rats_old_id
349
361
379
152
153
1043
1700
184
185
1658


In [213]:
# Context line for the figures printed below.
print('\nThe following ratinfo.Rats `experimenters` are not in ratinfo.Contacts, except `Alex` who has two entries in ratinfo.Contacts.')

# `experimenter` values of the ratinfo.Rats rows that have no counterpart in
# shadow_subject.Rats (shadow key renamed back to `internalID` for the anti-join).
rats_missing_experimenter = (
    ratinfo.Rats.proj('experimenter')
    - shadow_subject.Rats.proj(internalID='rats_old_id')
).fetch('experimenter')

print('\nNumber of entries:', len(rats_missing_experimenter))
print('\nList of experimenters:', set(rats_missing_experimenter))


The following ratinfo.Rats `experimenters` are not in ratinfo.Contacts, except `Alex` who has two entries in ratinfo.Contacts.

Number of entries: 10

List of experimenters: {'', 'AnimalFacility', 'Elyssa', 'Alex', 'experimenter', 'Jovanna/Min'}


## subject.Rats: shadow table - new table

In [214]:
# Primary keys of shadow_subject.Rats with no matching entry in new_subject.Rats.
shadow_subject.Rats.proj() - new_subject.Rats

rats_old_id


In [3]:
# Primary keys of shadow_subject.Rats.Contact with no matching entry in new_subject.Rats.Contact.
shadow_subject.Rats.Contact.proj() - new_subject.Rats.Contact

contact  PUIDs of the lab member(s) responsible for the rat,"ratname  Unique rat name, 1 letter 3 numbers"
,


---
## subject.RatHistory: source table - shadow table

In [4]:
# ratinfo.RatHistory entries (`internalID` renamed to `rathistory_old_id`) with no matching entry in shadow_subject.RatHistory.
ratinfo.RatHistory.proj(rathistory_old_id='internalID') - shadow_subject.RatHistory


rathistory_old_id
6363
6364
6365
6366
5281
862
4929
8816
2139


In [5]:
# Context line for the figures printed below.
print('\nThe following ratinfo.RatHistory `experimenters` are not in ratinfo.Contacts')

# `experimenter` values of the ratinfo.RatHistory rows that have no counterpart in
# shadow_subject.RatHistory (shadow key renamed back to `internalID` for the anti-join).
rathistory_missing_experimenter = (
    ratinfo.RatHistory.proj('experimenter')
    - shadow_subject.RatHistory.proj(internalID='rathistory_old_id')
).fetch('experimenter')

print('\nNumber of entries:', len(rathistory_missing_experimenter))
print('\nList of experimenters:', set(rathistory_missing_experimenter))


The following ratinfo.RatHistory `experimenters` are not in ratinfo.Contacts

Number of entries: 9

List of experimenters: {'B186', 'J226', 'experimenter', 'Ben, Christine'}


## subject.RatHistory: shadow table - new table

In [21]:
# Primary keys of shadow_subject.RatHistory with no matching entry in new_subject.RatHistory.
shadow_subject.RatHistory.proj() - new_subject.RatHistory

rathistory_old_id
7
8
21
23
148
171
211
512
574
612


In [44]:
# Shadow RatHistory rows that are absent from the new table, reduced to the shadow key
# plus `ratname` and `logtime` (the new table's primary key, per the message below).
rathistory_new_missing = (
    shadow_subject.RatHistory
    & (shadow_subject.RatHistory.proj() - new_subject.RatHistory)
).proj('ratname', 'logtime')

# Fetch the unmatched rat names once and reuse the result for both the count and the
# listing (the original ran the same query twice).
ratnames_not_in_rats = set(
    (rathistory_new_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')
)

print('\nUnique number of `ratname` missing in ratinfo.Rats: ', len(ratnames_not_in_rats))

print('\n`ratname` missing in ratinfo.Rats: ', ratnames_not_in_rats)

# (ratname, logtime) pairs that occur more than once in the shadow table, found with a
# single aggregation instead of issuing one query per missing row.
duplicated_keys = (
    dj.U('ratname', 'logtime').aggr(shadow_subject.RatHistory, n='count(*)') & 'n > 1'
).proj()

# Same quantity as before: the number of missing rows whose (ratname, logtime) pair is
# shared by more than one shadow row.
count = len(rathistory_new_missing & duplicated_keys)

print('\nNumber of new primary keys (ratname and logtime) with duplicate entries:', count)


Unique number of `ratname` missing in ratinfo.Rats:  29

`ratname` missing in ratinfo.Rats:  {'M084', '266', 'J288', 'F100', 'Z130', '0452', 'F133', 'U001', 'A171', 'J231', 'Z276', '0454', 'F132', 'J289', '0455', '0451', '229', '0450', '261', '0449', '0453', '0447', 'A172', '263', 'H054', 'H053', '0448', '0446', 'F084'}

Number of new primary keys (ratname and logtime) with duplicate entries: 1798


In [45]:
# Primary keys of shadow_subject.RatHistory.Contact with no matching entry in new_subject.RatHistory.Contact.
shadow_subject.RatHistory.Contact.proj() - new_subject.RatHistory.Contact

contact  PUIDs of the lab member(s) responsible for the rat,"ratname  Unique rat name, 1 letter 3 numbers"
,H125
,M102
,T232
aakrami,229
aakrami,A141
aakrami,A142
aakrami,B099
aakrami,B198
aakrami,B199
aakrami,B200


In [None]:
# Should the primary key be changed to contact & ratname?

---
## lab.Contacts: source table - shadow table

In [18]:
# ratinfo.Contacts entries (`contactid` renamed to `contacts_old_id`) with no matching entry in shadow_lab.Contacts.
ratinfo.Contacts.proj(contacts_old_id='contactid') - shadow_lab.Contacts


contacts_old_id
53
59
97


Issue addressed: duplicate emails for the `contactid` pairs 59 and 94, 53 and 71, and 12 and 97.

## lab.Contacts: shadow table - new table

In [19]:
# Primary keys of shadow_lab.Contacts with no matching entry in new_lab.Contacts.
shadow_lab.Contacts.proj() - new_lab.Contacts


contacts_old_id


---
## lab.RigMaintenance: source table - shadow table


In [23]:
# ratinfo.RigMaintenance entries (`maintenance_id` renamed to `rig_maintenance_id`) with no matching entry in shadow_lab.RigMaintenance.
ratinfo.RigMaintenance.proj(rig_maintenance_id='maintenance_id') - shadow_lab.RigMaintenance

rig_maintenance_id


## lab.RigMaintenance: shadow table - new table


In [21]:
# Primary keys of shadow_lab.RigMaintenance with no matching entry in new_lab.RigMaintenance.
shadow_lab.RigMaintenance.proj() - new_lab.RigMaintenance

rig_maintenance_id
733
734
774
808
810
811
829
831
832
837


In [82]:
# `rigid` of every ratinfo.RigMaintenance row whose key is in the shadow table but not
# in the new table; both keys are renamed back to the source name `maintenance_id`.
rigmaintenance_missing = (
    ratinfo.RigMaintenance
    & (
        shadow_lab.RigMaintenance.proj(maintenance_id='rig_maintenance_id')
        - new_lab.RigMaintenance.proj(maintenance_id='rig_maintenance_id')
    )
).fetch('rigid')

print('\nNumber of entries in ratinfo.RigMaintenance:', len(rigmaintenance_missing))
print('\nNumber of missing rigids in ratinfo.Riginfo:', len(set(rigmaintenance_missing)))
print('\nMissing rigids in ratinfo.Riginfo:', set(rigmaintenance_missing))


Number of entries in ratinfo.RigMaintenance: 197

Number of missing rigids in ratinfo.Riginfo: 31

Missing rigids in ratinfo.Riginfo: {303, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 325, 326, 327, 328, 329, 330, 331, 332, 333, 335, 336}


---
## lab.Rigflush: source table - shadow table

In [25]:
# ratinfo.Rigflush entries (`id` renamed to `rigflush_old_id`) with no matching entry in shadow_lab.Rigflush.
ratinfo.Rigflush.proj(rigflush_old_id='id') - shadow_lab.Rigflush

rigflush_old_id


## lab.Rigflush: shadow table - new table

In [26]:
# Primary keys of shadow_lab.Rigflush with no matching entry in new_lab.Rigflush.
shadow_lab.Rigflush.proj() - new_lab.Rigflush

rigflush_old_id


---
## lab.Rigfood: source table - shadow table

In [86]:
# ratinfo.Rigfood entries (`rigfoodid` renamed to `rigfood_id`) with no matching entry in shadow_lab.Rigfood.
ratinfo.Rigfood.proj(rigfood_id='rigfoodid') - shadow_lab.Rigfood

rigfood_id


## lab.Rigfood: shadow table - new table

In [87]:
# Primary keys of shadow_lab.Rigfood with no matching entry in new_lab.Rigfood.
shadow_lab.Rigfood.proj() - new_lab.Rigfood

rigfood_id
6155
6156
6157
6158
6159
6160
6161
6162
6163
6164


In [84]:
# `rigid` of every ratinfo.Rigfood row whose key is in the shadow table but not in the
# new table; both keys are renamed back to the source name `rigfoodid`.
rigfood_missing = (
    ratinfo.Rigfood
    & (
        shadow_lab.Rigfood.proj(rigfoodid='rigfood_id')
        - new_lab.Rigfood.proj(rigfoodid='rigfood_id')
    )
).fetch('rigid')

print('\nNumber of entries in lab.Rigfood:', len(rigfood_missing))
print('\nNumber of missing `rigids` in ratinfo.Riginfo:', len(set(rigfood_missing)))
print('\nMissing `rigids` in ratinfo.Riginfo:', set(rigfood_missing))


Number of entries in lab.Rigfood: 41

Number of missing `rigids` in ratinfo.Riginfo: 1

Missing `rigids` in ratinfo.Riginfo: {255}


---
## lab.Riginfo: source table - shadow table

In [31]:
# Primary keys of ratinfo.Riginfo with no matching entry in shadow_lab.Riginfo.
ratinfo.Riginfo.proj() - shadow_lab.Riginfo

rigid


## lab.Riginfo: shadow table - new table

In [85]:
# Primary keys of shadow_lab.Riginfo with no matching entry in new_lab.Riginfo.
shadow_lab.Riginfo.proj() - new_lab.Riginfo

rigid


---
## lab.TrainingRoom: source table - shadow table


In [34]:
# Primary keys of ratinfo.TrainingRoom with no matching entry in shadow_lab.TrainingRoom.
ratinfo.TrainingRoom.proj() - shadow_lab.TrainingRoom

tower


## lab.TrainingRoom: shadow table - new table

In [35]:
# Primary keys of shadow_lab.TrainingRoom with no matching entry in new_lab.TrainingRoom.
shadow_lab.TrainingRoom.proj() - new_lab.TrainingRoom

tower


---
## action.CalibrationInfoTbl: source table - shadow table

In [36]:
# bdata.CalibrationInfoTbl entries (`calibrationid` renamed to `calibration_info_tbl_id`) with no matching entry in shadow_action.CalibrationInfoTbl.
bdata.CalibrationInfoTbl.proj(calibration_info_tbl_id='calibrationid') - shadow_action.CalibrationInfoTbl

calibration_info_tbl_id
52913
52914
52915
52916
52919
52920
52921
52922
53095
76687


In [165]:
# Full source rows that never reached the shadow table (shadow key renamed back to
# the source name `calibrationid` for the anti-join).
calibration_missing = (
    bdata.CalibrationInfoTbl
    - shadow_action.CalibrationInfoTbl.proj(calibrationid='calibration_info_tbl_id')
)

# Split the missing rows on the all-zero `dateval`; the remainder is reported under
# the rig_id heading.
print(
    '\nIncorrect dateval:\n',
    calibration_missing & 'dateval = "0000-00-00 00:00:00"',
)

print(
    '\nIncorrect rig_id:\n',
    calibration_missing & 'dateval != "0000-00-00 00:00:00"',
)


Incorrect dateval:
 rig_id     initials     dateval        valve          timeval     dispense     isvalid     *calibrationid target     validity    
+--------+ +----------+ +------------+ +------------+ +---------+ +----------+ +---------+ +------------+ +--------+ +----------+
401        ED           0000-00-00 00: left1water     0.15        2.32         0           76687          HIGH                   
 (Total: 1)


Incorrect rig_id:
 rig_id     initials     dateval        valve          timeval     dispense     isvalid     *calibrationid target     validity    
+--------+ +----------+ +------------+ +------------+ +---------+ +----------+ +---------+ +------------+ +--------+ +----------+
          RE           2011-09-10 12: left1water     0.12        21.0         1           52913          LOW        PERM        
          RE           2011-09-10 12: right1water    0.15        21.0         1           52914          LOW        PERM        
          RE           2011-09-10 1

## action.CalibrationInfoTbl: shadow table - new table


In [173]:
# Primary keys of shadow_action.CalibrationInfoTbl with no matching entry in new_action.CalibrationInfoTbl.
shadow_action.CalibrationInfoTbl.proj() - new_action.CalibrationInfoTbl

calibration_info_tbl_id
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726


In [172]:
# Full shadow rows whose key has no counterpart in the new table.
calibration_missing_rigid = (
    shadow_action.CalibrationInfoTbl
    - new_action.CalibrationInfoTbl.proj()
)

# Distinct `rigid` values carried by those rows.
print(
    '\nMissing rigid in ratinfo.Riginfo:',
    set(calibration_missing_rigid.fetch('rigid')),
)


Missing rigid in ratinfo.Riginfo {44, 999}


---
## action.Mass: source table - shadow table

In [39]:
# ratinfo.Mass entries (`weighing` renamed to `mass_id`) with no matching entry in shadow_action.Mass.
ratinfo.Mass.proj(mass_id='weighing') - shadow_action.Mass

mass_id
722475
270029
176125
27
24
33
32
25
28
26


In [121]:
# Full ratinfo.Mass rows that never reached the shadow table (shadow key renamed back
# to the source name `weighing` for the anti-join).
mass_missing = ratinfo.Mass - shadow_action.Mass.proj(weighing='mass_id')

# `tech` of the missing rows whose date and time are not the all-zero placeholders.
# NOTE(review): attributing these rows to the `tech` lookup is an inference from the
# other two checks below — confirm against the ingestion code.
mass_missing_tech = (mass_missing & 'date != "0000-00-00"' & 'timeval != "0:00:00"').fetch('tech')

# This figure is a row count, not a count of distinct initials; the label now says so.
print('\nNumber of entries whose `tech` initials have either zero or multiple entries in ratinfo.Contacts:', len(mass_missing_tech))

print('\n`tech` initials with either zero or multiple entries in ratinfo.Contacts:', set(mass_missing_tech))

# len() on a query expression already counts rows, so no projection is needed first.
print('\nNumber of entries with `date = 0000-00-00` :', len(mass_missing & 'date = "0000-00-00"'))

print('\nNumber of entries with `timeval = 0:00:00` :', len(mass_missing & 'timeval = "0:00:00"'))


Number of `tech` initials with either zero or multiple entries in ratinfo.Contacts: 358192

`tech` initials with either zero or multiple entries in ratinfo.Contacts: {'EJD', 'RL', 'TZL', 'TB', 'AB', 'SB', 'FW', 'TJ', 'TH', 'SS', 'AS', 'JW', 'MP'}

Number of entries with `date = 0000-00-00` : 3

Number of entries with `timeval = 0:00:00` : 1872


## action.Mass: shadow table - new table

In [15]:
# Primary keys of shadow_action.Mass with no matching entry in new_action.Mass.
shadow_action.Mass.proj() - new_action.Mass

mass_id
778
864
982
1019
1103
1140
1220
1257
1278
1317


In [19]:
# Full shadow rows whose key has no counterpart in the new table.
mass_new_missing = shadow_action.Mass - new_action.Mass.proj()

# Distinct rat names known to the source table and to the new subject table.
ratnames_in_source = dj.U('ratname') & ratinfo.Rats
ratnames_in_new = dj.U('ratname') & new_subject.Rats

print(
    '\nNumber of `ratname` missing in bl_new_action.Mass: ',
    len(set(mass_new_missing.fetch('ratname'))),
)

print(
    '\n`ratname` present in ratinfo.Rats: ',
    set((mass_new_missing & ratnames_in_source).fetch('ratname')),
)

print(
    '\n`ratname` missing in ratinfo.Rats: ',
    set((mass_new_missing - ratnames_in_source).fetch('ratname')),
)

# Printed as the raw fetched array (not a set), as in the original cell.
print(
    '\n`ratname` present in bl_new_subject.Rats: ',
    (mass_new_missing & ratnames_in_new).fetch('ratname'),
)


Number of `ratname` missing in bl_new_action.Mass:  27

`ratname` present in ratinfo.Rats:  {'T075', 'J090', 'Z004', 'Z089', 'J096', 'Z003', 'J089'}

`ratname` missing in ratinfo.Rats:  {'M084', 'E057', 'J288', 'F100', 'U011', 'M021', 'U001', 'A171', 'TANK', 'J289', 'A172', 'H054', 'H053', 'E068', 'E055', 'J148', 'B518', 'F084', 'B11', '   0'}

`ratname` present in bl_new_subject.Rats:  []


---
## action.Rigwater: source table - shadow table

In [43]:
# ratinfo.Rigwater entries (`id` renamed to `rigwater_id`) with no matching entry in shadow_action.Rigwater.
ratinfo.Rigwater.proj(rigwater_id='id') - shadow_action.Rigwater



rigwater_id
1098
1100
1102
1104
1106
1108
1110
1112
1114
1116


In [101]:
# Full ratinfo.Rigwater rows that never reached the shadow table (shadow key renamed
# back to the source name `id` for the anti-join).
rigwater_missing_date = ratinfo.Rigwater - shadow_action.Rigwater.proj(id='rigwater_id')

# Fetch `dateval` once and reuse it for both printed figures (the original issued the
# same fetch twice).
rigwater_missing_datevals = rigwater_missing_date.fetch('dateval')

print('\nNumber of entries in ratinfo.Rigwater: ', len(rigwater_missing_datevals))

print('\nError with date:', set(rigwater_missing_datevals))


Number of entries in ratinfo.Rigwater:  27

Error with date: {'0000-00-00', '2017-05-00', '2017-01-00', '2016-12-00'}


## action.Rigwater: shadow table - new table

In [12]:
# Primary keys of shadow_action.Rigwater with no matching entry in new_action.Rigwater.
shadow_action.Rigwater.proj() - new_action.Rigwater


rigwater_id
1
2
166
4453
4456
4556
4557
4558
4559
4560


In [13]:
# ratinfo.Rigwater rows whose key is in the shadow table but not in the new table;
# both keys are renamed back to the source name `id`.
rigwater_missing_ratname = ratinfo.Rigwater & (
    shadow_action.Rigwater.proj(id='rigwater_id')
    - new_action.Rigwater.proj(id='rigwater_id')
)

# Fetch `ratname` once and reuse it for both printed figures (the original issued the
# same fetch twice).
rigwater_missing_ratnames = rigwater_missing_ratname.fetch('ratname')

print('\nNumber of entries in ratinfo.Rigwater: ', len(rigwater_missing_ratnames))

print('\nMissing ratname in ratinfo.Rats: ', set(rigwater_missing_ratnames))


Number of entries in ratinfo.Rigwater:  59

Missing ratname in ratinfo.Rats:  {'X000', 'Z999', 'BROK', 'F100', 'F084', 'M000', '100', 'ratn', 'F066', 'K999'}


---
## action.Schedule: source table - shadow table


In [10]:
# ratinfo.Schedule entries (`schedentryid` renamed to `schedule_id`) with no matching entry in shadow_action.Schedule.
ratinfo.Schedule.proj(schedule_id='schedentryid') - shadow_action.Schedule

schedule_id
136150
136151
136152
136153
136154
220996
221001
221063


In [94]:
# Full ratinfo.Schedule rows that never reached the shadow table (shadow key renamed
# back to the source name `schedentryid` for the anti-join).
schedule_missing = (
    ratinfo.Schedule
    - shadow_action.Schedule.proj(schedentryid='schedule_id')
)

# Distinct `date` values carried by those rows.
print(
    'Error with date:',
    set(schedule_missing.fetch('date')),
)

Error with date: {'0000-00-00'}


## action.Schedule: shadow table - new table

In [11]:
# Primary keys of shadow_action.Schedule with no matching entry in new_action.Schedule.
shadow_action.Schedule.proj() - new_action.Schedule

schedule_id


---
## action.Surgery: source table - shadow table

In [59]:
# ratinfo.Surgery entries (`id` renamed to `surgery_old_id`) with no matching entry in shadow_action.Surgery.
ratinfo.Surgery.proj(surgery_old_id='id') - shadow_action.Surgery

surgery_old_id
1
2
56
115
117
119
120
163
192
229


In [182]:
# Full ratinfo.Surgery rows that never reached the shadow table (shadow key renamed
# back to the source name `id` for the anti-join).
surgery_missing = (
    ratinfo.Surgery
    - shadow_action.Surgery.proj(id='surgery_old_id')
)

# Distinct `date` values carried by those rows.
print(
    '\nIncorrect date: ',
    set(surgery_missing.fetch('date')),
)


Incorrect date:  {'0000-00-00'}


## action.Surgery: shadow table - new table

In [6]:
# Primary keys of shadow_action.Surgery with no matching entry in new_action.Surgery.
shadow_action.Surgery.proj() - new_action.Surgery

surgery_old_id
53
121
122
159
160
179
263


In [7]:
# The two rats named here are reported separately and excluded from the lookup below.
print('\nDuplicate primary key for new table (ratname, surgery_date, surgery_starttime): W055, W009')

# `ratname` of the shadow rows that are absent from the new table, leaving out the
# two rats already listed above.
surgery_missing_ratname = (
    shadow_action.Surgery
    & (shadow_action.Surgery.proj() - new_action.Surgery)
    & 'ratname != "W009"'
    & 'ratname != "W055"'
).fetch('ratname')

print('\n`ratname` not in ratinfo.Rats:', surgery_missing_ratname)


Duplicate primary key for new table (ratname, surgery_date, surgery_starttime): W055, W009

`ratname` not in ratinfo.Rats: ['T09' 'Z58' 'Z60' 'A280']


---
## action.TechSchedule: source table - shadow table


In [60]:
# Primary keys of ratinfo.TechSchedule with no matching entry in shadow_action.TechSchedule.
ratinfo.TechSchedule.proj() - shadow_action.TechSchedule


scheduleid
810


In [95]:
# Full ratinfo.TechSchedule rows whose key has no counterpart in the shadow table.
techschedule_missing = ratinfo.TechSchedule & (
    ratinfo.TechSchedule.proj()
    - shadow_action.TechSchedule
)

# Printing the query expression shows a text preview of its rows.
print(
    'Error with date:\n',
    techschedule_missing,
)

Error with date:
 date           day     overnight     *scheduleid    morning     evening    
+------------+ +-----+ +-----------+ +------------+ +---------+ +---------+
0000-00-00             Adrian        810            Klaus       Jovanna    
 (Total: 1)



## action.TechSchedule: shadow table - new table

In [63]:
# Primary keys of shadow_action.TechSchedule with no matching entry in new_action.TechSchedule.
shadow_action.TechSchedule.proj() - new_action.TechSchedule



scheduleid


---
## action.Technotes: source table - shadow table

In [65]:
# ratinfo.Technotes entries (`technoteid` renamed to `technote_id`) with no matching entry in shadow_action.Technotes.
ratinfo.Technotes.proj(technote_id='technoteid') - shadow_action.Technotes


technote_id


## action.Technotes: shadow table - new table

In [66]:
# Primary keys of shadow_action.Technotes with no matching entry in new_action.Technotes.
shadow_action.Technotes.proj() - new_action.Technotes


technote_id


---
## action.Water: source table - shadow table

In [4]:
# ratinfo.Water entries (`watering` renamed to `water_id`) with no matching entry in shadow_action.Water.
ratinfo.Water.proj(water_id='watering') - shadow_action.Water

water_id
455474
455475
455476
455477
455479


In [71]:
# Full ratinfo.Water rows that never reached the shadow table (shadow key renamed
# back to the source name `watering` for the anti-join).
water_missing = (
    ratinfo.Water
    - shadow_action.Water.proj(watering='water_id')
)

# Distinct `date` values carried by those rows.
print(
    'Error with date:',
    set(water_missing.fetch('date')),
)


Error with date: {'0000-00-00'}


## action.Water: shadow table - new table

In [2]:
# Primary keys of shadow_action.Water with no matching entry in new_action.Water.
shadow_action.Water.proj() - new_action.Water

water_id
3384
3516
3517
4281
4282
4403
4404
4503
4504
4603


In [13]:
# Full shadow rows whose key has no counterpart in the new table.
# Bound to its own name: the source-vs-shadow cell earlier in this notebook already
# assigns `water_missing`, and rebinding it here made the value depend on which cell
# ran last.
water_new_missing = shadow_action.Water - new_action.Water.proj()

print('\nNumber of `ratname` missing in bl_new_action.Water: ', len(set(water_new_missing.fetch('ratname'))))

print('\n`ratname` present in ratinfo.Rats: ', set((water_new_missing & (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

print('\n`ratname` missing in ratinfo.Rats: ', set((water_new_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

# Printed as the raw fetched array (not a set), as in the original cell.
print('\n`ratname` present in bl_new_subject.Rats: ', (water_new_missing & (dj.U('ratname') & new_subject.Rats)).fetch('ratname'))


Number of `ratname` missing in bl_new_action.Water:  50

`ratname` present in ratinfo.Rats:  {'T075', 'J090', 'Z004', 'Z089', 'J096', 'Z003', 'J089'}

`ratname` missing in ratinfo.Rats:  {'M084', 'F084', '`', 'U005', 'E057', 'J288', 'F100', 'S191', 'U011', 'Z130', 'M021', 'F133', 'U001', 'BROK', 'A171', '0058', 'U012', 'U006', 'U019', 'TANK', 'U030', 'F132', 'CO64', 'J289', '0014', 'U018', 'U029', 'U010', 'U015', 'U032', 'A172', 'H054', 'B010', 'H053', 'E068', 'U016', 'U021', 'E055', 'J148', 'U034', '0048', 'U017', 'U009'}

`ratname` present in bl_new_subject.Rats:  []
