# Differences between source, shadow, and new tables
### (e.g. data integrity issue/foreign key missing errors)

In [141]:
import datajoint as dj
import pandas as pd
import pygsheets
from django.core.serializers.json import DjangoJSONEncoder
import json
from oauth2client.service_account import ServiceAccountCredentials

dj.config['database.host'] = 'datajoint01.pni.princeton.edu'

SCOPES = ('https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive')
gc = pygsheets.authorize(service_file='u19datajoint-b121ee91f642.json')
spreadsheet_key = '1aS1M5RwhlYqoAko8aiIcbahL6aBAWe5RWX0_NUlvtRc'
spreadsheet_key2 = '1uijlaAoYUtRwweJ4R0A4YrEl9101rpqXKpAAOW-ZKoM'
spreadsheet_key3 = '1xIen1A4jjp9KA3BBVnQXH3qc-R0tY9we1EzAzMtTDxs'

### Create environments and open spreadsheet

In [184]:
sh = gc.open_by_key(spreadsheet_key)
sh2 = gc.open_by_key(spreadsheet_key2)
sh3 = gc.open_by_key(spreadsheet_key3)

ratinfo        = dj.create_virtual_module('ratinfo', 'bl_ratinfo')
bdata          = dj.create_virtual_module('bdata', 'bl_bdata')

shadow_lab     = dj.create_virtual_module('shadow_lab', 'bl_shadow_lab')
shadow_subject = dj.create_virtual_module('shadow_subject', 'bl_shadow_subject')
shadow_action  = dj.create_virtual_module('shadow_action', 'bl_shadow_action')

new_lab        = dj.create_virtual_module('new_lab', 'bl_new_lab')
new_subject    = dj.create_virtual_module('new_subject', 'bl_new_subject')
new_action     = dj.create_virtual_module('new_action', 'bl_new_action')

### Function to order dataframe and write it to spreadsheet

In [94]:
def write_df_spreadsheet(df, sh, sheet_name, sheet_title=None ,order_cols=None, sort_cols=None):

    #Reorder columns to make important columns to apperar first
    if order_cols:
        cols = df.columns.tolist()
        ordered_cols = order_cols
        cols_less = [x for x in cols if x not in ordered_cols]
        cols = ordered_cols + cols_less
        df = df[cols]
    
    #Sort dataframe
    if sort_cols:
        df = df.sort_values(sort_cols)

    #Write spreadsheet
    wks = sh.worksheet_by_title(sheet_name)
    if sheet_title:
        wks.update_value('A1', sheet_title)
    wks.resize(cols=2)
    wks.insert_cols(2, number=df.shape[1]-2, inherit=True)
    wks.set_dataframe(df,'C3')

    return df


### Table subject.Rats:  (rats that couldn't be copied)

In [49]:
#get Rats that couldn't be copied to new design
t = ratinfo.Rats - shadow_subject.Rats.proj(internalID='rats_old_id')
df_rats = pd.DataFrame(t.fetch(as_dict=True))
df_rats['deliverydate'] = df_rats['deliverydate'].astype(str)

df_rats = write_df_spreadsheet(df_rats, sh, 'rats_integrity',\
                     sheet_title='RATS WITH MISSING EXPERIMENTER OR DUPLICATED EXPERIMENTER',\
                     order_cols=['ratname', 'experimenter', 'deliverydate'], sort_cols=['deliverydate'])

df_rats

Unnamed: 0,ratname,experimenter,deliverydate,internalID,free,alert,contact,training,comments,vendor,...,extant,cagemate,forceFreeWater,dateSac,forceDepWater,bringUpAt,bringupday,ignoredByWatermeister,larid,israt
2,Z003,Alex,0000-00-00,184,0,0,,0,ordered on 2008-09-26,Taconic,...,0,,0,2009-08-21,0,0,,0,,1
3,Z004,Alex,0000-00-00,185,0,0,,0,ordered on 2008-09-26,Taconic,...,0,,0,2009-08-18,0,0,,0,,1
9,unna,experimenter,0000-00-00,1700,0,0,,0,,Taconic,...,0,,0,0000-00-00,0,0,,0,,1
5,J090,,2004-07-09,361,0,0,,0,"176-200, cannulation practice",Taconic,...,0,,0,2004-11-05,0,0,,0,,1
0,sen1,AnimalFacility,2007-12-05,152,0,0,,0,Sentinel Rats; FEMALE SD; 51-75g; Case 1576,Taconic,...,0,sen2,0,0000-00-00,0,0,,0,,1
1,sen2,AnimalFacility,2007-12-05,153,0,0,,0,Sentinel Rats; FEMALE SD; 51-75g; Case 1576,Taconic,...,0,sen1,0,0000-00-00,0,0,,0,,1
6,J096,,2009-04-23,379,0,0,,0,Ordered on 2009-04-17,Taconic,...,0,,0,0000-00-00,0,0,,0,,1
4,J089,,2009-07-09,349,0,0,,0,"ordered on 2009-03-27, wt 175-200g, SC cannula...",Taconic,...,0,J090,0,2009-11-02,1,0,,0,,1
7,T075,Elyssa,2011-10-04,1043,0,0,bbscott,1,Practice rat for Elyssa,Taconic,...,0,,0,0000-00-00,0,0,,0,,1
8,Z089,Jovanna/Min,2013-06-12,1658,0,0,begelfer,0,U032 eartagged to Z089. Surgery 6/12/2014. Per...,Taconic,...,0,,0,0000-00-00,0,0,,0,,1


In [26]:
print('\nThe following ratinfo.Rats `experimenters` are not in ratinfo.Contacts, except `Alex` who has two entries in ratinfo.Contacts.')

rats_missing_experimenter = (ratinfo.Rats.proj('experimenter') - shadow_subject.Rats.proj(internalID='rats_old_id')).fetch('experimenter')

print('\nNumber of entries:', len(rats_missing_experimenter))

print('\nList of experimenters:', set(rats_missing_experimenter))


The following ratinfo.Rats `experimenters` are not in ratinfo.Contacts, except `Alex` who has two entries in ratinfo.Contacts.

Number of entries: 10

List of experimenters: {'', 'experimenter', 'Elyssa', 'Jovanna/Min', 'AnimalFacility', 'Alex'}


### subject.Rats: shadow table - new table

In [27]:
shadow_subject.Rats.proj() - new_subject.Rats

rats_old_id


In [28]:
shadow_subject.Rats.Contact.proj() - new_subject.Rats.Contact

contact  PUIDs of the lab member(s) responsible for the rat,"ratname  Unique rat name, 1 letter 3 numbers"
,


## Table subject.Rathistory:  (rathistory that couldn't be copied)

In [47]:
#get Rats that couldn't be copied to new design
t = ratinfo.RatHistory - shadow_subject.RatHistory.proj(internalID='rathistory_old_id')
df_rathistory_missing_exp = pd.DataFrame(t.fetch(as_dict=True))


df_rathistory_missing_exp = write_df_spreadsheet(df_rathistory_missing_exp, sh, 'rathistory_experimenter_integrity',\
                     sheet_title='EXPERIMENTER IN RATHISTORY NOT IN CONTACTS',\
                     order_cols=['ratname', 'experimenter', 'logtime'], sort_cols=['logtime'])

df_rathistory_missing_exp

Unnamed: 0,ratname,experimenter,logtime,internalID,free,alert,contact,training,comments,waterperday,recovering,extant,cagemate,forceFreeWater,dateSac,forceDepWater,bringUpAt,bringupday,ignoredByWatermeister
0,unna,experimenter,2014-12-08 16:24:54,862,0,0,,0,,30,0,1,,0,0000-00-00,0,0,,0
1,Z111,experimenter,2015-05-29 14:37:22,2139,1,0,pbibawi,0,,30,0,1,Z112,0,0000-00-00,0,0,,0
2,unna,B186,2016-09-16 14:54:31,4929,0,0,,0,,30,0,1,,0,0000-00-00,0,0,,0
3,J226,J226,2016-11-22 11:24:32,5281,0,0,"bbscott, aakrami",0,born 9/24/16,30,0,1,J227,0,0000-00-00,0,0,,0
4,F101,"Ben, Christine",2017-03-21 18:08:17,6363,0,0,"bbscott, cmc9",0,,30,0,1,F102,0,0000-00-00,0,0,,0
5,F111,"Ben, Christine",2017-03-21 18:08:28,6364,0,0,"bbscott, cmc9",0,,30,0,1,F112,0,0000-00-00,0,0,,0
6,F113,"Ben, Christine",2017-03-21 18:10:55,6365,0,0,"bbscott, cmc9",0,,30,0,1,F114,0,0000-00-00,0,0,,0
7,F114,"Ben, Christine",2017-03-21 18:11:04,6366,0,0,"bbscott, cmc9",0,,30,0,1,F113,0,0000-00-00,0,0,,0
8,unna,experimenter,2018-02-05 12:54:20,8816,0,0,,0,,30,0,1,,0,0000-00-00,0,0,,0


In [46]:
print('\nThe following ratinfo.RatHistory `experimenters` are not in ratinfo.Contacts')

rathistory_missing_experimenter = (ratinfo.RatHistory.proj('experimenter') - shadow_subject.RatHistory.proj(internalID='rathistory_old_id')).fetch('experimenter')

print('\nNumber of entries:', len(rathistory_missing_experimenter))

print('\nList of experimenters:', set(rathistory_missing_experimenter))


The following ratinfo.RatHistory `experimenters` are not in ratinfo.Contacts

Number of entries: 9

List of experimenters: {'Ben, Christine', 'experimenter', 'B186', 'J226'}


### subject.RatHistory: shadow table - new table

In [46]:
#get Rathistory records that doesn't have matching ratname
t = (shadow_subject.RatHistory & (shadow_subject.RatHistory.proj() - new_subject.RatHistory))
df_rathistory_missing_rats = pd.DataFrame(t.fetch(as_dict=True))


df_rathistory_missing_rats = write_df_spreadsheet(df_rathistory_missing_rats, sh, 'rathistory_ratname_integrity',\
                     sheet_title='RATNAME IN RATHISTORY NOT IN RATS',\
                     order_cols=['rathistory_old_id', 'ratname', 'user_id', 'logtime'], sort_cols=['logtime'])

df_rathistory_missing_rats

Unnamed: 0,rathistory_old_id,ratname,user_id,logtime,free,alert,training,comments,waterperday,recovering,extant,cagemate,force_free_water,date_sac,force_dep_water,bring_up_at,bringupday,ignored_by_watermeister
0,7,A171,cduan,2014-06-27 19:26:19,0,0,0,,30,0,1,A172,0,,0,0,,0
1,8,A172,cduan,2014-06-27 19:26:26,0,0,0,,30,0,1,A171,0,,0,0,,0
2,21,F039,cmc9,2014-07-01 16:48:06,0,0,0,5/28/14: headplate implantation over PPC (from...,30,0,1,,0,,0,0,,0
3,23,F040,cmc9,2014-07-01 16:48:19,0,0,0,5/28/14: headplate implantation over PPC (from...,30,0,1,,0,,0,0,,0
4,148,F036,cmc9,2014-07-18 14:18:10,0,0,0,"12/2/13-headplate over right PPC, SD",30,0,0,,0,,9,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1862,14948,A307,abondy,2020-09-30 17:25:21,0,0,0,Drd2-iCre +/- #511,30,1,1,A306,0,,0,0,,0
1863,14962,T281,zhihaol,2020-10-03 19:53:48,0,0,0,,30,1,1,T282,0,,0,0,,0
1864,14963,T281,zhihaol,2020-10-03 19:53:48,0,0,0,,30,1,1,,0,,0,0,,0
1865,14966,T282,zhihaol,2020-10-03 19:53:58,0,0,0,,30,1,1,,0,,0,0,,0


In [53]:
rathistory_new_missing = (shadow_subject.RatHistory & (shadow_subject.RatHistory.proj() - new_subject.RatHistory)).proj('ratname', 'logtime')

print('\nUnique number of `ratname` missing in ratinfo.Rats: ', len(set((rathistory_new_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname'))))

print('\n`ratname` missing in ratinfo.Rats: ', set((rathistory_new_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))




Unique number of `ratname` missing in ratinfo.Rats:  29

`ratname` missing in ratinfo.Rats:  {'0449', 'J231', 'J288', '229', 'H053', '0448', 'F132', 'H054', 'A171', '0452', 'F084', '0447', 'F133', '0455', '261', '0454', 'Z276', '0446', 'U001', 'Z130', 'J289', '0450', '266', '263', '0451', 'M084', '0453', 'A172', 'F100'}


In [59]:
shadow_subject.RatHistory.Contact - new_subject.RatHistory.Contact

contact  PUIDs of the lab member(s) responsible for the rat,"ratname  Unique rat name, 1 letter 3 numbers"
,H125
,M102
,T232
aakrami,229
aakrami,A141
aakrami,A142
aakrami,B099
aakrami,B198
aakrami,B199
aakrami,B200


In [None]:
# Should the primary key be changed to contact & ratname?

## Contacts that couldn't be copied

In [11]:
t = ratinfo.Contacts - shadow_lab.Contacts.proj(contactid='contacts_old_id')
emails_rep = pd.DataFrame(t.proj('email').fetch(as_dict=True))
emails_rep = emails_rep.drop(['contactid'], axis=1)
key_mails = emails_rep.to_dict('records')
df_contact_mails = (ratinfo.Contacts & key_mails)
df_contact_mails

experimenter,email,initials,telephone,tag_letter,tag_RGB_old,lab_manager,subscribe_all,tech_morning,tech_afternoon,tech_computer,is_alumni,custom_rig_order,FullName,tech_overnight,tag_RGB,tech_shifts,phone_carrier,contactid
Tim,thanks@princeton.edu,TH,2069150669,T,=BLOB=,0,0,0,0,0,1,,Tim Hanks,0,255 255 0,,email,12
Tylerxx,tamina@exchange.Princeton.EDU,TA,8087541752,,=BLOB=,0,0,0,0,0,1,,Tyler Amina,0,128 0 0,,,53
old_Scott,baptista@princeton.edu,XX,0,,=BLOB=,0,0,1,0,0,1,,old Scott Baptista,1,,,att,59
Tylerx,tamina@exchange.Princeton.EDU,TA,8087541752,,=BLOB=,0,0,0,0,0,1,,Tyler Amina,0,128 0 0,,,71
Scott,baptista@princeton.edu,SB,2019626570,,=BLOB=,0,0,1,0,0,0,,Scott Baptista,1,,,att,94
Timx,thanks@princeton.edu,TH,2069150669,T,=BLOB=,0,0,0,0,0,1,,Tim Hanks,0,255 255 0,,email,97


Issue addressed.  Duplicate emails for contactid = 59 and 94; 53 and 71; 12 and 97

### lab.Contacts: shadow table - new table

In [12]:
shadow_lab.Contacts.proj() - new_lab.Contacts


contacts_old_id


---
### lab.RigMaintenance: source table - shadow table


In [13]:
ratinfo.RigMaintenance.proj(rig_maintenance_id='maintenance_id') - shadow_lab.RigMaintenance

rig_maintenance_id


## Table lab.RigMaintenance:  (RigMaintenance records that couldn't be copied)


In [44]:
#get rig maintenance records that doesn't have matching rigid
rigmaintenance_missing = (ratinfo.RigMaintenance & (shadow_lab.RigMaintenance.proj(maintenance_id='rig_maintenance_id') - new_lab.RigMaintenance.proj(maintenance_id='rig_maintenance_id')))
df_rigmaintenance_missing = pd.DataFrame(rigmaintenance_missing.fetch(as_dict = True))

df_rigmaintenance_missing = write_df_spreadsheet(df_rigmaintenance_missing, sh, 'rig_maintenance_integrity',\
                     sheet_title='RIGS_ID IN rig_maintenance NOT IN riginfo',\
                     order_cols=['maintenance_id', 'rigid', 'broke_date', 'fix_person'], sort_cols=['broke_date'])

df_rigmaintenance_missing



Unnamed: 0,maintenance_id,rigid,broke_date,fix_person,note,isbroken,broke_person,fix_date,fix_note
0,733,305,2015-11-06 00:00:00,Chuck,test,0,Chuck,2016-04-21 15:41:15,
1,734,306,2015-11-06 00:00:00,Chuck,test,0,Chuck,2016-04-21 15:41:19,
2,774,303,2016-03-21 00:00:00,Chuck,,0,Chuck,0000-00-00 00:00:00,
3,808,303,2016-05-08 12:14:28,Chuck,Water tower won't leave session 3.\n,0,Sam,2016-05-09 10:02:46,
4,810,305,2016-05-08 12:14:28,Chuck,Water tower won't leave session 3.\n,0,Sam,2016-05-09 10:02:48,
...,...,...,...,...,...,...,...,...,...
192,1996,310,2019-11-09 05:57:24,,not working screen says still training long af...,0,Jess,2019-11-09 08:35:03,
193,1997,311,2019-11-09 05:57:24,,not working screen says still training long af...,0,Jess,2019-11-09 08:35:06,
194,1998,312,2019-11-09 05:57:24,,not working screen says still training long af...,0,Jess,2019-11-09 08:35:08,
195,2007,322,2019-11-16 11:36:01,,broken\n,0,Klaus,2019-11-19 20:06:59,


In [15]:

print('\nNumber of entries in ratinfo.RigMaintenance:', rigmaintenance_missing.shape[0])

print('\nNumber of missing rigids in ratinfo.Riginfo:', len(set(rigmaintenance_missing)))

print('\nMissing rigids in ratinfo.Riginfo:', set(rigmaintenance_missing))


Number of entries in ratinfo.RigMaintenance: 197

Number of missing rigids in ratinfo.Riginfo: 31

Missing rigids in ratinfo.Riginfo: {303, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 325, 326, 327, 328, 329, 330, 331, 332, 333, 335, 336}


---
## lab.Rigflush: source table - shadow table

In [3]:
ratinfo.Rigflush.proj(rigflush_old_id='id') - shadow_lab.Rigflush

rigflush_old_id


## lab.Rigflush: shadow table - new table

In [4]:
shadow_lab.Rigflush.proj() - new_lab.Rigflush

rigflush_old_id


---
## lab.Rigfood: source table - shadow table

In [5]:
ratinfo.Rigfood.proj(rigfood_id='rigfoodid') - shadow_lab.Rigfood

rigfood_id


## Table lab.RigFood:  (RigFood records that couldn't be copied)

In [39]:
#get rig food records that doesn't have matching rigid
rigfood_missing = (ratinfo.Rigfood & (shadow_lab.Rigfood.proj(rigfoodid='rigfood_id') - new_lab.Rigfood.proj(rigfoodid='rigfood_id')))
df_rigfood_missing = pd.DataFrame(rigfood_missing.fetch(as_dict = True))


df_rigfood_missing = write_df_spreadsheet(df_rigfood_missing, sh, 'rig_food_integrity',\
                     sheet_title='RIGS_ID IN rig_fod NOT IN riginfo',\
                     sort_cols=['datetime'])

df_rigfood_missing

Unnamed: 0,rigfoodid,rigid,datetime
0,6155,255,2019-06-13 17:45:30
1,6156,255,2019-06-13 17:45:32
2,6157,255,2019-06-13 17:45:33
3,6158,255,2019-06-13 17:45:34
4,6159,255,2019-06-13 17:45:34
5,6160,255,2019-06-13 17:45:36
6,6161,255,2019-06-13 17:45:36
7,6162,255,2019-06-13 17:45:39
9,6164,255,2019-06-13 17:45:41
8,6163,255,2019-06-13 17:45:41


In [14]:
rigfood_missing = (ratinfo.Rigfood & (shadow_lab.Rigfood.proj(rigfoodid='rigfood_id') - new_lab.Rigfood.proj(rigfoodid='rigfood_id'))).fetch('rigid')

print('\nNumber of entries in lab.Rigfood:', rigfood_missing.shape[0])

print('\nNumber of missing `rigids` in ratinfo.Riginfo:', len(set(rigfood_missing)))

print('\nMissing `rigids` in ratinfo.Riginfo:', set(rigfood_missing))


Number of entries in lab.Rigfood: 41

Number of missing `rigids` in ratinfo.Riginfo: 1

Missing `rigids` in ratinfo.Riginfo: {255}


---
## lab.Riginfo: source table - shadow table

In [15]:
ratinfo.Riginfo.proj() - shadow_lab.Riginfo

rigid


## lab.Riginfo: shadow table - new table

In [16]:
shadow_lab.Riginfo.proj() - new_lab.Riginfo

rigid


---
## lab.TrainingRoom: source table - shadow table


In [17]:
ratinfo.TrainingRoom.proj() - shadow_lab.TrainingRoom

tower


## lab.TrainingRoom: shadow table - new table

In [18]:
shadow_lab.TrainingRoom.proj() - new_lab.TrainingRoom

tower


## action.CalibrationInfoTbl: (Calibration that couldn't be copied)

In [59]:
calibration_missing = bdata.CalibrationInfoTbl - shadow_action.CalibrationInfoTbl.proj(calibrationid='calibration_info_tbl_id')
df_calibration_missing = pd.DataFrame(calibration_missing.fetch(as_dict=True))
df_calibration_missing['dateval'] = df_calibration_missing['dateval'].astype(str)

df_calibration_missing = write_df_spreadsheet(df_calibration_missing, sh2, 'calibrationInfo_integrity',\
                     sheet_title='INVALID RIGS_ID IN calibrationInfoTBL',\
                     order_cols=['calibrationid'], sort_cols=['dateval'])

df_calibration_missing

Unnamed: 0,calibrationid,rig_id,initials,dateval,valve,timeval,dispense,isvalid,target,validity
9,76687,401,ED,0000-00-00 00:00:00,left1water,0.15,2.32,0,HIGH,
0,52913,,RE,2011-09-10 12:11:06,left1water,0.12,21.0,1,LOW,PERM
1,52914,,RE,2011-09-10 12:11:06,right1water,0.15,21.0,1,LOW,PERM
2,52915,,RE,2011-09-10 12:12:32,left1water,0.12,26.67,1,HIGH,PERM
3,52916,,RE,2011-09-10 12:12:32,right1water,0.15,26.67,1,HIGH,PERM
4,52919,,ED,2011-09-10 15:14:31,left1water,0.12,20.0,1,LOW,PERM
5,52920,,ED,2011-09-10 15:14:31,right1water,0.15,20.0,1,LOW,PERM
6,52921,,ED,2011-09-10 15:15:40,left1water,0.12,26.67,1,HIGH,PERM
7,52922,,ED,2011-09-10 15:15:40,right1water,0.15,26.67,1,HIGH,PERM
8,53095,,CB,2011-09-15 12:32:29,right1water,0.15,20.0,1,LOW,PERM


In [20]:
calibration_missing = bdata.CalibrationInfoTbl - shadow_action.CalibrationInfoTbl.proj(calibrationid='calibration_info_tbl_id')

print('\nIncorrect dateval:\n', calibration_missing & 'dateval = "0000-00-00 00:00:00"')

print('\nIncorrect rig_id:\n', calibration_missing & 'dateval != "0000-00-00 00:00:00"')


Incorrect dateval:
 rig_id     initials     dateval        valve          timeval     dispense     isvalid     *calibrationid target     validity    
+--------+ +----------+ +------------+ +------------+ +---------+ +----------+ +---------+ +------------+ +--------+ +----------+
401        ED           0000-00-00 00: left1water     0.15        2.32         0           76687          HIGH                   
 (Total: 1)


Incorrect rig_id:
 rig_id     initials     dateval        valve          timeval     dispense     isvalid     *calibrationid target     validity    
+--------+ +----------+ +------------+ +------------+ +---------+ +----------+ +---------+ +------------+ +--------+ +----------+
          RE           2011-09-10 12: left1water     0.12        21.0         1           52913          LOW        PERM        
          RE           2011-09-10 12: right1water    0.15        21.0         1           52914          LOW        PERM        
          RE           2011-09-10 1

## action.CalibrationInfoTbl: shadow table - new table


In [50]:
shadow_action.CalibrationInfoTbl.proj() - new_action.CalibrationInfoTbl

calibration_info_tbl_id
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726


In [61]:
calibration_missing_rigid = shadow_action.CalibrationInfoTbl - new_action.CalibrationInfoTbl.proj()
df_calibration_missing_rigid = pd.DataFrame(calibration_missing_rigid.fetch(as_dict=True))
df_calibration_missing_rigid = df_calibration_missing_rigid.loc[0:5000, :]

df_calibration_missing_rigid = write_df_spreadsheet(df_calibration_missing_rigid, sh2, 'calibrationInfo_integrity2',\
                     sheet_title='RIGS_ID IN calibrationInfoTBL not in RIGINFO',\
                     order_cols=['calibration_info_tbl_id'], sort_cols=['calibration_datetime'])

df_calibration_missing_rigid

Unnamed: 0,calibration_info_tbl_id,rigid,calibration_datetime,open_valve_time,calibration_person,valve,dispense,isvalid,target
0,3717,999,2009-10-20 12:47:32,0.15000,SST,left1water,17.1333,1,
485,4202,999,2009-10-20 12:47:32,0.15000,SST,right1water,13.8000,1,
3860,7609,999,2009-10-20 12:47:32,0.15000,SST,right1water,13.8000,1,
3859,7608,999,2009-10-20 12:47:32,0.15000,SST,left1water,17.1333,1,
3733,7482,999,2009-10-20 12:47:32,0.15000,SST,right1water,13.8000,1,
...,...,...,...,...,...,...,...,...,...
2334,6055,999,2010-06-21 18:42:04,0.27826,SK,right1water,26.0000,1,
4112,7861,999,2010-06-21 18:42:04,0.27826,SK,right1water,26.0000,1,
4111,7860,999,2010-06-21 18:42:04,0.30066,SK,left1water,25.2667,1,
2080,5801,999,2010-06-21 18:42:04,0.27826,SK,right1water,26.0000,1,


---
## action.Mass: source table - shadow table

In [62]:
ratinfo.Mass.proj(mass_id='weighing') - shadow_action.Mass

mass_id
722475
270029
176125
27
24
33
32
25
28
26


## Mass table with invalid date or times

In [87]:
mass_missing_date_or_time = ratinfo.Mass - shadow_action.Mass.proj(weighing='mass_id')
mass_missing_date_or_time_df = pd.DataFrame((mass_missing_date_or_time & 'date = "0000-00-00" or timeval = "0:00:00"').fetch(as_dict=True))

mass_missing_date_or_time_df['date'] = mass_missing_date_or_time_df['date'].astype(str)
mass_missing_date_or_time_df['timeval'] = mass_missing_date_or_time_df['timeval'].astype(str)

mass_missing_date_or_time_df = write_df_spreadsheet(mass_missing_date_or_time_df, sh2, 'mass_datetime_integrity',\
                     sheet_title='INVALID DATE or TIME IN mass table',\
                     sort_cols=['timeval', 'date'])


mass_missing_date_or_time_df


Unnamed: 0,mass,weighing,date,ratname,tech,timeval
1870,264,270029,0000-00-00,F038,CC,0 days 00:00:00
0,284,18,2008-09-07,S029,RF,0 days 00:00:00
1,302,19,2008-09-07,S018,RF,0 days 00:00:00
2,288,20,2008-09-07,S027,RF,0 days 00:00:00
3,310,21,2008-09-07,S030,RF,0 days 00:00:00
...,...,...,...,...,...,...
1869,296,268801,2014-08-04,F050,,0 days 00:00:00
1871,0,319558,2015-05-08,A167,FW,0 days 00:00:00
1872,0,319559,2015-05-08,A178,FW,0 days 00:00:00
1873,900,722475,0000-00-00,,,0 days 04:33:48


In [111]:

mass_missing = ratinfo.Mass - shadow_action.Mass.proj(weighing='mass_id')
mass_missing_tech_df = pd.DataFrame((mass_missing & 'date != "0000-00-00"' & 'timeval != "0:00:00"').fetch('tech',as_dict=True))
mass_missing_tech_df = mass_missing_tech_df['tech'].value_counts().to_frame()
mass_missing_tech_df = mass_missing_tech_df.reset_index()
mass_missing_tech_df = mass_missing_tech_df.rename(columns = {'tech':'count_mass', 'index':'tech'})

contact_initials = pd.DataFrame(ratinfo.Contacts.fetch('initials', as_dict=True))
contact_initials = contact_initials['initials'].value_counts().to_frame()
contact_initials = contact_initials.reset_index()
contact_initials = contact_initials.rename(columns = {'initials':'count_contacts', 'index':'tech'})


mass_missing_tech_df = mass_missing_tech_df.merge(contact_initials, how='left', on=['tech'])

mass_missing_tech_df['count_contacts'] = mass_missing_tech_df['count_contacts'].fillna(0)


mass_missing_tech_df = write_df_spreadsheet(mass_missing_tech_df, sh2, 'mass_contacts_integrity',\
                     sheet_title='TECH INITIALS ON MASS WITH ZERO OR MULTIPLE CONTACTS', sort_cols=['count_contacts'])

mass_missing_tech_df


Unnamed: 0,tech,count_mass,count_contacts
0,FW,157059,0.0
9,TZL,60,0.0
10,EJD,60,0.0
11,TB,51,0.0
1,AS,78096,2.0
2,RL,31253,2.0
3,TJ,30968,2.0
5,SS,15744,2.0
6,SB,14746,2.0
7,MP,2591,2.0


In [116]:
mass_missing = ratinfo.Mass - shadow_action.Mass.proj(weighing='mass_id')

mass_missing_tech = (mass_missing & 'date != "0000-00-00"' & 'timeval != "0:00:00"').fetch('tech')

print('\nNumber of `tech` initials with either zero or multiple entries in ratinfo.Contacts:', len(mass_missing_tech))

print('\n`tech` initials with either zero or multiple entries in ratinfo.Contacts:', set(mass_missing_tech))

print('\nNumber of entries with `date = 0000-00-00` :', len((mass_missing & 'date = "0000-00-00"').proj()))

print('\nNumber of entries with `timeval = 0:00:00` :', len((mass_missing & 'timeval = "0:00:00"').proj()))



Number of `tech` initials with either zero or multiple entries in ratinfo.Contacts: 358192

`tech` initials with either zero or multiple entries in ratinfo.Contacts: {'TH', 'JW', 'TZL', 'EJD', 'FW', 'TB', 'AB', 'AS', 'SB', 'MP', 'SS', 'TJ', 'RL'}

Number of entries with `date = 0000-00-00` : 3

Number of entries with `timeval = 0:00:00` : 1872


## action.Mass: shadow table - new table

In [127]:
mass_rats_missing = shadow_action.Mass - new_action.Mass.proj()
mass_rats_missing_df = pd.DataFrame(mass_rats_missing.fetch(as_dict=True))

ratinfo_rats_df = pd.DataFrame((mass_rats_missing & (dj.U('ratname') & ratinfo.Rats)).fetch('ratname', as_dict=True))
ratinfo_rats_df = ratinfo_rats_df.drop_duplicates()
ratinfo_rats_df = ratinfo_rats_df.reset_index(drop=True)
ratinfo_rats_df['on_blratinfo.Rats'] = 1


mass_rats_missing_df = mass_rats_missing_df.merge(ratinfo_rats_df,  how='left', on=['ratname'])


mass_rats_missing_df = write_df_spreadsheet(mass_rats_missing_df, sh2, 'mass_rats_integrity',\
                     sheet_title='ratnames on mass table missing on ratinfo.rats or subject.rats',\
                     order_cols = ['ratname', 'on_blratinfo.Rats', 'weighing_datetime'], sort_cols=['on_blratinfo.Rats', 'weighing_datetime'])

mass_rats_missing_df

Unnamed: 0,ratname,on_blratinfo.Rats,weighing_datetime,mass_id,weigh_person,mass
0,Z004,1.0,2008-10-20 00:00:00,778,glynb,174
1,Z003,1.0,2008-10-27 00:00:00,864,glynb,212
2,Z003,1.0,2008-11-04 00:00:00,982,glynb,228
3,Z004,1.0,2008-11-04 00:00:00,1019,glynb,180
4,Z003,1.0,2008-11-11 00:00:00,1103,glynb,242
...,...,...,...,...,...,...
931,F100,,2017-01-13 09:51:35,479520,lteachen,344
932,F084,,2017-01-14 08:06:01,479927,lteachen,351
933,F100,,2017-01-14 08:06:30,479928,lteachen,350
934,F084,,2017-01-15 09:56:07,480283,losorio,354


In [128]:
mass_new_missing = shadow_action.Mass - new_action.Mass.proj()

print('\nNumber of `ratname` missing in bl_new_action.Mass: ', len(set(mass_new_missing.fetch('ratname'))))

print('\n`ratname` present in ratinfo.Rats: ', set((mass_new_missing & (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

print('\n`ratname` missing in ratinfo.Rats: ', set((mass_new_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

print('\n`ratname` present in bl_new_subject.Rats: ', (mass_new_missing & (dj.U('ratname') & new_subject.Rats)).fetch('ratname'))


Number of `ratname` missing in bl_new_action.Mass:  27

`ratname` present in ratinfo.Rats:  {'J089', 'Z003', 'Z089', 'J096', 'Z004', 'J090', 'T075'}

`ratname` missing in ratinfo.Rats:  {'E057', 'H054', 'B11', 'M021', 'J288', 'F100', 'E068', 'E055', 'F084', 'B518', 'J289', '   0', 'H053', 'U001', 'A171', 'A172', 'J148', 'M084', 'TANK', 'U011'}

`ratname` present in bl_new_subject.Rats:  []


## action.Rigwater: Missing records

In [129]:
ratinfo.Rigwater.proj(rigwater_id='id') - shadow_action.Rigwater

rigwater_id
1098
1100
1102
1104
1106
1108
1110
1112
1114
1116


In [132]:
rigwater_missing_date = ratinfo.Rigwater - shadow_action.Rigwater.proj(id='rigwater_id')
rigwater_missing_date_df = pd.DataFrame(rigwater_missing_date.fetch(as_dict=True))
rigwater_missing_date_df

rigwater_missing_date_df = write_df_spreadsheet(rigwater_missing_date_df, sh2, 'rigwater_date_integrity',\
                     sheet_title='riginfo.rigwater with date issues')



In [130]:
rigwater_missing_date = ratinfo.Rigwater - shadow_action.Rigwater.proj(id='rigwater_id')

print('\nNumber of entries in ratinfo.Rigwater: ', len(rigwater_missing_date.fetch('dateval')))

print('\nError with date:', set(rigwater_missing_date.fetch('dateval')))


Number of entries in ratinfo.Rigwater:  27

Error with date: {'2017-01-00', '0000-00-00', '2017-05-00', '2016-12-00'}


## action.Rigwater: missing rats

In [139]:
rigwater_missing_rats = shadow_action.Rigwater - new_action.Rigwater.proj()
rigwater_missing_rats_df = pd.DataFrame(rigwater_missing_rats.fetch(as_dict=True))
rigwater_missing_rats_df
rigwater_missing_rats_df = write_df_spreadsheet(rigwater_missing_rats_df, sh2, 'rigwater_rats_integrity',\
                     sheet_title='riginfo.rigwater with ratname not found', sort_cols=['earnedwater_datetime'])

rigwater_missing_rats_df


Unnamed: 0,rigwater_id,ratname,earnedwater_datetime,totalvol,trialvol,complete,n_rewarded_trials,target_percent
1,2,Z999,1999-01-01,5.0,24.0,0,186,
0,1,X000,2011-11-08,999.0,9.0,1,100,
2,166,K999,2012-01-16,0.0,0.0,1,0,
3,4453,ratn,2014-08-29,0.0,0.0,0,0,
4,4456,ratn,2014-08-29,0.0,0.0,0,0,
10,4561,ratn,2014-09-01,0.0,0.0,0,6,
8,4559,ratn,2014-09-01,0.0,0.0,0,4,
9,4560,ratn,2014-09-01,0.0,0.0,0,5,
6,4557,ratn,2014-09-01,0.0,0.0,1,2,
5,4556,ratn,2014-09-01,0.0,0.0,0,1,


In [140]:
rigwater_missing_ratname = ratinfo.Rigwater & (shadow_action.Rigwater.proj(id='rigwater_id') - new_action.Rigwater.proj(id='rigwater_id'))

print('\nNumber of entries in ratinfo.Rigwater: ', len(rigwater_missing_ratname.fetch('ratname')))

print('\nMissing ratname in ratinfo.Rats: ', set(rigwater_missing_ratname.fetch('ratname')))


Number of entries in ratinfo.Rigwater:  59

Missing ratname in ratinfo.Rats:  {'K999', 'X000', 'F066', 'BROK', '100', 'ratn', 'M000', 'F100', 'Z999', 'F084'}


## action.Schedule: source table - shadow table


In [148]:
schedule_missing_date = ratinfo.Schedule - shadow_action.Schedule.proj(schedentryid='schedule_id')
schedule_missing_date_df = pd.DataFrame(schedule_missing_date.fetch(as_dict=True))

schedule_missing_date_df = write_df_spreadsheet(schedule_missing_date_df, sh3, 'schedule_date_integrity',\
                     sheet_title='action.Schedule with invalid date', order_cols=['date'])

schedule_missing_date_df

Unnamed: 0,date,timeslot,rig,start_time,ratname,experimenter,protocol,system,instructions,comments,technician,schedentryid,wascompleted,wasqueried,wasstarted,wasvideosaved,Trials_Issue,Bias_Issue
0,0000-00-00,4,27,,S063,Ben,,0,,,,136150,0,0,0,0,,
1,0000-00-00,4,28,,S064,Ben,,0,,,,136151,0,0,0,0,,
2,0000-00-00,5,27,,S065,Ben,,0,,,,136152,0,0,0,0,,
3,0000-00-00,5,28,,S066,Ben,,0,,,,136153,0,0,0,0,,
4,0000-00-00,0,0,,,,,0,,,,136154,0,0,0,0,,
5,0000-00-00,0,29,,,,,0,,,,220996,0,0,0,0,,
6,0000-00-00,5,29,,S134,,,0,,,,221001,0,0,0,0,,
7,0000-00-00,6,29,,S135,,,0,,,,221063,0,0,0,0,,


In [149]:
schedule_missing = ratinfo.Schedule - shadow_action.Schedule.proj(schedentryid='schedule_id')

print('Error with date:', set(schedule_missing.fetch('date')))

Error with date: {'0000-00-00'}


## action.Schedule: shadow table - new table

In [150]:
shadow_action.Schedule.proj() - new_action.Schedule

schedule_id


## action.Surgery: missing dates

In [155]:
surgery_missing_date = ratinfo.Surgery - shadow_action.Surgery.proj(id='surgery_old_id')
surgery_missing_date_df = pd.DataFrame(surgery_missing_date.fetch(as_dict=True))


surgery_missing_date_df = write_df_spreadsheet(surgery_missing_date_df, sh3, 'surgery_date_integrity',\
                     sheet_title='action.Surgery with invalid date', order_cols=['date'])


surgery_missing_date_df


Unnamed: 0,date,id,ratname,surgeon,ratgrams,starttime,endtime,type,eib_num,coordinates,brainregions,ketamine,buprenex,notes,Bregma,IA0,angle,tilt_axis
0,0000-00-00,1,C054,Jeff,0,0 days 00:00:00,0 days 00:00:00,,,,,0.0,0.0,,,,0.0,
1,0000-00-00,2,C057,Jeff,0,0 days 00:00:00,0 days 00:00:00,,,,,0.0,0.0,,,,0.0,
2,0000-00-00,56,J152,Ann Duan,370,0 days 15:50:00,0 days 20:30:00,cannula implant practice,,,Bilateral FOF and PPC,0.2,0.2,Bilateral cannula implant in FOF and PPC,,,0.0,
3,0000-00-00,115,Z54,Tyler,0,0 days 02:00:00,0 days 08:30:00,CTB Injection- Unilateral,,"4.96,25.3D 25.7S, 21.4D 23.4S",Superior Colliculus (Superficial / Deep),20.0,20.0,"Bregma seemed too anterior, adjusted based on ...","11.3,23.9,25.8","2.8,23.9,33.4",0.0,
4,0000-00-00,117,z55,Tyler,0,0 days 00:00:10,0 days 00:00:04,"CTB injection in SC, Unilateral Different AP",,"6/7,25.8,(22.2/21.2)/21.6",Superior Colliculus,20.0,20.0,2nd injection may have been clogged,"13,23.9,26.4","4.1,23.9,33.6",0.0,
5,0000-00-00,119,z59,Tyler,0,0 days 00:00:11,0 days 00:00:04,CTB Injection- Unilateral into SC,,"14.9/13.8,26.1/26.5,55.6/54.3",Superior Colliculus,20.0,20.0,Both appeared to be clogged,"20.3,24.7","11.2,24.2",0.0,
6,0000-00-00,120,Z61,Tyler,0,0 days 00:00:06,0 days 00:00:11,CTB Injection- Unilateral into SC,,"14.3/16,26.5/26.5,20.7/15.9",Superior Colliculus,20.0,20.0,,"21.7,24.9,24.2","11.6,24.4",0.0,
7,0000-00-00,163,W042,Athena,0,0 days 00:00:00,0 days 00:00:00,CTB in mPFC,,(+3.24/0.7),Bilateral mPFC,0.0,0.0,,,,0.0,
8,0000-00-00,192,Z073,Ahmed,0,0 days 10:00:00,0 days 17:00:00,CTB injections,,"(ML = 2.5, AP= -3.8)",PPC,0.2,0.2,,,,0.0,
9,0000-00-00,229,W072,Athena,0,0 days 16:30:00,0 days 21:30:00,Bilateral PPC AAV Halo (princeton v) fiber imp...,,"AP:-3.8, ML: +-2.5, DV:0.9",bilateral PPC,0.2,0.2,"5 injection sites for each PPC, 16 injections ...",,,0.0,


In [156]:
surgery_missing = ratinfo.Surgery - shadow_action.Surgery.proj(id='surgery_old_id')

print('\nIncorrect date: ', set(surgery_missing.fetch('date')))


Incorrect date:  {'0000-00-00'}


## action.Surgery: shadow table - new table

In [170]:
surgery_rats_missing = shadow_action.Surgery - new_action.Surgery.proj()

surgery_rats_missing_df = pd.DataFrame(surgery_rats_missing.fetch(as_dict=True))

surgery_rats_missing = (shadow_action.Surgery & 'ratname="W055" or ratname="W009"') 

surgery_rats_missing_df2 = pd.DataFrame(surgery_rats_missing.fetch(as_dict=True))


surgery_rats_missing_df = pd.concat([surgery_rats_missing_df, surgery_rats_missing_df2])


surgery_rats_missing_df = write_df_spreadsheet(surgery_rats_missing_df, sh3, 'surgery_rats_integrity',\
                     sheet_title='action.Surgery with invalid rats or keys')

surgery_rats_missing_df


Unnamed: 0,surgery_old_id,ratname,surgery_date,surgery_starttime,surgery_stoptime,surgeon,ratgrams,surgery_type,eib_num,coordinates,brainregions,ketamine,buprenex,surgery_notes,bregma,ia_zero,angle,tilt_axis
0,53,T09,2011-05-05,0 days 11:30:00,0 days 16:30:00,Tim,418,right lateral PPC tetrode implant,33.0,"(-4.0, 3.4)",right lateral PPC,0.2,0.2,,,,0.0,
1,121,Z58,2013-07-25,0 days 10:00:00,0 days 02:30:00,Tommy,0,2 CTB injections into unilateral superficial SC,,"(11.1/11.9, 23.0/22.9, 54.5/51.0)",SC,0.2,0.2,Weird sutures; blood vessel forced adjusted lo...,"(17.5, 24.4, 2.2)","(8.9, 24.4, 2.2)",0.0,
2,122,Z60,2013-07-29,0 days 10:00:00,0 days 03:00:00,Tommy,0,2 CTB injections into unilateral superficial SC,,"(-5.7/-6.2, 23.7/23.2, 8.2/10.7)",SC,0.2,0.2,,"(-0.3, 24.7, 12.0)","(-9.2, 24.7, 12.0)",0.0,
3,263,A280,2020-01-31,0 days 00:00:00,0 days 00:00:00,Chuck,0,VGAT-ReaChR acute,,"ap+1.5,ML+2.5",M1 and DMS,0.0,0.0,(this animal had previously been referred to I...,,,0.0,
0,158,W009,2013-08-19,0 days 00:00:00,0 days 00:00:00,"Athena, Anna",0,CTB + AAV ChR2 in mPFC,,(+3.24/0.7),Bilateral mPFC,0.0,0.0,,,,0.0,
1,159,W009,2013-08-19,0 days 00:00:00,0 days 00:00:00,"Athena, Anna",0,CTB + AAV ChR2 in mPFC,,(+3.24/0.7),Bilateral mPFC,0.0,0.0,,,,0.0,
2,160,W009,2013-08-19,0 days 00:00:00,0 days 00:00:00,Athena,0,CTB + AAV ChR2 in mPFC,,(+3.24/0.7),Bilateral mPFC,0.0,0.0,,,,0.0,
3,177,W055,2014-09-18,0 days 14:30:00,0 days 20:30:00,Athena,0,DREADD (AAV5 hsyn) + 32ch TDT array in lPPC,,"AP: -3.8, ML=2.5",left PPC,0.2,0.2,,,,0.0,
4,179,W055,2014-09-18,0 days 14:30:00,0 days 20:30:00,Athena,0,DREADD (AAV5 hsyn) + 32ch TDT array in lPPC,,"AP: -3.8, ML=2.5",left PPC,0.2,0.2,,,,0.0,


In [171]:
print('\nDuplicate primary key for new table (ratname, surgery_date, surgery_starttime): W055, W009')

surgery_missing_ratname = (shadow_action.Surgery & (shadow_action.Surgery.proj() - new_action.Surgery) & 'ratname != "W009"' & 'ratname != "W055"').fetch('ratname')

print('\n`ratname` not in ratinfo.Rats:', surgery_missing_ratname)


Duplicate primary key for new table (ratname, surgery_date, surgery_starttime): W055, W009

`ratname` not in ratinfo.Rats: ['T09' 'Z58' 'Z60' 'A280']


---
## action.TechSchedule: source table - shadow table


In [175]:
techsedule_missing_date = ratinfo.TechSchedule - shadow_action.TechSchedule.proj()
techsedule_missing_date_df = pd.DataFrame(techsedule_missing_date.fetch(as_dict=True))


techsedule_missing_date_df = write_df_spreadsheet(techsedule_missing_date_df, sh3, 'techschedule_date_integrity',\
                     sheet_title='action.TechSchedule with invalid date')

techsedule_missing_date_df




Unnamed: 0,date,day,overnight,scheduleid,morning,evening
0,0000-00-00,,Adrian,810,Klaus,Jovanna


In [176]:
techschedule_missing = ratinfo.TechSchedule & (ratinfo.TechSchedule.proj() - shadow_action.TechSchedule)

print('Error with date:\n', techschedule_missing)

Error with date:
 date           day     overnight     *scheduleid    morning     evening    
+------------+ +-----+ +-----------+ +------------+ +---------+ +---------+
0000-00-00             Adrian        810            Klaus       Jovanna    
 (Total: 1)



## action.TechSchedule: shadow table - new table

In [177]:
shadow_action.TechSchedule.proj() - new_action.TechSchedule



scheduleid


---
## action.Technotes: source table - shadow table

In [178]:
ratinfo.Technotes.proj(technote_id='technoteid') - shadow_action.Technotes


technote_id


## action.Technotes: shadow table - new table

In [179]:
shadow_action.Technotes.proj() - new_action.Technotes

technote_id


## action.Water: date integrity issues

In [186]:
water_missing_date = ratinfo.Water - shadow_action.Water.proj(watering='water_id')
water_missing_date_df = pd.DataFrame(water_missing_date.fetch(as_dict=True))


water_missing_date_df = write_df_spreadsheet(water_missing_date_df, sh3, 'water_date_integrity',\
                     sheet_title='action.Water with invalid date')


water_missing_date_df

Unnamed: 0,date,rat,tech,starttime,stoptime,watering,volume,percent_bodymass,percent_target
0,0000-00-00,,,,,455474,0.0,0.0,0.0
1,0000-00-00,,,,,455475,0.0,0.0,0.0
2,0000-00-00,,,,,455476,0.0,0.0,0.0
3,0000-00-00,,,,,455477,0.0,0.0,0.0
4,0000-00-00,,,,,455479,0.0,0.0,0.0


In [188]:
water_missing = ratinfo.Water - shadow_action.Water.proj(watering='water_id')
print('Error with date:', set(water_missing.fetch('date')))


Error with date: {'0000-00-00'}


## action.Water: shadow table - new table

In [195]:
shadow_action.Water.proj() - new_action.Water

water_rats_missing = shadow_action.Water - new_action.Water.proj()
water_rats_missing_df = pd.DataFrame(water_rats_missing.fetch(as_dict=True))

ratinfo_rats_df = pd.DataFrame((water_rats_missing & (dj.U('ratname') & ratinfo.Rats)).fetch('ratname', as_dict=True))
ratinfo_rats_df = ratinfo_rats_df.drop_duplicates()
ratinfo_rats_df = ratinfo_rats_df.reset_index(drop=True)
ratinfo_rats_df['on_blratinfo.Rats'] = 1


water_rats_missing_df = water_rats_missing_df.merge(ratinfo_rats_df,  how='left', on=['ratname'])

water_rats_missing_df = write_df_spreadsheet(water_rats_missing_df, sh3, 'water_rats_integrity',\
                     sheet_title='ratnames on water table missing on ratinfo.rats or subject.rats',\
                     order_cols = ['ratname', 'on_blratinfo.Rats', 'administration_date'], sort_cols=['on_blratinfo.Rats', 'administration_date'])

water_rats_missing_df




Unnamed: 0,ratname,on_blratinfo.Rats,administration_date,water_id,administration_starttime,administration_stoptime,administration_person,volume,percent_bodymass,percent_target
1,Z003,1.0,2008-10-08,3516,0 days 19:29:50,0 days 20:01:28,RF,0.000,0.000,0.000
2,Z004,1.0,2008-10-08,3517,0 days 19:29:50,0 days 20:01:28,RF,0.000,0.000,0.000
3,Z003,1.0,2008-10-15,4281,0 days 14:04:21,0 days 14:33:53,RF,0.000,0.000,0.000
4,Z004,1.0,2008-10-15,4282,0 days 14:04:21,0 days 14:33:53,RF,0.000,0.000,0.000
5,Z003,1.0,2008-10-16,4403,0 days 14:55:11,0 days 15:27:00,RF,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...
4632,F133,,2017-10-07,698086,0 days 00:01:00,0 days 23:59:00,JM,0.000,0.000,0.000
4633,F132,,2017-10-08,698436,0 days 00:01:00,0 days 23:59:00,JM,0.000,0.000,0.000
4634,F133,,2017-10-08,698437,0 days 00:01:00,0 days 23:59:00,JM,0.000,0.000,0.000
4635,F132,,2017-10-09,698785,0 days 00:01:00,0 days 23:59:00,AS,0.000,0.000,0.000


In [191]:
water_missing = shadow_action.Water - new_action.Water.proj()

print('\nNumber of `ratname` missing in bl_new_action.Water: ', len(set(water_missing.fetch('ratname'))))

print('\n`ratname` present in ratinfo.Rats: ', set((water_missing & (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

print('\n`ratname` missing in ratinfo.Rats: ', set((water_missing - (dj.U('ratname') & ratinfo.Rats)).fetch('ratname')))

print('\n`ratname` present in bl_new_subject.Rats: ', (water_missing & (dj.U('ratname') & new_subject.Rats)).fetch('ratname'))




Number of `ratname` missing in bl_new_action.Water:  50

`ratname` present in ratinfo.Rats:  {'J089', 'Z003', 'Z089', 'J096', 'Z004', 'J090', 'T075'}

`ratname` missing in ratinfo.Rats:  {'E057', 'H054', '0014', 'B010', 'U016', 'F133', 'M021', 'U030', 'Z130', 'U006', 'U032', '0048', 'J288', 'U018', 'U012', 'F100', 'E068', 'E055', 'F084', 'U005', 'CO64', 'J289', 'BROK', 'S191', 'F132', '0058', 'U029', 'U034', 'H053', 'U015', 'U001', 'U017', 'A171', 'A172', 'U010', '`', 'J148', 'U019', 'M084', 'TANK', 'U021', 'U011', 'U009'}

`ratname` present in bl_new_subject.Rats:  []
