# migrate data between schemas

In [1]:
from loadData import db_manager
import pandas as pd
import sys
sys.path.insert(0,'../py_scripts/')
import warnings
import bayes_model_vrclassroom

In [2]:
# find incremental difference in index_name field between db_base and db_head
def findCaseidDiff(db_base, db_head, index_name, table_name):
    """Return the index values found in ``db_base`` but not in ``db_head``.

    Parameters
    ----------
    db_base, db_head
        Database handles exposing ``sql_query_fetch_df(sql=...)``. The result
        is indexed by the queried field name below, so it is assumed to be a
        DataFrame whose column carries that name.
    index_name : str or tuple
        Field to compare. A tuple gives ``(field in db_base, field in db_head)``;
        a plain string is used on both sides.
    table_name : str or tuple
        Table to query. A tuple gives ``(table in db_base, table in db_head)``;
        a plain string is used on both sides.

    Returns
    -------
    list
        Values of the base field that do not occur in the head field.

    Notes
    -----
    ``index_name`` and ``table_name`` are interpolated into the SQL text
    unescaped, so only trusted identifiers should be passed.
    """
    def _pair(value):
        # Either argument may independently be a (base, head) tuple or a
        # single name shared by both sides.
        if isinstance(value, tuple):
            return value[0], value[1]
        return value, value

    base_index, head_index = _pair(index_name)
    base_table, head_table = _pair(table_name)

    query = 'SELECT distinctrow(%s) FROM %s'
    cids_from = db_base.sql_query_fetch_df(sql=query % (base_index, base_table))
    cids_to = db_head.sql_query_fetch_df(sql=query % (head_index, head_table))

    # Keep only the base rows whose value is absent from the head side.
    missing = ~cids_from[base_index].isin(cids_to[head_index])
    return cids_from.loc[missing, base_index].tolist()

In [3]:
def getWhereClause(index_name = 'CaseId',index_values = None):
    """Build a SQL ``where <index_name> in (...)`` clause.

    Parameters
    ----------
    index_name : str
        Column name placed in front of ``in``.
    index_values : iterable, optional
        Values to list inside the parentheses. Each one is rendered with
        ``str()`` and is neither quoted nor escaped, so this suits numeric ids.

    Returns
    -------
    str
        The where-clause, or an empty string (after a warning) when no
        values were supplied.
    """
    # Materialise first so that tuples, sets, arrays and generators are all
    # handled, and so an empty input of any of those types is detected.
    values = [] if index_values is None else [str(x) for x in index_values]
    if not values:
        warnings.warn('no index values specified')
        return ''
    return 'where %s in (%s)'%(index_name,','.join(values))
    
# copy table between two db
# caseids can be 'all', 'compare', or a list of int
# set drop_table to true with caseids = 'all' to truncate the table first
# set delete_exist to true to overwrite rows with the same caseid
def dbMigrate(db_from,db_to,table_name,CaseIds = 'compare', index_name = 'CaseId', drop_table = False, delete_exist = False):
    """Copy rows of ``table_name`` from ``db_from`` into ``db_to``.

    Parameters
    ----------
    db_from, db_to
        Source and destination database handles. ``fetch_table`` is called on
        the source; ``insert_table`` and ``truncate_table`` on the destination.
    table_name : str
        Table to copy.
    CaseIds : 'compare', 'all', list or tuple
        ``'compare'`` copies only the index values missing from ``db_to``,
        ``'all'`` copies the whole table, and a list/tuple copies exactly the
        given index values.
    index_name : str
        Column used to select rows; also passed on to ``insert_table``.
    drop_table : bool
        Only used with ``CaseIds='all'``: truncate the destination table
        before inserting.
    delete_exist : bool
        Passed to ``insert_table`` as ``del_row_if_exist``. It is forced to
        False when the destination table has just been truncated.

    Notes
    -----
    An empty list/tuple or an unsupported ``CaseIds`` value issues a warning
    and copies nothing.
    """
    if isinstance(CaseIds, (list, tuple)):
        if len(CaseIds) == 0:
            # An empty selection would produce an empty where-clause and
            # therefore copy the entire table, so stop here instead.
            warnings.warn('empty CaseIds given; nothing to migrate')
            return
        df = db_from.fetch_table(table_name = table_name, where_clause = getWhereClause(index_name,CaseIds))
        db_to.insert_table(df, table_name, index_name, del_row_if_exist = delete_exist)

    elif CaseIds == 'all':
        if drop_table:
            db_to.truncate_table(table_name)
        df = db_from.fetch_table(table_name = table_name)
        # After a truncate there is nothing left to delete row by row.
        db_to.insert_table(df, table_name, index_name, del_row_if_exist = (not drop_table) and (delete_exist))

    elif CaseIds == 'compare':
        caseids_diff = findCaseidDiff(db_from, db_to, index_name, table_name)
        if caseids_diff:
            df = db_from.fetch_table(table_name = table_name, where_clause = getWhereClause(index_name,caseids_diff))
            db_to.insert_table(df, table_name, index_name, del_row_if_exist = delete_exist)

    else:
        # Make an unsupported value visible rather than silently doing nothing.
        warnings.warn('unsupported CaseIds value %r; nothing migrated' % (CaseIds,))
        

In [2]:
# Open handles on the two databases used by the cells below.
db_vrclassrooom = db_manager('vrclassroom')
db_rnd = db_manager('rnd_test')
# Read the `subjectid` field of the `case` table from each database and keep
# the first column of the returned array.
# NOTE(review): presumably fetch_table(return_array=True) yields a 2-D array — confirm.
# NOTE(review): the variables are named cids_* but hold `subjectid` values — confirm intent.
cids_rnd = db_rnd.fetch_table(table_name = '`case`',field_names=['subjectid'],return_array=True)[:,0]
cids_vr  = db_vrclassrooom.fetch_table(table_name = '`case`',field_names=['subjectid'],return_array=True)[:,0]

In [4]:
# Both handles are db_rnd, so this compares two tables inside the same
# database: values of the first field in cpt_output_results that do not occur
# in the second field of head_features.
# NOTE(review): 'CasdId' looks like a typo for 'CaseId' — confirm against the
# cpt_output_results schema before relying on this cell.
caseids_diff = findCaseidDiff(db_rnd, db_rnd, index_name=('CasdId','caseid'), table_name=('cpt_output_results','head_features'))

In [4]:
import rose_witherrorhandling
import percentile_witherrorhandling_2
import signal_detection_vrclassroom

# Processing steps for a batch of case ids. Only the Bayes model call is
# active; the other steps are kept commented out.
# rose_witherrorhandling.main (caseids_diff,'rnd_test','rnd_test')
# Hard-coded batch of case ids; this rebinds caseids_diff, replacing any value
# computed in an earlier cell.
caseids_diff = [69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86]
# caseids_diff = [21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,40,41,42,43,44,45,46,47,49,50,51,52,61,62,63,64,65,66,67,68]
# percentile_witherrorhandling_2.main (caseids_diff,'rnd_test','rnd_test')
# signal_detection_vrclassroom.main  (caseids_diff,'rnd_test','rnd_test')
# NOTE(review): the two 'rnd_test' arguments are presumably the source and
# target database names — confirm against bayes_model_vrclassroom.main.
bayes_model_vrclassroom.main(caseids_diff,'rnd_test','rnd_test')

training accuracy:  84.2105263158 %
0 caseids got deleted
18 caseids got inserted or updated


In [4]:
# copy raw data tables 
# table_names = ['cpt_output_results','distractoroutput','hmd_data','realtime_data','sensordataoutput','trial_data']
# Source (rnd_test) and destination (webtest) handles for the sync below.
db_base = db_manager('rnd_test')
db_tar = db_manager('webtest')

# Disabled variant: sync tables whose index column differs per table.
# table_names = ['patient','`case`']
# ind_names = ['id','SubjectId']
# for name,ind_name in zip(table_names,ind_names):
#     print('table %s sync start'%name)
#     dbMigrate(db_base,db_tar,name,CaseIds = 'compare', index_name=ind_name)
#     print('table %s sync done'%name)

# Sync each listed table in 'compare' mode, i.e. copy only the CaseId values
# that db_tar does not have yet, printing progress around every table.
table_names = ['head_features','signal_detection','head_rot','bayes_probabilities']
for name in table_names:
    print('table %s sync start'%name)
    dbMigrate(db_base,db_tar,name,CaseIds = 'compare', index_name='CaseId')
    print('table %s sync done'%name)

table head_features sync start
table head_features sync done
table signal_detection sync start
table signal_detection sync done
table head_rot sync start
table head_rot sync done
table bayes_probabilities sync start
table bayes_probabilities sync done


In [4]:
def dbBackupToFile(db,tableName,fileName):
    """Dump the whole table ``tableName`` of ``db`` to the CSV file ``fileName``.

    The table is read with ``db.fetch_table`` and written with the
    DataFrame's ``to_csv`` using its default options.
    """
    db.fetch_table(table_name = tableName).to_csv(fileName)

# dbBackupToFile(db_rnd,'`case`','../backup/case_rnd_0814.csv')
def dbRestoreFromFile(db,tableName,fileName):
    """Load the CSV file ``fileName`` and insert its rows into ``tableName``.

    The file is parsed with ``pd.read_csv`` using default options and the
    resulting frame is handed to ``db.insert_table``.
    """
    restored = pd.read_csv(fileName)
    db.insert_table(table_name=tableName, df=restored)

In [7]:
# Back up the bayes_probabilities table of the webtest database to a CSV file
# under ../backup/.
db = db_manager('webtest')
dbBackupToFile(db,'bayes_probabilities','../backup/bayes_probabilities_LOO_A064_0816.csv')

In [19]:
# test if cross-db query is correct
# newdiag = db_vrclassrooom.sql_query_fetch_df(sql="select b.ADHDDiagnose,b.id  from `case` a, patient b where b.Id=a.SubjectId")
# olddiag = db_rnd.fetch_table(table_name='patient',field_names=['ADHDDiagnose'])

# olddiag[olddiag.index.isin(cids_rnd)].ADHDDiagnose.tolist() 
# newdiag[newdiag.id.isin(cids_rnd)].ADHDDiagnose == olddiag[olddiag.index.isin(cids_rnd)].ADHDDiagnose.tolist() 

0     True
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
11    True
12    True
13    True
14    True
15    True
16    True
17    True
18    True
19    True
20    True
21    True
22    True
23    True
24    True
25    True
26    True
27    True
28    True
29    True
30    True
31    True
32    True
33    True
34    True
35    True
36    True
37    True
38    True
39    True
40    True
41    True
42    True
43    True
44    True
45    True
46    True
47    True
48    True
49    True
50    True
51    True
52    True
53    True
54    True
55    True
56    True
57    True
58    True
59    True
Name: ADHDDiagnose, dtype: bool