In [1]:
#Imports
import sqlite3
from sqlite3 import Error

import pandas as pd
from datetime import datetime
import os

## Create Database and Tables

In [134]:
def create_connection(db_file):
    """Open a connection to the SQLite database stored at ``db_file``.

    :param db_file: path of the database file
    :return: the ``sqlite3.Connection`` on success, or ``None`` when
        ``sqlite3.connect`` raises ``sqlite3.Error`` (the error is printed)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        # Report the failure and signal it to the caller with None
        print(exc)
        return None
 

def create_table(conn, create_table_sql):
    """Run a single CREATE TABLE statement on an open connection.

    A ``sqlite3.Error`` raised while executing the statement is printed
    instead of being propagated.

    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as exc:
        print(exc)

def main(database=r"active_learning_20191210.db"):
    """Create the active-learning SQLite database and its five log tables.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    existing database leaves already-present tables untouched. The tables are
    ``training_log``, ``image_log``, ``map_log`` and the two link tables
    ``train_to_image_log`` and ``image_to_map_log``.

    :param database: path of the SQLite file to create or open; defaults to
        the database file used throughout this notebook
    :return: None
    """
    sql_create_training_log = """ CREATE TABLE IF NOT EXISTS training_log (
                                        training_id integer PRIMARY KEY,
                                        training_time datetime,
                                        file_path string,
                                        from_scratch boolean
                                    ); """

    sql_create_image_log = """ CREATE TABLE IF NOT EXISTS image_log (
                                        image_id integer PRIMARY KEY,
                                        file_path string,
                                        time_uploaded datetime
                                    ); """

    sql_create_map_log = """ CREATE TABLE IF NOT EXISTS map_log (
                                        map_id integer PRIMARY KEY,
                                        file_path string,
                                        time_created datetime,
                                        is_manual boolean
                                    ); """

    sql_create_train_to_image_log = """CREATE TABLE IF NOT EXISTS train_to_image_log (
                                    entry_id integer PRIMARY KEY,
                                    image_id integer,
                                    training_id integer,
                                    FOREIGN KEY (image_id) REFERENCES image_log (image_id),
                                    FOREIGN KEY (training_id) REFERENCES training_log (training_id)
                                );"""

    sql_create_image_to_map_log = """CREATE TABLE IF NOT EXISTS image_to_map_log (
                                    entry_id integer PRIMARY KEY,
                                    image_id integer,
                                    map_id integer,
                                    FOREIGN KEY (image_id) REFERENCES image_log (image_id),
                                    FOREIGN KEY (map_id) REFERENCES map_log (map_id)
                                );"""

    # Referenced tables come first, link tables last
    schema_statements = (
        sql_create_training_log,
        sql_create_image_log,
        sql_create_map_log,
        sql_create_train_to_image_log,
        sql_create_image_to_map_log,
    )

    # create a database connection
    conn = create_connection(database)
    if conn is None:
        print("Error! cannot create the database connection.")
        return

    # Always release the connection, even if a statement fails unexpectedly
    try:
        for statement in schema_statements:
            create_table(conn, statement)
        conn.commit()
    finally:
        conn.close()
 


In [164]:
# Create the database file and its tables (tables that already exist are left as they are)

main()

In [165]:
# List every table in the database to confirm the schema was created
con = sqlite3.connect('active_learning_20191210.db')
try:
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print(cursor.fetchall())
finally:
    # Release the connection; later cells open their own
    con.close()

[('training_log',), ('image_log',), ('map_log',), ('train_to_image_log',), ('image_to_map_log',)]


In [166]:
# Read the label CSV and collect the filenames of the rows whose label equals 1
df = pd.read_csv("../data/Stroke_Subjects_20200123/ATLAS_stroke_labels_20200122.csv")

filenames = df.loc[df['label'] == 1, 'filename'].tolist()

print(filenames)

['031768_Site1_t01_98', '031768_Site1_t01_94', '031768_Site1_t01_102', '031769_Site1_t01_74', '031769_Site1_t01_67', '031769_Site1_t01_81', '031770_Site1_t01_105', '031770_Site1_t01_103', '031770_Site1_t01_107', '031771_Site1_t01_88', '031771_Site1_t01_76', '031771_Site1_t01_101', '031772_Site1_t01_88', '031772_Site1_t01_80', '031772_Site1_t01_96', '031773_Site1_t01_78', '031773_Site1_t01_77', '031773_Site1_t01_79', '031774_Site1_t01_82', '031774_Site1_t01_69', '031774_Site1_t01_96', '031775_Site1_t01_67', '031775_Site1_t01_65', '031775_Site1_t01_69', '031776_Site1_t01_76', '031776_Site1_t01_70', '031776_Site1_t01_82', '031777_Site1_t01_101', '031777_Site1_t01_93', '031777_Site1_t01_110', '031778_Site1_t01_72', '031778_Site1_t01_65', '031778_Site1_t01_79', '031779_Site1_t01_100', '031779_Site1_t01_93', '031779_Site1_t01_107', '031780_Site1_t01_90', '031780_Site1_t01_84', '031780_Site1_t01_97', '031781_Site1_t01_88', '031781_Site1_t01_81', '031781_Site1_t01_95', '031782_Site1_t01_96', '

In [167]:
# Add images in train and assets to image log, together with their label maps
# and the image-to-map links.
# NOTE: every run of this cell appends a fresh set of rows for `filenames`.

con = sqlite3.connect("active_learning_20191210.db")

# One timestamp shared by every row of this run, e.g. '2020_3_14_11_24'
# (the fields are not zero-padded)
now = datetime.now()
cur_date = '_'.join(
    str(part) for part in (now.year, now.month, now.day, now.hour, now.minute)
)

try:
    cur = con.cursor()

    for filename in filenames:

        # file_path is relative to app.py!!!
        image_path = "data/train/image/" + filename + '.npy'
        # Bound parameters instead of string-built SQL: values are passed
        # as data, so quotes in a filename cannot break the statement
        cur.execute(
            'INSERT INTO image_log (file_path, time_uploaded) VALUES (?, ?)',
            (image_path, cur_date),
        )
        image_id = cur.lastrowid

        # An image and its corresponding label share the same filename
        map_path = "data/train/label/" + filename
        cur.execute(
            'INSERT INTO map_log (file_path, time_created, is_manual) VALUES (?, ?, ?)',
            (map_path, cur_date, False),
        )
        map_id = cur.lastrowid

        # Update image-to-map log
        cur.execute(
            'INSERT INTO image_to_map_log (image_id, map_id) VALUES (?, ?)',
            (image_id, map_id),
        )

    # Commit only after every row went in; closing without a commit discards
    # the partial batch if an insert fails
    con.commit()
finally:
    con.close()

In [3]:
# Sanity check: load image_log back into a pandas DataFrame
con = sqlite3.connect("active_learning_20191210.db")
img_df = pd.read_sql_query("SELECT * from image_log", con)
con.close()

# Show the frame to confirm the query result landed in it
display(img_df)

# Row label of the entry whose image_id is 37, then the file path stored there
index = img_df.index[img_df['image_id'] == 37].values.astype(int)[0]
print(index)
print(img_df.iloc[index]['file_path'])

Unnamed: 0,image_id,metric,file_path,time_uploaded
0,1,0.0,data/train/image/031768_Site1_t01_98.npy,2020_3_14_11_24
1,2,0.0,data/train/image/031768_Site1_t01_94.npy,2020_3_14_11_24
2,3,0.0,data/train/image/031768_Site1_t01_102.npy,2020_3_14_11_24
3,4,0.0,data/train/image/031769_Site1_t01_74.npy,2020_3_14_11_24
4,5,0.0,data/train/image/031769_Site1_t01_67.npy,2020_3_14_11_24
...,...,...,...,...
673,674,0.0,data/train/image/031986_Site9_t01_85.npy,2020_3_14_11_24
674,675,0.0,data/train/image/031986_Site9_t01_118.npy,2020_3_14_11_24
675,676,0.0,data/train/image/031987_Site9_t01_89.npy,2020_3_14_11_24
676,677,0.0,data/train/image/031987_Site9_t01_75.npy,2020_3_14_11_24


36
data/train/image/031780_Site1_t01_90.npy


In [150]:
# Number of rows in img_df (the frame read from image_log)
print(len(img_df))

679


In [200]:
# Load the whole map_log table into a DataFrame
con = sqlite3.connect("active_learning_20191210.db")
map_df = pd.read_sql_query("SELECT * from map_log", con)
con.close()

# Show the frame to confirm the query result landed in it
print(map_df)

      map_id                                  file_path     time_created  \
0          1       data/train/label/031768_Site1_t01_98  2020_2_19_12_47   
1          2       data/train/label/031768_Site1_t01_94  2020_2_19_12_47   
2          3      data/train/label/031768_Site1_t01_102  2020_2_19_12_47   
3          4       data/train/label/031769_Site1_t01_74  2020_2_19_12_47   
4          5       data/train/label/031769_Site1_t01_67  2020_2_19_12_47   
...      ...                                        ...              ...   
1591    1592  output/2020_2_20_21_54/674_prediction.npy  2020_2_20_21_54   
1592    1593  output/2020_2_20_21_54/675_prediction.npy  2020_2_20_21_54   
1593    1594  output/2020_2_20_21_54/676_prediction.npy  2020_2_20_21_54   
1594    1595  output/2020_2_20_21_54/677_prediction.npy  2020_2_20_21_54   
1595    1596  output/2020_2_20_21_54/678_prediction.npy  2020_2_20_21_54   

      is_manual  
0             0  
1             0  
2             0  
3             0

In [201]:
# Load the whole image_to_map_log link table into a DataFrame
con = sqlite3.connect("active_learning_20191210.db")
img2map_df = pd.read_sql_query("SELECT * from image_to_map_log", con)
con.close()

# Show the frame to confirm the query result landed in it
print(img2map_df)

      entry_id  image_id  map_id
0            1         1       1
1            2         2       2
2            3         3       3
3            4         4       4
4            5         5       5
...        ...       ...     ...
1591      1592       674    1592
1592      1593       675    1593
1593      1594       676    1594
1594      1595       677    1595
1595      1596       678    1596

[1596 rows x 3 columns]


In [202]:
# Ids of every map linked to image 674 in the image-to-map table
map_ids = img2map_df.loc[img2map_df['image_id'] == 674, 'map_id'].tolist()
print(map_ids)

# Print the map_log row(s) for each linked map, newest time_created first.
# NOTE(review): map_id is map_log's primary key, so each selection presumably
# holds a single row and the sort has nothing to reorder - confirm intent.
for map_id in map_ids:
    selected_map_df = map_df.loc[map_df['map_id'] == map_id]
    sorted_map_df = selected_map_df.sort_values('time_created', ascending=False)
    print(sorted_map_df)
    

[674, 1592]
     map_id                             file_path     time_created  is_manual
673     674  data/train/label/031986_Site9_t01_85  2020_2_19_12_47          0
      map_id                                  file_path     time_created  \
1591    1592  output/2020_2_20_21_54/674_prediction.npy  2020_2_20_21_54   

      is_manual  
1591          0  


     entry_id  image_id  map_id
0           1         1       1
1           2         2       2
2           3         3       3
3           4         4       4
4           5         5       5
..        ...       ...     ...
704       705       347     705
705       706       572     706
706       707        87     707
707       708       167     708
708       709       668     709

[709 rows x 3 columns]


In [157]:
# Pull the training_log table into a pandas DataFrame
con = sqlite3.connect("active_learning_20191210.db")
df = pd.read_sql_query("SELECT * from training_log", con)
con.close()

# Preview the first rows to confirm the query returned data
print(df.head())

   training_id                  training_time  \
0            1  Wed, 19 Feb 2020 12:03:24 GMT   
1            2  Wed, 19 Feb 2020 12:19:30 GMT   

                                      file_path  from_scratch  
0   models/unet_stroke_2020_2_19_12_3_init.hdf5             1  
1  models/unet_stroke_2020_2_19_12_19_init.hdf5             1  


In [158]:
# Pull the train_to_image_log link table into a pandas DataFrame
con = sqlite3.connect("active_learning_20191210.db")
df = pd.read_sql_query("SELECT * from train_to_image_log", con)
con.close()

# Show the frame to confirm the query result landed in it
print(df)

    entry_id  image_id  training_id
0          1       102            1
1          2        61            1
2          3       543            1
3          4       654            1
4          5       458            1
5          6       183            1
6          7       593            1
7          8       149            1
8          9       137            1
9         10       413            1
10        11       247            2
11        12       260            2
12        13       375            2
13        14       363            2
14        15       140            2
15        16       347            2
16        17       572            2
17        18        87            2
18        19       167            2
19        20       668            2


## Input into tables

In [77]:
#Function to prep the columns for the SQL query
def prep_cols(cols):
    """Build the comma-separated column list used inside an SQL statement.

    :param cols: iterable of column-name strings
    :return: the names joined by ``,`` with no trailing comma; an empty
        string when ``cols`` is empty
    """
    return ','.join(cols)

In [86]:
def create_entry(conn, table_name, task):
    """Insert one row into ``table_name`` and return its row id.

    The column names are read from the table itself and one ``?`` placeholder
    is generated per column, so the function works for a table of any width.
    The row is not committed here; that is left to the caller.

    :param conn: open ``sqlite3`` connection
    :param table_name: name of the table to insert into
    :param task: sequence with one value per table column, in table order
    :return: ``lastrowid`` of the cursor after the insert
    :raises sqlite3.Error: if the table does not exist or ``task`` does not
        match the table's columns
    """
    # Table names cannot be bound as parameters, so quote the identifier
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    cur = conn.cursor()

    # LIMIT 0 fetches no rows but still fills cursor.description with the
    # column names, avoiding a read of the whole table
    cur.execute('SELECT * FROM ' + quoted_table + ' LIMIT 0')
    columns = [col[0] for col in cur.description]

    # Construct sql query with exactly one placeholder per column
    column_list = ', '.join('"' + name.replace('"', '""') + '"' for name in columns)
    placeholders = ', '.join('?' for _ in columns)
    sql = ('INSERT INTO ' + quoted_table + ' (' + column_list + ')'
           + ' VALUES (' + placeholders + ')')

    # execute
    cur.execute(sql, task)
    return cur.lastrowid


def main():
    """Insert one example row into ``training_log``.

    NOTE: this definition reuses the name ``main``, so once this cell has run
    it replaces the earlier schema-creation ``main`` in the kernel.

    The example row carries a fixed ``training_id`` of 10002; because that
    column is the table's primary key, calling this twice on the same
    database makes the second insert fail.

    :return: None
    """
    # Database name
    database = "active_learning_20191210.db"

    # create a database connection
    conn = create_connection(database)
    if conn is None:
        # create_connection already printed the underlying error
        print("Error! cannot create the database connection.")
        return

    try:
        # `with conn` commits on success and rolls back if the insert raises
        with conn:

            # Specific table and inputs
            table_name = 'training_log'
            task_1 = ('10002', '12/10/2019', 'example/file/path', True)

            # create entry
            create_entry(conn, table_name, task_1)
    finally:
        # Release the connection on both the success and the failure path
        conn.close()


In [87]:
# Insert the example row into training_log
main()

In [88]:
# Re-read training_log into a pandas DataFrame after the insert
con = sqlite3.connect("active_learning_20191210.db")
df = pd.read_sql_query("SELECT * from training_log", con)
con.close()

# Preview the first rows to confirm the query returned data
print(df.head())

   training_id training_time          file_path  from_scratch
0          100    12/10/2019  example/filt/path             1
1        10000    12/10/2019  example/file/path             1
2        10001    12/10/2019  example/file/path             1
3        10002    12/10/2019  example/file/path             1


## Delete Row

In [41]:
def deleteRecord(training_id=0, db_file='active_learning_20191210.db'):
    """Delete the ``training_log`` row with the given ``training_id``.

    Progress messages are printed. A ``sqlite3.Error`` is printed rather than
    raised. The success message is printed whenever the statement runs without
    error, whether or not a matching row existed.

    :param training_id: primary-key value of the row to delete (default 0)
    :param db_file: path of the SQLite database (default: this notebook's
        database file)
    :return: None
    """
    # Bind the name first so the finally clause is safe if connect() fails
    con = None
    try:
        con = sqlite3.connect(db_file)
        cursor = con.cursor()
        print("Connected to SQLite")

        # Deleting single record now; the id is passed as a bound parameter
        sql_delete_query = """DELETE from training_log where training_id = ?"""
        cursor.execute(sql_delete_query, (training_id,))
        con.commit()
        print("Record deleted successfully ")
        cursor.close()

    except sqlite3.Error as error:
        print("Failed to delete record from sqlite table", error)
    finally:
        if con is not None:
            con.close()
            print("the sqlite connection is closed")

# Delete the training_log row whose training_id is 0
deleteRecord()

Connected to SQLite
Record deleted successfully 
the sqlite connection is closed


In [42]:
# Re-read training_log into a pandas DataFrame after the delete
con = sqlite3.connect("active_learning_20191210.db")
df = pd.read_sql_query("SELECT * from training_log", con)
con.close()

# Preview the first rows to confirm the query returned data
print(df.head())

   training_id training_time          file_path  from_scratch
0          100    12/10/2019  example/filt/path             1
1        10000    12/10/2019  example/file/path             1
