# Data insertion notebook
First, we load the required libraries.

In [6]:
# Import necessary libraries
import io
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

Function to connect to PostgreSQL:

In [245]:
def create_postgres_engine(user, password, host, port, db_name):
    """Create a SQLAlchemy engine for a PostgreSQL database (psycopg2 driver).

    Args:
        user: Database role name.
        password: Password for ``user``.
        host: Server hostname or IP address.
        port: Server port.
        db_name: Name of the database to connect to.

    Returns:
        The object returned by ``create_engine`` for the assembled URL.
    """
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # raw user name or password would otherwise be read as URL delimiters.
    # safe="" makes sure '/' is encoded as well.
    encoded_user = quote(str(user), safe="")
    encoded_password = quote(str(password), safe="")
    connection_string = (
        f"postgresql+psycopg2://{encoded_user}:{encoded_password}"
        f"@{host}:{port}/{db_name}"
    )
    return create_engine(connection_string)

Database details:

In [248]:
# Connection settings are taken from the standard libpq environment variables
# when they are set; the fallbacks are the local development values this
# notebook was written against, so credentials do not have to be edited in code.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "ex_db"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "password"),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "15432"),
)

# Cursor used by the loading cells of this notebook.
cur = conn.cursor()

Load the data into the database, starting with the demographics file:

In [251]:
# Absolute path to the demographics CSV; adjust it to your local checkout.
csv_file = '/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/Demographics.csv'
table_name = 'demographics'
csv_file_path = csv_file
# COPY ... FROM STDIN streams the file through the client connection;
# CSV HEADER tells the server to skip the first line of the file.
sql_command = f"COPY {table_name} FROM STDIN WITH CSV HEADER"
try:
    with open(csv_file_path, 'r') as f:
        cur.copy_expert(sql_command, f)
    conn.commit()
    print("Data loaded successfully!\n")
except Exception as exc:
    # Report the failure, then undo whatever part of the load had been sent.
    print(f"Error: {exc}")
    conn.rollback()

Data loaded successfully!



Next, we load the data folders listed at the top of the following cell. This will take a while, because the data must be inserted into the database and the tables must then be updated so that the participant_id foreign-key column holds the participant ID (which is the name of the folder).

This script loads all the specified files into the database:

In [253]:
# Input all data folders that you wish to insert into the database.
# The last path component of each folder (e.g. "001") is the participant id.
csv_folders = ['/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/001','/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/002']
# Maps a file-name prefix (file name without its trailing "_<folder>.csv")
# to the table the file is loaded into.
table_names = {
    'ACC':'accelerometer_data',
    'BVP':'blood_volume_pulse',
    'Dexcom_new':'interstitial_glucose',
    'EDA':'electrodermal_activity',
    # 'Food_Log':'food_log',
    'HR':'heart_rate_data',
    'IBI':'ibi_data',
    'TEMP':'temperature_data'
}

# Files we are interested in
interested_files = ['ACC','BVP','Dexcom_new', 'EDA','HR','IBI','TEMP']

# NOTE(review): `columns_names` (table name -> column list interpolated into the
# COPY statement) is not defined in any visible cell of this notebook. It must
# exist before this cell runs; otherwise every file fails with a NameError that
# is only printed by the except block at the bottom of the loop.

for current_csv_folder in csv_folders:
    # The participant id is the folder name, not the whole path. normpath drops
    # a trailing slash so that basename never returns an empty string.
    csv_folder = os.path.basename(os.path.normpath(current_csv_folder))
    try:
        participant_id = int(csv_folder)
    except ValueError:
        print(f"Error: folder name {csv_folder!r} is not a numeric participant id")
        continue

    # sorted() makes the load order independent of the file system.
    for csv_file in sorted(os.listdir(current_csv_folder)):
        if not csv_file.endswith('.csv'):
            continue

        # "ACC_001.csv" -> "ACC", "Dexcom_new_001.csv" -> "Dexcom_new".
        # An exact prefix match avoids a KeyError on unexpected file names.
        file_prefix = os.path.splitext(csv_file)[0].rsplit('_', 1)[0]
        if file_prefix not in interested_files or file_prefix not in table_names:
            continue

        table_name = table_names[file_prefix]
        print(table_name)

        # Full path to the CSV file
        csv_file_path = os.path.join(current_csv_folder, csv_file)

        print(f"Loading table {table_name} of {csv_folder}:")
        try:
            if participant_id == 1:
                # Rows copied without a participant_id get NULL, which the
                # UPDATE below uses to find them. Issued only for participant 1,
                # as before; presumably folders are loaded starting at 001.
                sql_command_alter = f"ALTER TABLE {table_name} ALTER COLUMN participant_id SET DEFAULT NULL"
                cur.execute(sql_command_alter)

            sql_command_copy = f"COPY {table_name} ({columns_names[table_name]}) FROM STDIN WITH CSV HEADER NULL 'NULL'"
            with open(csv_file_path, 'r') as f:
                cur.copy_expert(sql_command_copy, f)

            # Tag the rows that were just copied (still NULL) with this participant.
            sql_command_update = f"UPDATE {table_name} SET participant_id = {participant_id} WHERE participant_id IS NULL"
            cur.execute(sql_command_update)

            conn.commit()
            print("Data loaded successfully!\n")
        except Exception as e:
            # Report the failure and undo the partial load of this file.
            print(f"Error: {e}")
            conn.rollback()

accelerometer_data
Loading table accelerometer_data of 001:
Data loaded successfully!

blood_volume_pulse
Loading table blood_volume_pulse of 001:
Data loaded successfully!

electrodermal_activity
Loading table electrodermal_activity of 001:
Data loaded successfully!

temperature_data
Loading table temperature_data of 001:
Data loaded successfully!

accelerometer_data
Loading table accelerometer_data of 002:
Data loaded successfully!

blood_volume_pulse
Loading table blood_volume_pulse of 002:
Data loaded successfully!

electrodermal_activity
Loading table electrodermal_activity of 002:
Data loaded successfully!

temperature_data
Loading table temperature_data of 002:
Data loaded successfully!



Close connection:

In [6]:
# Close the cursor first, then the connection it was created from.
cur.close()
conn.close()

In [90]:
# Read the Dexcom CSV from folder 002 into a DataFrame.
# The path is machine-specific; adjust it to your local checkout.
df = pd.read_csv('/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/002/Dexcom_002.csv')

In [91]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,Index,Timestamp (YYYY-MM-DDThh:mm:ss),Event Type,Event Subtype,Patient Info,Device Info,Source Device ID,Glucose Value (mg/dL),Insulin Value (u),Carb Value (grams),Duration (hh:mm:ss),Glucose Rate of Change (mg/dL/min),Transmitter Time (Long Integer)
0,1,,FirstName,,2019,,,,,,,,
1,2,,LastName,,002,,,,,,,,
2,3,,PatientIdentifier,,2019-002,,,,,,,,
3,4,,DateOfBirth,,1970-01-01,,,,,,,,
4,5,,Device,,,Dexcom G6 Mobile App,Android G6,,,,,,


In [92]:
# Count the missing values in each column.
df.isnull().sum()

Index                                    0
Timestamp (YYYY-MM-DDThh:mm:ss)         12
Event Type                               0
Event Subtype                         2124
Patient Info                          2127
Device Info                           2130
Source Device ID                         4
Glucose Value (mg/dL)                    8
Insulin Value (u)                     2131
Carb Value (grams)                    2131
Duration (hh:mm:ss)                   2130
Glucose Rate of Change (mg/dL/min)    2129
Transmitter Time (Long Integer)         12
dtype: int64

In [93]:
# Keep the rows from index label 12 onward (all columns), dropping the rows before it.
# NOTE(review): the recorded null counts show 12 rows without a timestamp -
# presumably the leading metadata rows (FirstName, LastName, ...); confirm
# before reusing this cut-off on another file.
df = df.loc[12:,:]

In [94]:
# Renumber the rows from 0; drop=True discards the old index labels
# instead of keeping them as a column.
df = df.reset_index(drop=True)

In [99]:
# Write the frame to Dexcom_new_002.csv without the index column.
# NOTE(review): the execution counts show this cell (In [99]) was run after the
# fillna cells In [95]-[98], which appear further down in the notebook; run top
# to bottom, the file would be written before those fills are applied.
df.to_csv('/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/002/Dexcom_new_002.csv',index=False)

In [54]:
# Re-check the missing values in each column.
df.isnull().sum()

Index                                    0
Timestamp (YYYY-MM-DDThh:mm:ss)          0
Event Type                               0
Event Subtype                         2561
Patient Info                          2561
Device Info                           2561
Source Device ID                         0
Glucose Value (mg/dL)                    0
Insulin Value (u)                     2561
Carb Value (grams)                    2561
Duration (hh:mm:ss)                   2561
Glucose Rate of Change (mg/dL/min)    2561
Transmitter Time (Long Integer)          0
dtype: int64

In [58]:
# Fills missing values with NaN, i.e. with the missing-value marker itself.
# Requires `np` (numpy) to already be imported when this cell runs.
df = df.fillna(np.nan)

In [56]:
import numpy as np

In [68]:
# Count the missing values in each column.
df.isnull().sum()

Index                                    0
Timestamp (YYYY-MM-DDThh:mm:ss)          0
Event Type                               0
Event Subtype                         2561
Patient Info                          2561
Device Info                           2561
Source Device ID                         0
Glucose Value (mg/dL)                    0
Insulin Value (u)                        0
Carb Value (grams)                    2561
Duration (hh:mm:ss)                   2561
Glucose Rate of Change (mg/dL/min)    2561
Transmitter Time (Long Integer)          0
dtype: int64

In [95]:
# Replace missing insulin values with -9999.
# NOTE(review): -9999 looks like a "no value" sentinel - confirm how the database side treats it.
df['Insulin Value (u)'] = df['Insulin Value (u)'].fillna(-9999)

In [96]:
# Replace missing carb values with -9999.
# NOTE(review): -9999 looks like a "no value" sentinel - confirm how the database side treats it.
df['Carb Value (grams)'] = df['Carb Value (grams)'].fillna(-9999)

In [97]:
# Replace missing glucose rate-of-change values with -9999.
# NOTE(review): -9999 looks like a "no value" sentinel - confirm how the database side treats it.
df['Glucose Rate of Change (mg/dL/min)'] = df['Glucose Rate of Change (mg/dL/min)'].fillna(-9999)


In [98]:
# Replace missing durations with the string "00:00:00".
df['Duration (hh:mm:ss)'] = df['Duration (hh:mm:ss)'].fillna("00:00:00")

In [73]:
# Read Dexcom_new_001.csv from folder 001; this replaces the frame held in `df`.
# The path is machine-specific; adjust it to your local checkout.
df = pd.read_csv('/Users/nishantsushmakar/Documents/projects_ulb/timeseriesDB-sensor-data-application/001/Dexcom_new_001.csv')

In [85]:
# Count the missing values in each column.
df.isnull().sum()

Index                                    0
Timestamp (YYYY-MM-DDThh:mm:ss)          0
Event Type                               0
Event Subtype                         2561
Patient Info                          2561
Device Info                           2561
Source Device ID                         0
Glucose Value (mg/dL)                    0
Insulin Value (u)                        0
Carb Value (grams)                       0
Duration (hh:mm:ss)                      0
Glucose Rate of Change (mg/dL/min)       0
Transmitter Time (Long Integer)          0
dtype: int64

In [83]:
# Show the dtype pandas inferred for each column.
df.dtypes

Index                                   int64
Timestamp (YYYY-MM-DDThh:mm:ss)        object
Event Type                             object
Event Subtype                         float64
Patient Info                          float64
Device Info                           float64
Source Device ID                       object
Glucose Value (mg/dL)                 float64
Insulin Value (u)                     float64
Carb Value (grams)                    float64
Duration (hh:mm:ss)                   float64
Glucose Rate of Change (mg/dL/min)    float64
Transmitter Time (Long Integer)       float64
dtype: object

In [None]:
# NOTE(review): bare `df.loc` with no indexer only evaluates the accessor and
# selects nothing; this looks like an unfinished, never-executed cell.
df.loc