# Data insertion notebook
First, we load the required libraries.

In [2]:
# Import necessary libraries
import pandas as pd
from sqlalchemy import create_engine
import psycopg2
import os
import io

Function to connect to PostgreSQL:

In [4]:
def create_postgres_engine(user, password, host, port, db_name):
    """Create a SQLAlchemy engine for a PostgreSQL database (psycopg2 driver).

    The user name and password are percent-encoded before being placed in
    the connection URL, so credentials containing URL delimiters such as
    ``@``, ``:``, ``/`` or ``+`` cannot corrupt or change the meaning of
    the URL.

    Args:
        user: Database role name.
        password: Password for ``user``.
        host: Server host name or IP address.
        port: Server port (string or integer).
        db_name: Name of the database to connect to.

    Returns:
        Whatever ``create_engine`` returns for the assembled URL.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import quote

    # safe="" forces every reserved character (including "/") to be escaped;
    # quote() is used rather than quote_plus() so a space becomes %20, which
    # decodes correctly under both unquote() and unquote_plus().
    safe_user = quote(str(user), safe="")
    safe_password = quote(str(password), safe="")
    connection_string = (
        f"postgresql+psycopg2://{safe_user}:{safe_password}@{host}:{port}/{db_name}"
    )
    return create_engine(connection_string)

Database details:

In [6]:
# Connection settings come from the standard libpq-style environment
# variables so real credentials never have to be written into the notebook.
# The fallbacks are the local development values this notebook was written
# against; export PGPASSWORD (and the others) before targeting another server.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "glycemic"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "mysecretpassword"),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "15432"),
)

# Cursor used by the subsequent cells to run COPY and policy statements.
cur = conn.cursor()

Load the data into the database, starting with the demographics file:

In [8]:
# Target table and source file (the CSV must sit next to this notebook).
table_name = 'demographics'
csv_file = 'Demographics.csv'
csv_file_path = os.path.join('', csv_file)

# COPY ... FROM STDIN streams the file through the open connection;
# HEADER tells the server to skip the first CSV line.
sql_command = f"COPY {table_name} FROM STDIN WITH CSV HEADER"

try:
    with open(csv_file_path, 'r') as csv_handle:
        cur.copy_expert(sql_command, csv_handle)
    # Commit only after the whole file has been streamed.
    conn.commit()
    print("Data loaded successfully!\n")
except Exception as load_error:
    # Report the problem, then discard the failed transaction so the
    # connection remains usable for later cells.
    print(f"Error: {load_error}")
    conn.rollback()

Data loaded successfully!



Next, we load the data folders declared in the first lines of the following cell (`csv_folders`). This will take a while, since the data needs to be inserted into the database and the tables need to be updated to include the foreign key `participant_id` (which is the name of the folder) as a new column.

This script loads all the specified files into the database:

In [10]:
# Input all data folders that you wish to insert into the database
csv_folders = ['001'] # e.g.['001','002','003']

# Maps the prefix of a CSV file name to the table it is copied into.
table_names = {
    'ACC':'accelerometer_data',
    'BVP':'blood_volume_pulse',
    'Dexcom':'interstitial_glucose',
    'EDA':'electrodermal_activity',
    'Food_Log':'food_log',
    'HR':'heart_rate_data',
    'IBI':'ibi_data',
    'TEMP':'temperature_data'
}

# File prefixes we are interested in for this run
interested_files = ['TEMP']#['ACC', 'BVP', 'EDA', 'TEMP']

for current_csv_folder in csv_folders:
    # Sorted so the load order (and the printed log) is the same on every run;
    # os.listdir() returns entries in arbitrary order.
    for csv_file in sorted(os.listdir(current_csv_folder)):
        if not csv_file.endswith('.csv'):
            continue

        # Assumes files are named "<prefix>_<participant>.csv" (the previous
        # fixed 8-character slice implied "_001.csv"). Dropping only the last
        # "_"-separated token keeps prefixes such as "Food_Log" intact and
        # works for participant ids of any length.
        file_prefix = csv_file[:-len('.csv')].rsplit('_', 1)[0]

        # Exact match on the prefix: the file filter and the table lookup now
        # use the same key, so a stray file can no longer raise KeyError and
        # abort the whole loop.
        if file_prefix not in interested_files:
            continue
        table_name = table_names.get(file_prefix)
        if table_name is None:
            print(f"Skipping {csv_file}: no table configured for '{file_prefix}'\n")
            continue

        # Full path to the CSV file
        csv_file_path = os.path.join(current_csv_folder, csv_file)
        print(f"Loading table {table_name} of {current_csv_folder}:")
        try:
            with open(csv_file_path, 'r') as f:
                # The literal string NULL in the CSV is stored as SQL NULL.
                sql_command_copy = f"COPY {table_name} FROM STDIN WITH CSV HEADER NULL 'NULL'"
                cur.copy_expert(sql_command_copy, f)
            conn.commit()
            print("Data loaded successfully!\n")
        except Exception as e:
            # Report and roll back so the next file starts from a clean
            # transaction instead of an aborted one.
            print(f"Error: {e}")
            conn.rollback()

Loading table temperature_data of 001:
Data loaded successfully!



Enabling compression of the loaded data:

In [16]:
# Register a compression policy (compress data older than two weeks) on every
# table selected for this run.
for item in interested_files:
    current_table = table_names[item]
    try:
        # if_not_exists makes the cell safe to re-run: an already registered
        # policy is reported as a notice instead of raising an error.
        # The table name is passed as a bound parameter rather than being
        # formatted into the SQL text.
        cur.execute(
            "SELECT add_compression_policy(%s, INTERVAL '2 weeks', if_not_exists => true)",
            (current_table,),
        )
        conn.commit()
        print(f"Compression was successful for table {current_table}!\n")
    except Exception as e:
        # Report and roll back so the remaining tables are still processed
        # on a clean transaction.
        print(f"Error: {e}")
        conn.rollback()

Compression was successful for table temperature_data!



Close connection:

In [None]:
# Release database resources in order: the cursor first, then the connection.
for db_resource in (cur, conn):
    db_resource.close()