# Goals

* Test connecting to and updating a GCP Cloud SQL database

In [1]:
import os
import psycopg2
import pandas as pd
from pypika import Query, Table, Field, Column

In [2]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the
# connection settings further down are read from os.environ.
load_dotenv()

True

# Connect

In [3]:
# GCP PostgreSQL connection settings, all taken from environment variables.
# The host is a filesystem path under ~/cloudsql rather than a hostname
# (presumably the Cloud SQL Auth Proxy Unix-socket directory -- confirm).
host = os.path.join(
    os.path.expanduser("~"),
    "cloudsql",
    os.environ["GCP_SQL_DB_HOST"],
)

# Keyword arguments passed straight to psycopg2.connect().
db_params = dict(
    host=host,
    database=os.environ["GCP_SQL_DB_NAME"],
    user=os.environ["GCP_SQL_DB_USERNAME"],
    password=os.environ["GCP_SQL_DB_PASSWORD"],
    port='5432',
    connect_timeout=10,
)

In [4]:
def get_db_connection():
    """Open a psycopg2 connection using the module-level ``db_params``.

    Returns:
        The new connection, or ``None`` when connecting raised any
        exception (the error is printed instead of being propagated).
    """
    try:
        connection = psycopg2.connect(**db_params)
    except Exception as e:
        print(f"Error connecting to database: {e}")
        connection = None
    return connection


# Shared connection used by the helpers below; None if connecting failed.
conn = get_db_connection()

In [5]:
# # list all tables
# def list_tables():
#     query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';"
#     with conn.cursor() as cur:
#         cur.execute(query)
#         tables = cur.fetchall()
#         return tables
# list_tables()

In [15]:
# list tables in pypika
def list_tables_pypika():
    """Return the names of the tables in the ``public`` schema.

    The query against ``information_schema.tables`` is built with pypika
    and executed on the shared ``conn``.

    Returns:
        The rows from ``cursor.fetchall()``, one 1-tuple per table name.
    """
    info_tables = Table('tables', schema='information_schema')
    query = (
        Query.from_(info_tables)
        .select('table_name')
        .where(info_tables.table_schema == 'public')
    )
    with conn.cursor() as cur:
        cur.execute(str(query))
        return cur.fetchall()


list_tables_pypika()

[('srx_metadata',), ('srx_srr',), ('screcounter',)]

# Create tables

In [7]:
def execute_query(stmt):
    """Execute one SQL statement on the shared ``conn`` and commit it.

    Args:
        stmt: SQL text, or any object whose ``str()`` is the SQL text
            (for example a pypika query builder).

    Raises:
        psycopg2.Error: any database error other than ``DuplicateTable``,
            re-raised after the transaction has been rolled back.

    A ``DuplicateTable`` error is printed and swallowed so that re-running
    a create-table cell is harmless.
    """
    try:
        with conn.cursor() as cur:
            cur.execute(str(stmt))
        conn.commit()
    except psycopg2.errors.DuplicateTable as e:
        # A failed statement leaves the transaction aborted; without a
        # rollback every later statement on this connection is rejected.
        conn.rollback()
        print(f"Table already exists: {e}")
    except psycopg2.Error:
        # Same reason: reset the connection before letting the error surface.
        conn.rollback()
        raise

In [8]:
# SRX_metadata: primary key on id, unique on (database, entrez_id)
stmt = (
    Query.create_table("srx_metadata")
    .columns(
        Column("id", "INT", nullable=False),
        Column("database", "VARCHAR(20)", nullable=False),
        Column("entrez_id", "INT", nullable=False),
        Column("SRX", "VARCHAR(20)", nullable=False),
        Column("is_illumina", "VARCHAR(10)", nullable=False),
        Column("is_single_cell", "VARCHAR(10)", nullable=False),
        Column("is_paired_end", "VARCHAR(10)", nullable=False),
        Column("is_10x", "VARCHAR(10)", nullable=False),
        Column("tech_10x", "VARCHAR(20)", nullable=False),
        Column("organism", "VARCHAR(50)", nullable=False),
    )
    .unique("database", "entrez_id")
    .primary_key("id")
)

# Run the CREATE TABLE statement built above.
execute_query(stmt)

In [9]:
# SRX_SRR: primary key on id, unique on (srx_id, srr_id)
stmt = (
    Query.create_table("srx_srr")
    .columns(
        Column("id", "INT", nullable=False),
        Column("srx_id", "VARCHAR(20)", nullable=False),
        Column("srr_id", "VARCHAR(20)", nullable=False),
    )
    .unique("srx_id", "srr_id")
    .primary_key("id")
)

# Run the CREATE TABLE statement built above.
execute_query(stmt)

In [10]:
# scRecounter log: primary key on id; task_exit_status is the only column
# declared without nullable=False
stmt = (
    Query.create_table("screcounter")
    .columns(
        Column("id", "INT", nullable=False),
        Column("sample_id", "VARCHAR(20)", nullable=False),
        Column("pipeline_version", "VARCHAR(10)", nullable=False),
        Column("run_id", "VARCHAR(30)", nullable=False),
        Column("task_name", "VARCHAR(20)", nullable=False),
        Column("task_exit_status", "VARCHAR(10)"),
        Column("log", "TEXT", nullable=False),
    )
    .primary_key("id")
)

# Run the CREATE TABLE statement built above.
execute_query(stmt)

# Delete tables

In [None]:
# Build the DROP TABLE statement, echo the generated SQL, then run it.
stmt = Query.drop_table("test_table")
print(stmt)
execute_query(stmt)

DROP TABLE "test_table"


# OLD

In [22]:
# insert data into a table
def insert_data(table_name, columns, data):
    """Insert rows into ``table_name`` on the shared ``conn`` and commit.

    Args:
        table_name: Name of the target table, interpolated verbatim.
        columns: Comma-separated column list, interpolated verbatim.
        data: Pre-formatted VALUES fragment, e.g. ``"('test')"``.

    NOTE(review): the SQL is assembled by string interpolation, so only
    trusted values must be passed in.
    """
    insert_sql = f"INSERT INTO {table_name} ({columns}) VALUES {data};"
    with conn.cursor() as cursor:
        cursor.execute(insert_sql)
    conn.commit()


insert_data("test_table", "name", "('test')")

In [23]:
# print out the data in the table
def get_data(table_name):
    """Fetch every row of ``table_name`` using the shared ``conn``.

    Args:
        table_name: Name of the table, interpolated verbatim into the SQL.

    Returns:
        The rows from ``cursor.fetchall()``.
    """
    select_all = f"SELECT * FROM {table_name};"
    with conn.cursor() as cursor:
        cursor.execute(select_all)
        return cursor.fetchall()


get_data("test_table")

[(1, 'test')]