# Data import

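To start the database used in this notebook, please run `docker run -d --name pgData -e POSTGRES_PASSWORD=postgres -p 5432:5432 postgres:latest`. The official image will not initialise without `POSTGRES_PASSWORD`, and `postgres` is the password the cells below connect with. Note that the cells connect to the host name `postgresdb`, so the container must be reachable under that name (for example via a Docker network alias).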

To stop the database (and remove it), run `docker stop pgData && docker rm pgData`

In [1]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import csv

# Connect to the server without a dbname: "mydata" may not exist yet.
conn = psycopg2.connect("host=postgresdb user=postgres password=postgres")
try:
    # CREATE DATABASE cannot run inside a transaction block, so switch the
    # connection to autocommit before issuing it.
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    # Open a cursor to perform database operations
    cur = conn.cursor()
    # Execute a command: this creates the database used by the cells below
    try:
        cur.execute("CREATE DATABASE mydata;")
    except psycopg2.Error as exc:
        # Stay re-runnable when the database already exists, but only catch
        # database errors (not KeyboardInterrupt etc.) and show the real cause
        # instead of guessing at it.
        print("Error while creating database, does it already exist?")
        print(f"Details: {str(exc).strip()}")
finally:
    # Close database connection, even if one of the steps above raised
    conn.close()

In [2]:
# DDL for the clinical table; ID is the primary key.
# NOTE(review): every column is declared as text, including numeric-looking
# ones (Age, Survival_Time_Days, deadstatus_event) -- presumably to load the
# CSV verbatim; confirm before relying on numeric comparisons in SQL.
create_table_sql = """CREATE TABLE lung1 (
    ID text PRIMARY KEY,
    PatientID text,
    Age text,
    Clinical_T_Stage text,
    Clinical_N_Stage text,
    Clinical_M_Stage text,
    Overall_Ajcc_Stage text,
    Histology text,
    Sex text,
    Survival_Time_Days text,
    deadstatus_event text);
    """

# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    # Autocommit: the CREATE TABLE takes effect without an explicit commit.
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    # Open a cursor to perform database operations
    cur = conn.cursor()
    # Execute a command: this creates a new table
    try:
        cur.execute(create_table_sql)
    except psycopg2.Error as exc:
        # Stay re-runnable when the table already exists, but only catch
        # database errors and report the actual cause alongside the hint.
        print("Error while creating table lung1, does it already exist?")
        print(f"Details: {str(exc).strip()}")
finally:
    # Close database connection, even if one of the steps above raised
    conn.close()

In [3]:
# Load every data row of Lung1_Clinical_Clean.csv into the lung1 table.
# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    cur = conn.cursor()

    # Open the CSV file. newline='' is what the csv module expects, so that
    # line breaks inside quoted fields are parsed correctly.
    with open('Lung1_Clinical_Clean.csv', 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; no error on an empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they would not match
                # the 11 placeholders below.
                continue
            # ON CONFLICT keeps the cell re-runnable: rows whose ID is already
            # present are skipped instead of raising a primary-key violation.
            # Trade-off: duplicate IDs inside the CSV are skipped silently too.
            cur.execute("""INSERT INTO lung1 (ID, PatientID, Age, Clinical_T_Stage, Clinical_N_Stage, Clinical_M_Stage, Overall_Ajcc_Stage, Histology, Sex, Survival_Time_Days, deadstatus_event)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT (ID) DO NOTHING""", row)

    # Single commit after all rows: the import is all-or-nothing.
    conn.commit()
finally:
    # Always release the connection; closing without a commit discards any
    # pending inserts, so a failed import leaves the table unchanged.
    conn.close()

In [4]:
# Read back every row of lung1 to verify the import.
# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    cur = conn.cursor()

    # No ORDER BY: rows come back in whatever order the server returns them.
    cur.execute("""SELECT ID, PatientID, Age, Clinical_T_Stage, Clinical_N_Stage, Clinical_M_Stage, Overall_Ajcc_Stage, Histology, Sex, Survival_Time_Days, deadstatus_event
        FROM lung1;""")

    # Fetch everything into memory so the connection can be released
    # before the rows are printed.
    results = cur.fetchall()
finally:
    # Close the connection even if the query or fetch raised
    conn.close()

for row in results:
    print(row)

('1', 'LUNG1-001', '79', '2', '3', '0', 'IIIb', 'large_cell', 'male', '2165', '1')
('2', 'LUNG1-002', '84', '2', '0', '0', 'I', 'scc', 'male', '155', '1')
('3', 'LUNG1-003', '68', '2', '3', '0', 'IIIb', 'large_cell', 'male', '256', '1')
('4', 'LUNG1-004', '71', '2', '1', '0', 'II', 'scc', 'male', '141', '1')
('5', 'LUNG1-005', '80', '4', '2', '0', 'IIIb', 'scc', 'male', '353', '1')
('6', 'LUNG1-006', '74', '3', '1', '0', 'IIIa', 'scc', 'male', '173', '1')
('7', 'LUNG1-007', '82', '2', '2', '0', 'IIIa', 'scc', 'male', '137', '1')
('8', 'LUNG1-008', '72', '2', '2', '0', 'IIIa', 'adeno', 'male', '77', '1')
('9', 'LUNG1-009', '56', '2', '2', '0', 'IIIa', 'scc', 'male', '131', '1')
('10', 'LUNG1-010', '71', '4', '3', '0', 'IIIb', 'scc', 'female', '2119', '0')
('11', 'LUNG1-011', '64', '4', '0', '0', 'IIIb', 'scc', 'male', '515', '1')
('12', 'LUNG1-012', '71', '3', '2', '0', 'IIIa', 'scc', 'male', '85', '1')
('13', 'LUNG1-013', '65', '2', '0', '0', 'I', 'nos', 'male', '646', '0')
('14', 'LUN