# Data import

To open a terminal inside the Postgres Docker container, run `docker exec -it pbdw2018_hackathon-master_data_postgres_1 /bin/bash`.

From that terminal you can then run commands such as

`dropdb mydata -U postgres`

or

`psql postgres -U postgres`


In [33]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import csv

# Connect to the PostgreSQL server (no dbname is given in the connection string,
# so this session is not attached to `mydata`, which is dropped below).
try:
    conn = psycopg2.connect("host=postgresdb user=postgres password=postgres")
except psycopg2.Error:
    print ("Error:  unable to connect to the database")
    # Re-raise: nothing below can work without a connection, and swallowing
    # the error would only resurface as a confusing NameError on `conn`.
    raise

try:
    # Autocommit, so each statement runs on its own instead of inside a
    # transaction block (PostgreSQL rejects DROP/CREATE DATABASE in one).
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

    # Open a cursor to perform database operations
    cur = conn.cursor()

    # Terminate every other session connected to `mydata`
    try:
        cur.execute("SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = 'mydata' AND pid <> pg_backend_pid();")
    except psycopg2.Error as exc:
        print("Error killing database connections, perhaps it does not exist?")
        print(exc)

    # Drop the database (best effort: it may not exist on a first run)
    try:
        cur.execute("DROP DATABASE mydata")
    except psycopg2.Error as exc:
        print("Error while dropping database, perhaps it does not exist?")
        print(exc)

    # Create the database afresh
    try:
        cur.execute("CREATE DATABASE mydata;")
    except psycopg2.Error as exc:
        print("Error while creating database, does it already exist?")
        print(exc)
finally:
    # Close database connection, even if something unexpected went wrong
    conn.close()

In [34]:
# Connect to the `mydata` database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    # Open a cursor to perform database operations
    cur = conn.cursor()
    # Create the table; every column (including the numeric-looking ones)
    # is declared as text, with `id` as the primary key.
    try:
        cur.execute("""CREATE TABLE PBDWHackathon2018 (
    id text PRIMARY KEY,
    treatingCentre text,
    contactPersonForParticleCentre text,
    referringCentre text,
    registrationDate text,
    sex text,
    age text,
    birthYear text,
    educationLevel text,
    relationshipStatus text,
    smokingStatus text,
    packYears text,
    timeStoppedSmoking text,
    alcoholUseHistory text,
    bodyWeight text,
    bodyHeight text,
    charlsonComorbidityIndex text,
    weightLoss3MonthsBeforeRT text
    );
    """)
    except psycopg2.Error as exc:
        # Only database errors are treated as "probably exists already";
        # the underlying message is shown so other causes are not hidden.
        print("Error while creating table, does it already exist?")
        print(exc)
finally:
    # Close database connection, even if an unexpected error occurred
    conn.close()

In [35]:
import pandas
# Columns of PBDWHackathon2018, in insertion order. The CSV is expected to
# provide a column with the same name for each of them.
columns = [
    "id", "treatingCentre", "contactPersonForParticleCentre", "referringCentre",
    "registrationDate", "sex", "age", "birthYear", "educationLevel",
    "relationshipStatus", "smokingStatus", "packYears", "timeStoppedSmoking",
    "alcoholUseHistory", "bodyWeight", "bodyHeight", "charlsonComorbidityIndex",
    "weightLoss3MonthsBeforeRT",
]
# One %s placeholder per column; values are always passed separately so that
# psycopg2 does the quoting.
insert_sql = "INSERT INTO PBDWHackathon2018 ({}) VALUES ({})".format(
    ", ".join(columns), ", ".join(["%s"] * len(columns))
)

# Open the CSV file (semicolon-separated)
df = pandas.read_csv('dummyCharacteristics.csv', delimiter=';')

# Connect to the `mydata` database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    cur = conn.cursor()
    # iterrows() is kept on purpose so the values handed to psycopg2 are the
    # same as before.
    for index, row in df.iterrows():
        cur.execute(insert_sql, tuple(row[name] for name in columns))
    # Commit only after every row went in; if an insert fails, the connection
    # is closed without committing and no partial import is left behind.
    conn.commit()
finally:
    # Previously this connection was never closed.
    conn.close()

In [36]:
# Display the DataFrame read from dummyCharacteristics.csv as this cell's output
df

Unnamed: 0,id,treatingCentre,contactPersonForParticleCentre,referringCentre,registrationDate,sex,age,birthYear,educationLevel,relationshipStatus,smokingStatus,packYears,timeStoppedSmoking,alcoholUseHistory,bodyWeight,bodyHeight,charlsonComorbidityIndex,weightLoss3MonthsBeforeRT
0,1,GPTC,Dhr Jansen,Medisch Centrum Alkmaar,1-1-2019,m,61,1958,no education,divorced,smokes tobacco daily,40,999,current drinker of alcohol,85,177,12,5
1,2,Holland PTC,Dhr Jansen,Vumc,2-1-2019,f,62,1959,primary education,married,occasional cigarette smoker,25,999,current drinker of alcohol,76,167,22,3
2,3,ZonPTC,Dhr Jansen,AMC,3-1-2019,m,63,1960,preparatory covational education,never married,passive smoker,15,999,current drinker of alcohol,67,187,18,0
3,4,APTC,Dhr Jansen,Reinier de Graaf Groep,4-1-2019,f,64,1961,general secondary vocational education,widowed,ex smoker,23,120,current non drinker of alcohol,98,185,25,3
4,5,not applicable,Dhr Jansen,RCWest,5-1-2019,m,65,1962,higher general and preparatory scientific educ...,domestic partner,never smoked tobacco,0,999,ex drinker,88,156,14,6
5,6,GPTC,Dhr Jansen,Catharina Ziekenhuis,6-1-2019,f,66,1963,scientific education,married,smokes tobacco daily,55,999,lifetime non drinker,63,165,24,4
6,7,ZonPTC,Dhr Jansen,LUMC,7-1-2019,m,67,1964,other,never married,ex smoker,35,240,current drinker of alcohol,74,164,19,2
7,8,APTC,Dhr Jansen,Maastro,8-1-2019,f,68,1965,higher professional education,domestic partner,ex smoker,32,360,current drinker of alcohol,78,176,30,1
8,9,ZonPTC,Dhr Jansen,ErasmusMC,9-1-2019,m,69,1966,general secondary vocational education,divorced,never smoked tobacco,0,999,other,62,167,5,7
9,10,GPTC,Dhr Jansen,RadboudUMC,10-1-2019,f,70,1967,higher professional education,never married,never smoked tobacco,0,999,ex drinker,97,169,9,3


In [37]:
# Connect to the `mydata` database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
try:
    cur = conn.cursor()

    # Read back every row of the table to check the import
    cur.execute("""SELECT *
    FROM PBDWHackathon2018;""")

    # fetchall() copies the rows into a list, so they stay usable after close()
    results = cur.fetchall()
finally:
    # Close the connection even if the query fails
    conn.close()

for row in results:
    print(row)

('1', 'GPTC', 'Dhr Jansen', 'Medisch Centrum Alkmaar', '1-1-2019', 'm', '61', '1958', 'no education', 'divorced', 'smokes tobacco daily', '40', '999', 'current drinker of alcohol', '85', '177', '12', '5')
('2', 'Holland PTC', 'Dhr Jansen', 'Vumc', '2-1-2019', 'f', '62', '1959', 'primary education', 'married', 'occasional cigarette smoker', '25', '999', 'current drinker of alcohol', '76', '167', '22', '3')
('3', 'ZonPTC', 'Dhr Jansen', 'AMC', '3-1-2019', 'm', '63', '1960', 'preparatory covational education', 'never married', 'passive smoker', '15', '999', 'current drinker of alcohol', '67', '187', '18', '0')
('4', 'APTC ', 'Dhr Jansen', 'Reinier de Graaf Groep', '4-1-2019', 'f', '64', '1961', 'general secondary vocational education', 'widowed', 'ex smoker', '23', '120', 'current non drinker of alcohol', '98', '185', '25', '3')
('5', 'not applicable', 'Dhr Jansen', 'RCWest', '5-1-2019', 'm', '65', '1962', 'higher general and preparatory scientific education', 'domestic partner', 'never s