# Synthea Case Study - Preparing for Machine Learning
*Lab 28 April 2022*

In [1]:
%defaultDatasource jdbc:h2:mem:db

In [2]:
-- Patients: rebuilt from patients.csv on every run and keyed by id.
-- Only the columns listed in the SELECT are loaded from the CSV.
DROP TABLE IF EXISTS Patients;

CREATE TABLE Patients (
    id         VARCHAR(36) PRIMARY KEY,
    birthdate  DATE,
    deathdate  DATE,
    maiden     VARCHAR(20),
    marital    CHAR,
    race       VARCHAR(10),
    ethnicity  VARCHAR(15),
    gender     CHAR,
    birthplace VARCHAR(100),
    address    VARCHAR(50),
    city       VARCHAR(30),
    state      VARCHAR(30),
    county     VARCHAR(50),
    zip        VARCHAR(10),
    lat        DECIMAL(18,15),
    lon        DECIMAL(18,15)
)
AS
SELECT
    id,
    birthdate,
    deathdate,
    maiden,
    marital,
    race,
    ethnicity,
    gender,
    birthplace,
    address,
    city,
    state,
    county,
    zip,
    lat,
    lon
FROM CSVREAD('../../data/synthea/scenario01/csv/patients.csv');

-- Encounters: rebuilt from encounters.csv on every run and keyed by id.
-- The patient column holds the Patients.id value used by the joins further down.
DROP TABLE IF EXISTS Encounters;

CREATE TABLE Encounters (
    id                VARCHAR(36) PRIMARY KEY,
    start             DATETIME,
    stop              DATETIME,
    patient           VARCHAR(36),
    organization      VARCHAR(36),
    provider          VARCHAR(36),
    encounterclass    VARCHAR(20),
    code              VARCHAR(15),
    description       VARCHAR(100),
    reasoncode        VARCHAR(15),
    reasondescription VARCHAR(100)
)
AS
SELECT
    id,
    start,
    stop,
    patient,
    organization,
    provider,
    encounterclass,
    code,
    description,
    reasoncode,
    reasondescription
FROM CSVREAD('../../data/synthea/scenario01/csv/encounters.csv');

-- Conditions: rebuilt from conditions.csv on every run.
-- The table has no primary key, so rows are not constrained to be unique.
DROP TABLE IF EXISTS Conditions;

CREATE TABLE Conditions (
    start                 DATETIME,
    stop                  DATETIME,
    patient               VARCHAR(36),
    encounter             VARCHAR(36),
    code                  VARCHAR(20),
    description_condition VARCHAR(100)
)
AS
SELECT
    start,
    stop,
    patient,
    encounter,
    code,
    -- The CSV column is called description. It is stored as description_condition,
    -- which is the name the flag updates filter on.
    description AS description_condition
FROM CSVREAD('../../data/synthea/scenario01/csv/conditions.csv');

# Building the initial feature matrix, with the condition flags initialized to 0

In [3]:
-- EmergencyML: one row per patient that has at least one emergency encounter.
--   last_encounter        start of the most recent emergency encounter
--   deathdate             copied from Patients (NULL when none is recorded)
--   cardiac_arrest        flag created as 0 here and set by a later UPDATE
--   myocardial_infarction flag created as 0 here and set by a later UPDATE
DROP TABLE IF EXISTS EmergencyML;

CREATE TABLE EmergencyML (
    patient               VARCHAR(36),
    last_encounter        DATETIME,
    deathdate             DATETIME,
    cardiac_arrest        SMALLINT DEFAULT 0,
    myocardial_infarction SMALLINT DEFAULT 0,
    PRIMARY KEY(patient)
)
AS
SELECT
    e.patient,
    MAX(e.start) AS last_encounter,
    p.deathdate,
    0 AS cardiac_arrest,
    0 AS myocardial_infarction
FROM Encounters AS e
INNER JOIN Patients AS p
    ON p.id = e.patient
WHERE e.encounterclass = 'emergency'
-- p.deathdate is listed so that every non-aggregated column is grouped.
-- Patients.id is a primary key, so this still yields one row per patient.
GROUP BY
    e.patient,
    p.deathdate;

-- Preview of the first rows, ordered so that the output is reproducible.
SELECT
    patient,
    last_encounter,
    deathdate,
    cardiac_arrest,
    myocardial_infarction
FROM EmergencyML
ORDER BY patient
LIMIT 10;

In [4]:
-- Set cardiac_arrest to 1 for each patient with at least one condition row
-- whose description is exactly Cardiac Arrest.
UPDATE EmergencyML AS ml
SET cardiac_arrest = 1
WHERE EXISTS (
    SELECT 1
    FROM Conditions AS cond
    WHERE cond.description_condition = 'Cardiac Arrest'
      AND cond.patient = ml.patient
);

-- Set myocardial_infarction to 1 for each patient with at least one condition row
-- whose description is exactly Myocardial Infarction.
UPDATE EmergencyML AS ml
SET myocardial_infarction = 1
WHERE EXISTS (
    SELECT 1
    FROM Conditions AS cond
    WHERE cond.description_condition = 'Myocardial Infarction'
      AND cond.patient = ml.patient
);

In [5]:
-- Show the full feature table after the flag updates, columns in table order.
SELECT
    patient,
    last_encounter,
    deathdate,
    cardiac_arrest,
    myocardial_infarction
FROM EmergencyML;

In [6]:
-- Export the feature table to CSV for the machine learning step.
-- The column list and ORDER BY fix the column order and row order of the file,
-- so repeated runs on the same data produce the same output.
CALL CSVWRITE(
    '../../data/synthea/scenario01/csv-ml/emergency-ml.csv',
    'SELECT patient, last_encounter, deathdate, cardiac_arrest, myocardial_infarction FROM EmergencyML ORDER BY patient'
);

800