## Database initialization

Connect to the Amazon RDS PostgreSQL database and create the project tables.

In [None]:
#!pip install pandas

In [None]:
#!pip install ipython-sql # SQL magic function

In [None]:
#!conda install psycopg2

In [None]:
#!pip install pgspecial

In [None]:
import pandas as pd
import getpass

In [None]:
# Load the ipython-sql extension, which provides the %sql and %%sql magics used below.
%load_ext sql

In [None]:
# Connection settings for the Amazon RDS PostgreSQL instance.
endpoint = "capstone.clihskgj8i7s.us-west-2.rds.amazonaws.com"
user = "group3"
db = "db1"

# Ask for the password interactively so it is never stored in the notebook.
pw = getpass.getpass(prompt="Enter database password")

In [None]:
%sql postgres://$user:$pw@$endpoint/$db

In [None]:
#%sql drop table CovidCases;
#%sql drop table CovidLocs;

In [None]:
%%sql
CREATE TABLE CovidLocs (
    -- one row per location, identified by uid
    uid             INTEGER PRIMARY KEY NOT NULL,
    iso2            CHAR(4) NOT NULL,
    iso3            CHAR(3) NOT NULL,
    code3           CHAR(4) NOT NULL,
    fips            FLOAT,
    admin2          TEXT,
    province_state  TEXT NOT NULL,
    country_region  TEXT NOT NULL,
    -- the coordinates are kept as two floats because the point datatype
    -- could not be made to work with pandas
    -- latlon point not null,
    latitude        FLOAT,
    longitude       FLOAT,
    combined_key    VARCHAR,
    population      FLOAT
);

In [None]:
#%sql drop table covidcases;

In [None]:
%%sql
CREATE TABLE CovidCases (
    cid    SERIAL PRIMARY KEY,
    -- cloc points at the CovidLocs row this count belongs to
    cloc   INTEGER REFERENCES CovidLocs (uid),
    rdate  DATE NOT NULL,
    ncas   INTEGER NOT NULL,
    -- ctype is either a confirmed case (C) or a death (D)
    ctype  CHAR(1) NOT NULL CONSTRAINT allowed_ctypes CHECK (ctype IN ('C', 'D')),
    -- at most one row per location, date and case type
    UNIQUE (cloc, rdate, ctype)
);

Create indexes on the two COVID tables: one on the location foreign key `CovidCases.cloc` and one on `CovidLocs.fips`.

In [None]:
%sql create index cloc_index on CovidCases(cloc);

In [None]:
%sql create index fips_index on CovidLocs(fips);

In [None]:
%%sql
CREATE TABLE hospitals (
    -- one row per hospital, identified by oshpd_id
    oshpd_id     INTEGER PRIMARY KEY,
    zipcode      INTEGER NOT NULL,
    name         VARCHAR NOT NULL,
    county_name  VARCHAR NOT NULL,
    latitude     FLOAT NOT NULL,
    longitude    FLOAT NOT NULL
);

In [None]:
%sql drop table pemslocs;

In [None]:
%%sql
CREATE TABLE pemslocs (
    -- one row per station, identified by sid
    sid       INTEGER PRIMARY KEY,
    fwy       SMALLINT NOT NULL,
    -- direc is limited to the four compass letters
    direc     CHAR(1) CONSTRAINT allowed_dirs CHECK (direc IN ('N', 'S', 'E', 'W')),
    district  SMALLINT NOT NULL,
    county    SMALLINT NOT NULL,
    city      INTEGER,
    state_pm  VARCHAR NOT NULL,
    abs_pm    FLOAT NOT NULL,
    latitude  FLOAT,
    longitude FLOAT,
    length    FLOAT,
    -- stype is limited to a fixed set of two-letter codes
    stype     VARCHAR(2) CONSTRAINT allowed_types CHECK (stype IN ('ML', 'CH', 'OR', 'FR', 'HV', 'FF', 'CD')),
    lanes     SMALLINT NOT NULL,
    name      VARCHAR NOT NULL
    -- the USER_ID_1 to USER_ID_4 fields are left out for now
);

In [None]:
%%sql
DROP TABLE IF EXISTS traffic;
-- dropping the table also removes its indexes, so the next two statements only act as a safety net
DROP INDEX IF EXISTS traffic_station_index;
DROP INDEX IF EXISTS traffic_time_index;

In [None]:
%%sql
CREATE TABLE traffic (
    -- one row per timestamp and station
    timestamp      TIMESTAMP NOT NULL,
    -- station points at a row of pemslocs
    station        INTEGER REFERENCES pemslocs (sid),
    samples        SMALLINT NOT NULL,
    pct_observed   SMALLINT NOT NULL,
    total_flow     INTEGER,
    avg_occupancy  FLOAT,
    avg_speed      FLOAT,
    PRIMARY KEY (timestamp, station)
);

In [None]:
%sql create index traffic_station_index on traffic(station);

In [None]:
%sql create index traffic_time_index on traffic(timestamp);

In [None]:
import sqlalchemy as sal

In [None]:
# Build the SQLAlchemy engine that pandas uses to talk to the database.
# The user name and password are percent-encoded so that characters with a
# special meaning inside a URL (for example "@", ":" or "/") cannot corrupt
# it; values without such characters produce exactly the same URL as before.
from urllib.parse import quote

engine = sal.create_engine(
    'postgresql://%s:%s@%s/%s'
    % (quote(user, safe=''), quote(pw, safe=''), endpoint, db)
)

In [None]:
# Show the engine object's repr as the cell output.
engine

In [None]:
# Read the whole covidlocs table into a DataFrame through the SQLAlchemy engine.
pd.read_sql(sql="select * from covidlocs", con=engine)

In [None]:
# List the tables in the database with the psql-style \dt meta-command.
# NOTE(review): backslash commands presumably rely on the pgspecial package
# named in the install cells at the top of the notebook — confirm.
%sql \dt

In [None]:
# Destructive: removes the CovidLocs table and all of its rows.
# NOTE(review): CovidCases.cloc references CovidLocs(uid), so PostgreSQL is
# expected to reject this while CovidCases exists unless CASCADE is used or
# CovidCases is dropped first — confirm the intended order.
%sql drop table CovidLocs;

In [None]:
# Destructive reset: drops the public schema and, because of CASCADE, every
# object contained in it. Run only when the whole database should be wiped.
%sql DROP SCHEMA public CASCADE;

In [None]:
# Recreate an empty public schema after the reset.
# NOTE(review): grants that existed on the old schema are not restored by this
# statement — re-grant if other roles need access.
%sql create schema public;

In [None]:
# Show the five locations with the highest uid values.
# NOTE(review): this fails if the drop/reset cells above were just executed;
# it presumably belongs after the tables have been recreated and loaded — confirm.
%sql select * from covidlocs order by uid desc limit 5