In [18]:
from sqlalchemy import create_engine
from config import username, password
import pandas as pd

engine = create_engine('postgresql://{}:{}@localhost:5432/HMIS_db'.format(username,password))

con = engine.connect()

with engine.connect() as c:
    sql = '''
    DROP TABLE IF EXISTS Clients CASCADE;

CREATE TABLE Clients(
"Race" varchar,
"Ethnicity" varchar,
"Gender" varchar,
"Vet_Status" varchar,
"Vet_Discharge_Status" varchar,
"Created_Date" date,
"Updated_Date" date,
"Birth_Date" date,
"Client_Id" bigint PRIMARY KEY
);

DROP TABLE IF EXISTS Assessment;

CREATE TABLE Assessment (
"Client_Id" bigint,
"Assessment_Id" bigint PRIMARY KEY,
"Assessment_Type" varchar,
"Assessment_Score" int,
"Assessment_Date" date,
FOREIGN KEY ("Client_Id") REFERENCES Clients("Client_Id")
);


DROP TABLE IF EXISTS Programs;

CREATE TABLE Programs (
"ignore" int,
"Program_Id" int PRIMARY KEY,
"Agency_Id" int,
"Program_Name" varchar, 
"Program_Start" date,
"Program_End" date,
"Continuum" int,
"Project_Type" varchar,
"ignore_API" varchar,
"ignore_ARP" int,
"ignore_TM" varchar,
"Target_Pop" varchar,
"ignore_VSP" int,
"Housing_Type" varchar,
"Added_Date" date,
"Updated_Date" date
);

DROP TABLE IF EXISTS Enrollment CASCADE;

CREATE TABLE Enrollment (
"ignore" int,
"Client_Id" bigint,
"Enrollment_Id" bigint PRIMARY KEY,
"Household_Id" bigint,
"Program_Id" int,
"Added_Date" date,
"Housing_Status" varchar,
"LOS_Prior" varchar,
"blank Entry Screen Times Homeless in the Past Three Years" varchar,
"blank Entry Screen Total Months Homeless in Past Three Years" varchar,
"ignore Entry Screen Client Became Enrolled in PATH (Yes / No)" varchar,
"ignore blank Entry Screen Reason not Enrolled" varchar,
"ignore blank Entry Screen City" varchar,
"ignore blank Entry Screen State" varchar,
"Zip" int,
"Chronic_Homeless" varchar,
"Prior_Residence" varchar,
"Last_Grade_Completed" varchar,
-- FOREIGN KEY ("Program_Id") REFERENCES Programs("Program_Id"), -program table missing programs
FOREIGN KEY ("Client_Id") REFERENCES Clients("Client_Id")
);

DROP TABLE IF EXISTS Exit_Screen CASCADE; 

CREATE TABLE Exit_Screen (
"Client_Id" bigint,
"Enrollment_Id" bigint,
"Exit_Destination" varchar,
"Exit_Reason" varchar, 
"Exit_Date" date,
FOREIGN KEY ("Client_Id") REFERENCES Clients("Client_Id"),
FOREIGN KEY ("Enrollment_Id") REFERENCES Enrollment("Enrollment_Id")
);
    '''
    c.execute(sql)
    
    


In [19]:
assessment = pd.read_csv("Sacramento_County_-_Assessment_Table_2019-09-05T0401_pTq3TT.csv")
client = pd.read_csv("Sacramento_County_-_Client_Table_2019-09-05T0101_Kky8n7.csv")
exit = pd.read_csv("Sacramento_County_-_edit_Exit_Table_2019-09-01T0601_FDwNWs.csv")
enrollment = pd.read_csv("Sacramento_County_-_Enrollment_Table_2019-09-05T0131_KptDcM.csv")
project = pd.read_csv("Sacramento_County_-_Project_Table_2019-09-05T0200_DdZb5N.csv")

In [20]:
for i in assessment.columns:
    assessment.rename(columns = {
        i:str(i).replace(' ', '_')
    }, inplace=True)
    
    
assessment.rename(columns={
    'Personal_ID': 'Client_Id'
}, inplace=True)

In [21]:
assessment.drop(columns=['ignore'], inplace=True)
assessment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8944 entries, 0 to 8943
Data columns (total 5 columns):
Client_Id           8944 non-null int64
Assessment_ID       8944 non-null int64
Assessment_Type     8944 non-null object
Assessment_Score    8944 non-null int64
Assessment_Date     8944 non-null object
dtypes: int64(3), object(2)
memory usage: 349.5+ KB


In [22]:
client.rename(columns={
    'Clients Race': 'Race',
    'Clients Ethnicity':'Ethnicity',
    'Clients Gender': 'Gender',
    'Clients Veteran Status':'Vet_Status',
    'Clients Discharge Status': 'Vet_Discharge_Status',
    'Clients Date Created Date': 'Created_Date',
    'Clients Date Update': 'Updated_Date',
    'Birth_Date_d':'Birth_date',
    'Personal_Id_d':'Client_Id'
},inplace=True)

client.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65822 entries, 0 to 65821
Data columns (total 9 columns):
Race                    59828 non-null object
Ethnicity               62972 non-null object
Gender                  64882 non-null object
Vet_Status              51723 non-null object
Vet_Discharge_Status    4708 non-null object
Created_Date            65822 non-null object
Clients Date Updated    65822 non-null object
Birth_date              65665 non-null object
Client_Id               65822 non-null int64
dtypes: int64(1), object(8)
memory usage: 4.5+ MB


In [23]:
project.rename(columns={
    'Program Id': 'Program_Id',
    'Agency Id': 'Agency_Id',
    'Name': 'Program_Name',
    'Availability Start Date':'Program_Start',
    'Availability End Date': 'Program_End',
    'Continuum Project': 'Continuum',
    'Project Type Code': 'Project_Type',
    'Housing Type':'Housing_Type',
    'Added Date':'Added_date',
    'Last Updated Date':'Updated_date',
    'Target Population':'Target_Pop'
}, inplace=True)
project.drop(columns=['Unnamed: 0','Affiliated Project Ids','Affiliated with a Residential Project', 'Tracking Method',
                     'Victim Service Provider'], inplace=True)
project.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 298 entries, 0 to 297
Data columns (total 11 columns):
Program_Id       298 non-null int64
Agency_Id        298 non-null int64
Program_Name     298 non-null object
Program_Start    298 non-null object
Program_End      134 non-null object
Continuum        298 non-null int64
Project_Type     298 non-null object
Target_Pop       292 non-null object
Housing_Type     156 non-null object
Added_date       298 non-null object
Updated_date     298 non-null object
dtypes: int64(3), object(8)
memory usage: 25.7+ KB


In [24]:
for i in exit.columns:
    if i == 'Project Exit Date':
        exit.rename(columns={
            i:'Exit_Date'
        }, inplace=True)
        continue
    exit.rename(columns={
        i:str(i).replace(' ', '_')
    }, inplace=True)
exit.rename(columns={
    'Personal_ID':'Client_Id'
}, inplace=True)

exit.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135217 entries, 0 to 135216
Data columns (total 5 columns):
Client_Id           135217 non-null int64
Enrollment_Id       135217 non-null int64
Exit_Destination    98426 non-null object
Exit_Reason         108585 non-null object
Exit_Date           135217 non-null object
dtypes: int64(2), object(3)
memory usage: 5.2+ MB


In [25]:
enrollment.rename(columns={
    'Personal ID':'Client_Id',
    'Enrollment Id': 'Enrollment_Id',
    'Household ID': 'Household_Id',
    'Enrollments Proeject ID': 'Program_Id',
    'Entry Screen Added Date':'Added_Date',
    'Entry Screen Housing Status':'Housing_Status',
    'Entry screen Length of Stay in Prior Living Situation':'LOS_Prior',
    'Entry Screen Zip Code':'Zip',
    'Entry Screen Chronic Homeless at Project Start':'Chronic_Homeless',
    'Entry Screen Residence Prior to Project Entry':'Prior_Residence',
    'Entry Screen Last Grade Completed':'Last_Grade_Comleted'
}, inplace=True)
enrollment.drop(columns=['ignore','Entry Screen Times Homeless in the Past Three Years',
                        'Entry Screen Total Months Homeless in Past Three Years',
                        'Entry Screen Client Became Enrolled in PATH (Yes / No)',
                        'Entry Screen Reason not Enrolled','Entry Screen City','Entry Screen State'
                        ], inplace=True)

In [26]:
enrollment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144067 entries, 0 to 144066
Data columns (total 11 columns):
Client_Id                                                144067 non-null int64
Enrollment_Id                                            144067 non-null int64
Household_Id                                             144067 non-null int64
Enrollments Project Id                                   144067 non-null int64
Added_Date                                               144067 non-null object
Housing_Status                                           91654 non-null object
Entry Screen Length of Stay in Prior Living Situation    106927 non-null object
Zip                                                      70744 non-null float64
Chronic_Homeless                                         144067 non-null object
Prior_Residence                                          122455 non-null object
Last_Grade_Comleted                                      2494 non-null object
dtypes: float64(

In [28]:
exit = exit.loc[((exit['Client_Id'] != 383403592) & (exit['Client_Id'] != 455040993)),:]
exit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 135217 entries, 0 to 135216
Data columns (total 5 columns):
Client_Id           135217 non-null int64
Enrollment_Id       135217 non-null int64
Exit_Destination    98426 non-null object
Exit_Reason         108585 non-null object
Exit_Date           135217 non-null object
dtypes: int64(2), object(3)
memory usage: 6.2+ MB


In [31]:
exit.to_sql(name="Exit_Screen",if_exists="append", index=False, con=con)

In [32]:
with engine.connect() as c:
    sql = '''
    ALTER TABLE Exit_Screen
ADD COLUMN ES_Id bigserial PRIMARY KEY;
    '''
    c.execute(sql)

In [33]:
enrollment.to_sql(name="Enrollment",if_exists="append", index=False, con=con)

In [None]:
enrollment.to_sql(name="Enrollment",if_exists="append", index=False, con=con)