In [6]:
import pandas as pd
import numpy as np
import sqlite3
from dotenv import load_dotenv
import os
import psycopg2
from psycopg2 import extras
load_dotenv();

### Load Data

In [7]:
# Read every source CSV into its own dataframe.
# latin-1 is passed for the exports that need it; low_memory=False is kept
# exactly as in the original calls.
campaign = pd.read_csv("../data/Campaign.csv", encoding="latin-1")
profile_creation = pd.read_csv("../data/EPO_Teradata_Employer_Profile_Creation_Report.csv")
job_seeker = pd.read_csv("../data/EPO_Teradata_Job Seeker_Profile_Creation_Report.csv")
job_board = pd.read_csv("../data/EPO_Teradata_Job_Board_Sales_Report.csv")
feedback = pd.read_csv("../data/Feedback__c.csv", encoding="latin-1", low_memory=False)
account = pd.read_csv("../data/SalesForce_Account.csv", encoding="latin-1", low_memory=False)
sf_case = pd.read_csv("../data/SalesForce_Case.csv", encoding="latin-1", low_memory=False)
info_c = pd.read_csv("../data/SalesForce_Hire_Information__c.csv", encoding="latin-1")
opp = pd.read_csv("../data/SalesForce_Opportunity.csv", low_memory=False)
record = pd.read_csv("../data/SalesForce_RecordType.csv")

#no columns to pull data from
#sales_2018 = pd.read_csv("../data/SalesForce_2018Activities.csv")
#sales_force = pd.read_csv("../data/SalesForce_Contact.csv")
#email = pd.read_csv("../data/vr__VR_Email_History_Contact__c.csv")

# `ls` and `ls_name` are parallel lists: ls_name[i] is the table name for ls[i]
ls = [campaign, profile_creation, job_seeker, job_board, feedback, account, sf_case, 
      info_c, opp, record]
ls_name = ["campaign", "profile_creation", "job_seeker", "job_board", "feedback", "account", "sf_case", "info_c", "opp", "record"]

# Normalise column names: spaces -> "_", drop "?", then lower-case.
# regex=False makes "?" a literal character instead of depending on the
# installed pandas version's default for `regex`.
for frame in ls:
    frame.columns = (
        frame.columns
        .str.replace(" ", "_", regex=False)
        .str.replace("?", "", regex=False)
        .str.lower()
    )

# Name -> dataframe lookup, then a quick size report for each dataframe
df_dict = dict(zip(ls_name, ls))
print("---Dataframes Shapes---")
for table_name, frame in df_dict.items():
    print(f"{table_name} Shape: {frame.shape}")

---Dataframes Shapes---
campaign Shape: (1386, 103)
profile_creation Shape: (778, 5)
job_seeker Shape: (4342, 5)
job_board Shape: (521, 8)
feedback Shape: (15807, 70)
account Shape: (16858, 168)
sf_case Shape: (14845, 56)
info_c Shape: (30754, 34)
opp Shape: (10849, 130)
record Shape: (80, 13)


In [8]:
def df_sql(df_ls, name, conn):
    """
    Load every dataframe into a SQLite3 DB, one table per dataframe.

    df_ls: Your list of dataframes
    name: The list containing the names of your dataframes; name[i] is the
          table name used for df_ls[i]
    conn: The SQLite3 connection

    Raises ValueError if df_ls and name do not have the same length.

    A table that already exists is replaced, so the function can be re-run
    against the same database.
    """

    # Fail before writing anything rather than part-way through the load
    if len(df_ls) != len(name):
        raise ValueError(
            f"df_ls has {len(df_ls)} dataframes but name has {len(name)} names"
        )

    # Load all dataframes into a single SQLite3 database
    for table_name, frame in zip(name, df_ls):
        frame.to_sql(f"{table_name}", conn, index=False, if_exists="replace")

    return

def create_table(conn):
    """
    Collect the CREATE TABLE statement of every table in the SQLite3 DB.

    conn: The SQLite3 connection

    Returns a list of strings, one per table, each with its newlines removed
    and a ";" appended.
    """

    cursor = conn.cursor()

    # Every sqlite_master row whose type is 'table'
    rows = cursor.execute("SELECT * FROM sqlite_master WHERE type='table'").fetchall()

    # Field 4 of each row holds the statement text; flatten it to a single
    # line and terminate it
    return [row[4].replace("\n", "") + ";" for row in rows]

def insert_pg(conn, name):
    """
    Build one INSERT template per table for psycopg2's execute_values.

    conn: open connection to the database whose INFORMATION_SCHEMA is
          queried (the notebook passes the psycopg2 Postgres connection)
    name: list of table names

    Returns a list of strings, parallel to name, of the form
    "INSERT INTO <table> (<col1>, <col2>, ...) VALUES %s".

    Raises ValueError if INFORMATION_SCHEMA lists no columns for a table.
    """

    insert_ls = []
    curs = conn.cursor()

    try:
        # Query the schema of each table and build its insert statement
        for table_name in name:
            # The table name is passed as a query parameter, not formatted
            # into the SQL text. ORDER BY ordinal_position pins the column
            # list to the table's declared column order, because the caller
            # pairs it positionally with the values of each row.
            curs.execute(
                "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS "
                "WHERE TABLE_NAME = %s ORDER BY ordinal_position;",
                (table_name,),
            )
            columns = [row[0] for row in curs]

            # An empty column list would produce a malformed statement
            if not columns:
                raise ValueError(
                    f"No columns found in INFORMATION_SCHEMA for table '{table_name}'"
                )

            # "%s" is the single placeholder execute_values expands into rows
            insert_ls.append(
                f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES %s"
            )
    finally:
        # Release the cursor even if a query or the check above fails
        curs.close()

    return insert_ls

### SQLite3

In [4]:
# Make connection (sqlite3.connect is the documented way to open a database;
# it returns a sqlite3.Connection)
conn = sqlite3.connect("../data/heroes.sqlite3")

# Load the DB: one table per dataframe, named after the entries of ls_name
df_sql(ls, ls_name, conn)

# Extract CREATE TABLE statements and append to list
table_ls = create_table(conn)

### Psycopg2 and Azure

In [9]:
# Load credentials from .env
name = os.getenv("AZURE_DB_NAME")
pw = os.getenv("AZURE_PASS")
host = os.getenv("AZURE_HOST")
user = os.getenv("AZURE_USER")
ssl = os.getenv("AZURE_SSLMODE")

# Connect
# The settings are passed as keyword arguments so psycopg2 assembles the
# connection string itself; a password containing spaces or quotes no longer
# corrupts a hand-built "key=value" string.
pg_conn = psycopg2.connect(host=host, user=user, dbname=name, password=pw, sslmode=ssl)
pg_curs = pg_conn.cursor()

# Drop and Create tables
for table_name in ls_name:
    pg_curs.execute(f"DROP TABLE IF EXISTS {table_name};")
    pg_conn.commit()

# table_ls holds the CREATE TABLE statements extracted from the SQLite DB
for create_stmt in table_ls:
    pg_curs.execute(create_stmt)
    pg_conn.commit()

# Gather insert statements to a list
print("---Generating Insert List---")
insert_ls = insert_pg(pg_conn, ls_name)

# Insert the Data
print("---Starting Data Insert---")
# One SQLite cursor is reused for every table instead of opening a new one
# per iteration; it is closed in the notebook's final cell
curs = conn.cursor()
for table_name, query in zip(ls_name, insert_ls):
    # Read the whole table from SQLite, then bulk-insert it into Postgres
    data = curs.execute(f"SELECT * FROM {table_name}").fetchall()
    extras.execute_values(pg_curs, query, data)
    pg_conn.commit()
    print(f"{table_name} data inserted")
print("---Data Insert Finished---")

---Generating Insert List---
---Starting Data Insert---
campaign data inserted
profile_creation data inserted
job_seeker data inserted
job_board data inserted
feedback data inserted
account data inserted
sf_case data inserted
info_c data inserted
opp data inserted
record data inserted
---Data Insert Finished---


In [10]:
# Test that data can be queried
# NOTE(review): the printed rows come straight from the source data and may
# contain personal information; consider clearing this output before sharing.
for table_name in ls_name:
    # Only one row is displayed, so ask the server for one row only
    pg_curs.execute(f"SELECT * FROM {table_name} LIMIT 1")
    print(f"---First row from {table_name} Azure Postgres DB---")
    print(pg_curs.fetchone(), "\n")

---First row from campaign Azure Postgres DB---
('7010z000000mBaVAAU', 0, 'O2O Volunteer Program Email', '000000000000000AAA', 'Email', '012380000009wCN', 'Completed', None, None, None, None, None, 0, 0.0, 1, '000000000000000AAA', 0, 0, 1006, 0, 0, 0, 0.0, 0.0, 0, 0, 1006, 0, 0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0.0, '00538000005Ey04AAC', '1/15/2019 14:11', '00538000005Ey04AAC', '1/15/2019 14:11', '00538000005Ey04AAC', '1/15/2019 14:13', None, None, 0, 0.0, 0, 0, 0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 0, 0, None, None, '1/15/2019 0:00', 0, 0, None, None, 0.0, 0, 0, None, 0.0, 0.0, None, None) 

---First row from profile_creation Azure Postgres DB---
('4c5dd3d1-9c8c-4203-a179-3a2590f8d596', 'Company 1', '2018-03-28 10:12:14 UTC', 0, None) 

---First row from job_seeker Azure Postgres DB---
('Job Seeker 1', 'Jack

In [11]:
# Close the SQLite cursor and connection first, then the Postgres ones
for handle in (curs, conn, pg_curs, pg_conn):
    handle.close()