## Create Dependencies

In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import os 

from config import pwd, uname

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import Column, Integer, String, Float 
from sqlalchemy import desc

## Create Variables

In [2]:
caDataSet = os.path.join('..','DataSets','CA Weed Data.csv')
cenDataSet = os.path.join('..','DataSets','Census Data.csv')
coDataSet = os.path.join('..','DataSets','CO Weed Data.csv')
massDataSet = os.path.join('..','DataSets','Mass Weed Data.csv')
stateDataSet = os.path.join('..','DataSets','State Names and Abbr.csv')


## Establish Database Connections and Get Tables Lists

In [3]:
# create engine to postgres db
postgres = f'postgresql://{uname}:{pwd}@localhost:5432/etl_project'  #path to local db

engine = create_engine(postgres)

In [4]:
# reflect an existing database into a new model
base = automap_base()

# reflect the tables
base.prepare(engine, reflect=True)

In [5]:
# View all of the classes that automap found
base.classes.keys()

['ca_raw', 'co_raw', 'mass_raw', 'states', 'sales_by_qtr', 'census']

In [6]:
# Save references to each table
# adding these for fun and for sanity checks

caRaw = base.classes.ca_raw
coRaw = base.classes.co_raw
massRaw = base.classes.mass_raw
states = base.classes.states
sales = base.classes.sales_by_qtr
census = base.classes.census

In [7]:
# Create our session (link) from Python to the DB
session = Session(bind=engine)

## Exploratory Analysis

In [8]:
# Load data into dataframes
massRawDF = pd.read_csv(massDataSet) 
cenRawDF = pd.read_csv(cenDataSet)

In [9]:
massRawDF.head()

Unnamed: 0,activitysummarydate,total_plantimmaturecount,total_planttrackedcount,total_plantfloweringcount,total_plantvegetativecount,total_plantdestroyedcount,total_plantharvestedcount,total_plantcount,salestotal,total_active_harvestcount,total_active_packagecount,total_plantbatchcount,total_activeproducts,total_activestrains,total_employees
0,6/4/2021 0:00,140747,225685,114894,110791,231460,966667,1423812,1658394000.0,1231,131289,3388,155452,30337,8334
1,6/3/2021 0:00,140747,225685,114894,110791,231460,966667,1423812,1658393000.0,1231,131271,3388,155452,30337,8334
2,6/2/2021 0:00,146669,221316,111723,109593,229462,961421,1412199,1651406000.0,1208,128029,3466,155053,30165,8282
3,6/1/2021 0:00,153497,211654,108947,102707,228692,960539,1400885,1648229000.0,1212,126709,3520,154325,30045,8282
4,5/31/2021 0:00,155021,210406,108882,101524,228595,959964,1398965,1645279000.0,1232,127630,3626,154348,30010,8274


## Load Raw Data into Database

In [10]:
connection = engine.connect()
connection.execute( '''TRUNCATE TABLE mass_raw; TRUNCATE TABLE census''' )

connection.close()

In [11]:
massRawDF.to_sql('mass_raw', engine, if_exists='append', index=False)
cenRawDF.to_sql('census', engine, if_exists='append', index=False)

DataError: (psycopg2.errors.StringDataRightTruncation) value too long for type character varying(8)

[SQL: INSERT INTO mass_raw (activitysummarydate, total_plantimmaturecount, total_planttrackedcount, total_plantfloweringcount, total_plantvegetativecount, total_plantdestroyedcount, total_plantharvestedcount, total_plantcount, salestotal, total_active_harvestcount, total_active_packagecount, total_plantbatchcount, total_activeproducts, total_activestrains, total_employees) VALUES (%(activitysummarydate)s, %(total_plantimmaturecount)s, %(total_planttrackedcount)s, %(total_plantfloweringcount)s, %(total_plantvegetativecount)s, %(total_plantdestroyedcount)s, %(total_plantharvestedcount)s, %(total_plantcount)s, %(salestotal)s, %(total_active_harvestcount)s, %(total_active_packagecount)s, %(total_plantbatchcount)s, %(total_activeproducts)s, %(total_activestrains)s, %(total_employees)s)]
[parameters: ({'activitysummarydate': '6/4/2021 0:00', 'total_plantimmaturecount': 140747, 'total_planttrackedcount': 225685, 'total_plantfloweringcount': 114894, 'total_plantvegetativecount': 110791, 'total_plantdestroyedcount': 231460, 'total_plantharvestedcount': 966667, 'total_plantcount': 1423812, 'salestotal': 1658393701.0, 'total_active_harvestcount': 1231, 'total_active_packagecount': 131289, 'total_plantbatchcount': 3388, 'total_activeproducts': 155452, 'total_activestrains': 30337, 'total_employees': 8334}, {'activitysummarydate': '6/3/2021 0:00', 'total_plantimmaturecount': 140747, 'total_planttrackedcount': 225685, 'total_plantfloweringcount': 114894, 'total_plantvegetativecount': 110791, 'total_plantdestroyedcount': 231460, 'total_plantharvestedcount': 966667, 'total_plantcount': 1423812, 'salestotal': 1658393171.0, 'total_active_harvestcount': 1231, 'total_active_packagecount': 131271, 'total_plantbatchcount': 3388, 'total_activeproducts': 155452, 'total_activestrains': 30337, 'total_employees': 8334}, {'activitysummarydate': '6/2/2021 0:00', 'total_plantimmaturecount': 146669, 'total_planttrackedcount': 221316, 'total_plantfloweringcount': 111723, 'total_plantvegetativecount': 109593, 'total_plantdestroyedcount': 229462, 'total_plantharvestedcount': 961421, 'total_plantcount': 1412199, 'salestotal': 1651406184.0, 'total_active_harvestcount': 1208, 'total_active_packagecount': 128029, 'total_plantbatchcount': 3466, 'total_activeproducts': 155053, 'total_activestrains': 30165, 'total_employees': 8282}, {'activitysummarydate': '6/1/2021 0:00', 'total_plantimmaturecount': 153497, 'total_planttrackedcount': 211654, 'total_plantfloweringcount': 108947, 'total_plantvegetativecount': 102707, 'total_plantdestroyedcount': 228692, 'total_plantharvestedcount': 960539, 'total_plantcount': 1400885, 'salestotal': 1648229061.0, 'total_active_harvestcount': 1212, 'total_active_packagecount': 126709, 'total_plantbatchcount': 3520, 'total_activeproducts': 154325, 'total_activestrains': 30045, 'total_employees': 8282}, {'activitysummarydate': '5/31/2021 0:00', 'total_plantimmaturecount': 155021, 'total_planttrackedcount': 210406, 'total_plantfloweringcount': 108882, 'total_plantvegetativecount': 101524, 'total_plantdestroyedcount': 228595, 'total_plantharvestedcount': 959964, 'total_plantcount': 1398965, 'salestotal': 1645279055.0, 'total_active_harvestcount': 1232, 'total_active_packagecount': 127630, 'total_plantbatchcount': 3626, 'total_activeproducts': 154348, 'total_activestrains': 30010, 'total_employees': 8274}, {'activitysummarydate': '5/30/2021 0:00', 'total_plantimmaturecount': 155009, 'total_planttrackedcount': 210406, 'total_plantfloweringcount': 108882, 'total_plantvegetativecount': 101524, 'total_plantdestroyedcount': 228595, 'total_plantharvestedcount': 959964, 'total_plantcount': 1398965, 'salestotal': 1645149521.0, 'total_active_harvestcount': 1232, 'total_active_packagecount': 127706, 'total_plantbatchcount': 3623, 'total_activeproducts': 154348, 'total_activestrains': 30010, 'total_employees': 8274}, {'activitysummarydate': '5/29/2021 0:00', 'total_plantimmaturecount': 155191, 'total_planttrackedcount': 209633, 'total_plantfloweringcount': 109335, 'total_plantvegetativecount': 100298, 'total_plantdestroyedcount': 228505, 'total_plantharvestedcount': 958927, 'total_plantcount': 1397065, 'salestotal': 1638043121.0, 'total_active_harvestcount': 1225, 'total_active_packagecount': 128933, 'total_plantbatchcount': 3582, 'total_activeproducts': 154245, 'total_activestrains': 29999, 'total_employees': 8274}, {'activitysummarydate': '5/28/2021 0:00', 'total_plantimmaturecount': 158343, 'total_planttrackedcount': 206760, 'total_plantfloweringcount': 107354, 'total_plantvegetativecount': 99406, 'total_plantdestroyedcount': 228135, 'total_plantharvestedcount': 958802, 'total_plantcount': 1393697, 'salestotal': 1635794211.0, 'total_active_harvestcount': 1223, 'total_active_packagecount': 129220, 'total_plantbatchcount': 3661, 'total_activeproducts': 154209, 'total_activestrains': 29995, 'total_employees': 8270}  ... displaying 10 of 956 total bound parameter sets ...  {'activitysummarydate': '10/16/2018 0:00', 'total_plantimmaturecount': 0, 'total_planttrackedcount': 0, 'total_plantfloweringcount': 0, 'total_plantvegetativecount': 0, 'total_plantdestroyedcount': 0, 'total_plantharvestedcount': 0, 'total_plantcount': 0, 'salestotal': 0.0, 'total_active_harvestcount': 0, 'total_active_packagecount': 0, 'total_plantbatchcount': 0, 'total_activeproducts': 0, 'total_activestrains': 0, 'total_employees': 9}, {'activitysummarydate': '10/15/2018 0:00', 'total_plantimmaturecount': 0, 'total_planttrackedcount': 0, 'total_plantfloweringcount': 0, 'total_plantvegetativecount': 0, 'total_plantdestroyedcount': 0, 'total_plantharvestedcount': 0, 'total_plantcount': 0, 'salestotal': 0.0, 'total_active_harvestcount': 0, 'total_active_packagecount': 0, 'total_plantbatchcount': 0, 'total_activeproducts': 0, 'total_activestrains': 0, 'total_employees': 3})]
(Background on this error at: http://sqlalche.me/e/13/9h9h)

In [None]:
#check mass
session.query(massRaw).count()

In [None]:
#check cen
session.query(census).count()