In [1]:
import io
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from psycopg2 import connect
from sqlalchemy import create_engine

# Initial (one-time) load of the items-purchased data.
# Running this cell creates the `purchases` table, so it is only needed the
# first time the data is loaded.
# Path of the source CSV; it is handed to pd.read_csv later in this cell.
csv = "~/Desktop/items_purchased.csv"

def writeToPostgres(f):
    """Create the ``purchases`` table and bulk-load rows into it.

    The table creation and the COPY run in a single transaction that is
    committed explicitly through the driver, so either both take effect or
    neither does. The connection is always closed, even when the load fails.

    Parameters
    ----------
    f : file-like object
        Readable text stream with one row per line and no header. Fields are
        tab-separated and must appear in the order kiosk_id, product_id,
        card_hash, date_time, fc_number.

    Raises
    ------
    Any exception raised by the database driver (for example when the
    ``purchases`` table already exists) is propagated unchanged after the
    connection has been closed.
    """
    conn = connect("postgresql://tlapinskas@localhost/postgres")
    try:
        cursor = conn.cursor()
        # No COMMIT inside the SQL text: transaction control stays with the
        # driver so that the DDL and the COPY below succeed or fail together.
        cursor.execute('create table purchases (kiosk_id text, product_id text, card_hash text, date_time text, fc_number text);')
        cursor.copy_from(f, 'purchases', columns=('kiosk_id', 'product_id', 'card_hash', 'date_time', 'fc_number'), sep='\t')
        cursor.close()
        # Persist the new table and the loaded rows.
        conn.commit()
    finally:
        # Closing without a prior commit discards any pending work, so a
        # failed load does not leave a half-initialised table behind.
        conn.close()

# Columns to load, in the exact order the purchases table expects them.
columns = ['kiosk_id', 'product_id', 'card_hash', 'date_time', 'fc_number']

# Read only the needed columns. Note that `usecols` filters columns but keeps
# the order found in the file, not the order of this list.
df = pd.read_csv(csv, usecols=columns)

# Serialise to an in-memory, header-less, tab-separated buffer.
# `columns=` pins the field order: the COPY in writeToPostgres maps fields to
# table columns by position, so the order must not depend on the CSV layout.
f = io.StringIO()
df.to_csv(f, columns=columns, index=False, header=False, sep='\t')
f.seek(0)  # rewind so the loader reads from the first row

# Load to Postgres
writeToPostgres(f)

print("Data load complete")


The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.



Data load complete


In [4]:
# Incremental load: appends a CSV with the same layout as items_purchased.csv
# to the existing purchases table.
# Use case: Hourly/Daily/Weekly load of items purchased data that needs to live in a Data Lake
# NOTE(review): absolute, user-specific path; consider making it configurable.
csv = "/Users/tlapinskas/Desktop/items_purchased.csv"

conn = connect("postgresql://tlapinskas@localhost/postgres")
try:
    cur = conn.cursor()
    with open(csv, 'r') as f:
        # The file is streamed to the server as-is, so the `csv` module is
        # not needed; only the header row has to be skipped.
        # The default keeps an empty file from raising StopIteration.
        next(f, None)
        cur.copy_from(f, 'purchases', sep=',')

    # Persist the appended rows before reporting on them.
    conn.commit()

    cur.execute('''SELECT COUNT(*) FROM purchases;''')
    # fetchone() returns a one-element row; unpack it so the bare number is
    # printed instead of a tuple.
    (count,) = cur.fetchone()
    cur.close()
finally:
    # Always release the connection, even when the load or the count fails.
    conn.close()

print("Data load complete")
print("Total Records:", count)

Data load complete
Total Records: (2231144,)


In [3]:
# SQLAlchemy engine used by pandas to query the local Postgres database.
engine = create_engine("postgresql://tlapinskas@localhost/postgres")

# Enable plotly's notebook rendering before any figure is drawn.
init_notebook_mode(connected=True)

# The 50 most frequently purchased products, most purchased first.
top_products_sql = '''
                    SELECT product_id, COUNT(*) FROM purchases GROUP BY product_id ORDER BY count DESC LIMIT 50;
                    '''

# Fetch the result and replace missing values with empty strings.
df = pd.read_sql(top_products_sql, con=engine).fillna('')

product_ids = df['product_id']
purchase_counts = df['count']

# Bar plot: purchase count per product.
bar = go.Bar(x=product_ids, y=purchase_counts)
iplot([bar])

# Pie chart: each product's share of the purchases in this result set.
trace = go.Pie(labels=product_ids, values=purchase_counts)
iplot([trace], filename='basic_pie_chart')