In [None]:
### Dependencies

# data mgmt
import pandas as pd
import numpy as np
import requests

# SQL
from sqlalchemy import create_engine
import pymysql 
import os 


In [None]:
### select latest order id

# set up variables for db access - note this requires environmental variables
# NOTE(review): USER (and HOST on some shells) is normally pre-set by the operating
# system to the login name / machine name - confirm these are really overridden with
# the database credentials before running.
db_user = os.environ.get('USER')
db_password = os.environ.get('PASS')
db_host = os.environ.get('HOST')
url = os.environ.get('URL')
db_port = 25060
db = 'shopify'

# fail early with a readable message instead of letting a None value reach
# pymysql / requests, where it would surface as a confusing connection error
missing_vars = [name for name, value in (('USER', db_user),
                                         ('PASS', db_password),
                                         ('HOST', db_host),
                                         ('URL', url)) if value is None]
if missing_vars:
    raise RuntimeError('missing required environment variable(s): ' + ', '.join(missing_vars))

# connect to ts database
connection = pymysql.connect(host = db_host,
                             user = db_user,
                             password = db_password,
                             port = db_port,
                             database = db,
                             charset = 'utf8mb4',
                             cursorclass = pymysql.cursors.DictCursor)

try:
    # the cursor context manager closes the cursor even when the query fails
    with connection.cursor() as cursor:
        # select id for most recent order; "=" (not LIKE) because key_id is compared
        # against a single numeric value, not a pattern
        sql = "SELECT id FROM shopify.orders WHERE key_id = (SELECT MAX(key_id) FROM shopify.orders);"
        cursor.execute(sql)
        latest_order = cursor.fetchone()
finally:
    # read-only query: nothing to commit, but always release the connection
    connection.close()

# fetchone() returns None when the table has no rows yet; keep res as None in that
# case instead of crashing on latest_order['id']
res = latest_order['id'] if latest_order is not None else None

print(res)

In [None]:
### query order data from shopify

def get_all_orders(res, url, page_size=250):
    """Download all orders with an id greater than ``res``, one row per line item.

    The endpoint is queried page by page. Each request carries ``since_id`` (the
    highest order id seen so far) and ``limit`` (``page_size``) as query
    parameters, replacing any values already present in ``url``. Paging stops at
    the first page that holds fewer than ``page_size`` orders.

    Parameters
    ----------
    res : int or None
        Id of the most recent order already stored. ``None`` leaves ``since_id``
        untouched on the first request.
    url : str
        Orders endpoint; other query parameters in it are preserved.
    page_size : int, default 250
        Number of orders requested per page.

    Returns
    -------
    pandas.DataFrame
        Line items flattened with ``pd.json_normalize``; order-level fields are
        prefixed with ``orders_``. The index is a fresh 0..n-1 range. An empty
        DataFrame (no columns) is returned when there are no new orders.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    KeyError
        If the response body has no ``orders`` key.
    """
    # only this function rewrites URLs, so the import is kept next to its use
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    address_fields = ['name', 'address1', 'address2', 'phone', 'city', 'zip',
                      'province', 'province_code', 'country', 'country_code',
                      'first_name', 'last_name', 'company', 'latitude', 'longitude']
    meta = [
        'id', 'discount_applications', ['customer', 'id'],
        ['customer', 'default_address', 'name'], 'contact_email', 'email',
        ['customer', 'default_address', 'phone'], ['customer', 'orders_count'],
        ['customer', 'total_spent'],
        ['customer', 'last_order_id'], ['customer', 'last_order_name'],
        'browser_ip', 'confirmed', 'created_at', 'processed_at', 'closed_at',
        'checkout_id', 'financial_status', 'fulfillment_status',
        'gateway', 'name', 'number', 'order_number',
        'source_name', 'tags', 'token', 'subtotal_price',
        ['total_discounts_set', 'presentment_money', 'amount'],
        ['total_shipping_price_set', 'shop_money', 'amount'],
        'total_tax', 'total_price', 'currency', 'total_weight',
    ]
    # billing fields first, then shipping fields, each in address_fields order
    meta += [[kind, field]
             for kind in ('billing_address', 'shipping_address')
             for field in address_fields]

    url_parts = urlsplit(url)
    query = dict(parse_qsl(url_parts.query, keep_blank_values=True))
    # the stop condition below compares against page_size, so the request must
    # ask for exactly that many orders
    query['limit'] = page_size

    since_id = res
    pages = []
    while True:
        if since_id is not None:
            query['since_id'] = since_id
        page_url = urlunsplit(url_parts._replace(query=urlencode(query)))

        response = requests.request("GET", page_url)
        response.raise_for_status()
        batch = response.json()['orders']
        if not batch:
            # nothing (more) to fetch; json_normalize on an empty list would not
            # produce an 'orders_id' column to continue from
            break

        pages.append(pd.json_normalize(batch, record_path=['line_items'], meta=meta,
                                       meta_prefix='orders_', errors='ignore'))

        # advance the cursor; max() guarantees progress whatever the page order is
        since_id = max(order['id'] for order in batch)

        # count orders, not line-item rows: one order can expand to several rows
        if len(batch) < page_size:
            break

    if not pages:
        return pd.DataFrame()
    # concatenate once at the end and renumber rows so the index is unique
    return pd.concat(pages, ignore_index=True)

In [None]:
### run get_all_orders function

# pull the orders through get_all_orders, then display (rows, columns) as a sanity check
orders_df = get_all_orders(res=res, url=url)
orders_df.shape

In [None]:
### QC

# show id, name and creation time of the first rows to check where the pull starts
orders_df.loc[:, ['orders_id', 'orders_name', 'orders_created_at']].head()

In [None]:
### clean data

# Single source of truth for the columns that are kept: raw column name -> name used
# for the database insert, listed in output order. Line-item columns that keep their
# name map to themselves. (Previously this mapping lived in a variable called `dict`,
# which shadowed the builtin for the rest of the kernel session.)
# NOTE(review): 'shiping_name' and 'shiping_zip' look like typos, but these names are
# later used as INSERT column names - confirm against the table schema before renaming.
column_map = {
    'orders_id': 'id',
    'orders_customer.id': 'customer_id',
    'orders_customer.default_address.name': 'default_name',
    'orders_contact_email': 'contact_email',
    'orders_email': 'email',
    'orders_customer.default_address.phone': 'default_phone',
    'orders_customer.orders_count': 'order_count',
    'orders_customer.total_spent': 'total_spent',
    'orders_customer.last_order_id': 'last_order_id',
    'orders_customer.last_order_name': 'last_order_name',
    'orders_browser_ip': 'browser_ip',
    'orders_confirmed': 'confirmed',
    'orders_created_at': 'created_at',
    'orders_processed_at': 'processed_at',
    'orders_closed_at': 'closed_at',
    'orders_checkout_id': 'checkout_id',
    'orders_financial_status': 'financial_status',
    'orders_fulfillment_status': 'fulfillment_status',
    'orders_gateway': 'gateway',
    'orders_name': 'name',
    'orders_number': 'number',
    'orders_order_number': 'order_number',
    'orders_source_name': 'source',
    'orders_tags': 'tags',
    'orders_token': 'token',
    'orders_subtotal_price': 'subtotal_price',
    'orders_total_discounts_set.presentment_money.amount': 'total_discount',
    'orders_total_shipping_price_set.shop_money.amount': 'total_shipping',
    'orders_total_tax': 'total_tax',
    'orders_total_price': 'total_price',
    'orders_currency': 'currency',
    'orders_discount_applications': 'discount_code',
    'id': 'line_item_id',
    'sku': 'sku',
    'title': 'title',
    'variant_title': 'variant_title',
    'variant_id': 'variant_id',
    'quantity': 'quantity',
    'grams': 'grams',
    'price': 'price',
    'orders_total_weight': 'total_weight',
    'orders_billing_address.name': 'billing_name',
    'orders_billing_address.address1': 'billing_address1',
    'orders_billing_address.address2': 'billing_address2',
    'orders_billing_address.phone': 'billing_phone',
    'orders_billing_address.city': 'billing_city',
    'orders_billing_address.zip': 'billing_zip',
    'orders_billing_address.province': 'billing_province',
    'orders_billing_address.province_code': 'billing_province_code',
    'orders_billing_address.country': 'billing_country',
    'orders_billing_address.country_code': 'billing_country_code',
    'orders_billing_address.first_name': 'billing_first_name',
    'orders_billing_address.last_name': 'billing_last_name',
    'orders_billing_address.company': 'billing_company',
    'orders_billing_address.latitude': 'billing_latitude',
    'orders_billing_address.longitude': 'billing_longitude',
    'orders_shipping_address.name': 'shiping_name',
    'orders_shipping_address.address1': 'shipping_address1',
    'orders_shipping_address.address2': 'shipping_address2',
    'orders_shipping_address.phone': 'shipping_phone',
    'orders_shipping_address.city': 'shipping_city',
    'orders_shipping_address.zip': 'shiping_zip',
    'orders_shipping_address.province': 'shipping_province',
    'orders_shipping_address.province_code': 'shipping_province_code',
    'orders_shipping_address.country': 'shipping_country',
    'orders_shipping_address.country_code': 'shipping_country_code',
    'orders_shipping_address.first_name': 'shipping_first_name',
    'orders_shipping_address.last_name': 'shipping_last_name',
    'orders_shipping_address.company': 'shipping_company',
    'orders_shipping_address.latitude': 'shipping_latitude',
    'orders_shipping_address.longitude': 'shipping_longitude',
}

# select only useful columns (in mapping order) and rename them in one step
df2 = orders_df[list(column_map)].rename(columns=column_map)


def _first_discount_code(applications):
    """Return the 'code' of the first discount application, or NaN if there is none."""
    if isinstance(applications, (list, tuple)) and applications and isinstance(applications[0], dict):
        code = applications[0].get('code')
        if code is not None:
            return code
    return np.nan


# clean discount code data: keep only the code of the first discount application.
# Built as a plain list so values are assigned by position (no index alignment), and
# orders without discounts / discounts without a 'code' yield NaN instead of a KeyError.
df2['discount_code'] = [_first_discount_code(value) for value in df2['discount_code']]

# replace NaN with '' for SQL DB
df2 = df2.astype(object).replace(np.nan, '')
df2.head()

In [None]:
### insert data into db

# connect to ts database
connection = pymysql.connect(host=db_host,
                             user=db_user,
                             password=db_password,
                             port=db_port,
                             database=db,
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# create column list for insertion
cols = "`,`".join([str(i) for i in df2.columns.tolist()])

# the statement is identical for every row, so build it once: one %s per column
placeholders = ",".join(["%s"] * len(df2.columns))
sql = "INSERT INTO `orders` (`" + cols + "`) VALUES (" + placeholders + ")"

# one plain tuple of values per DataFrame row, in column order
rows = list(df2.itertuples(index=False, name=None))

try:
    # the cursor context manager closes the cursor even when the insert fails
    with connection.cursor() as cursor:
        cursor.executemany(sql, rows)
    # single commit: either every row of this run is stored or none is, so a failed
    # run can simply be repeated without creating duplicates
    connection.commit()
except Exception:
    connection.rollback()
    raise
finally:
    connection.close()