In [142]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
import helpers as h

In [219]:
# Read the order, return and replacement CSVs from the local data/ folder.
orders_df = pd.read_csv('data/pbe-orders.csv')
returns_df = pd.read_csv('data/pbe-returns.csv')
replace_df = pd.read_csv('data/pbe-replacements.csv')

# Slim order table: one row per order line, keyed by 'order-id' so it shares
# a key name with the returns/replacements tables built in later cells.
order_sku_df = (
    orders_df
    .loc[:, ['amazon-order-id', 'sku']]
    .rename(columns={'amazon-order-id': 'order-id'})
)

In [218]:
# Keep only the columns needed to tie a replacement back to its original
# order, and expose the original order id under the shared 'order-id' name.
# No 'status' column is selected here, so 'original-amazon-order-id' is the
# only column that needs renaming ('reason' already carries its final name).
replace_orders = (
    replace_df[['sku', 'reason', 'original-amazon-order-id']]
    .rename(columns={'original-amazon-order-id': 'order-id'})
)

In [216]:
# Reduce the returns table to sku / reason / order-id, using the
# 'detailed-disposition' column as the reason for the return.
returns_orders = (
    returns_df[['sku', 'detailed-disposition', 'order-id']]
    .rename(columns={'detailed-disposition': 'reason'})
)

# Stack returns on top of replacements (original row indices are kept).
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
all_returns_replace = pd.concat([returns_orders, replace_orders])

# Normalise the reason labels so later replace()/groupby() calls are
# case-insensitive.
all_returns_replace['reason'] = all_returns_replace['reason'].str.lower()

In [225]:
# Left-join every order line to its return/replacement record (if any).
# Order lines without a match keep NaN in 'reason'.
joined = pd.merge(order_sku_df,
                  all_returns_replace,
                  how='left',
                  left_on=['sku', 'order-id'],
                  right_on=['sku', 'order-id'])

# Collapse the reasons into coarser buckets and count rows per bucket:
#   * NaN (no return/replacement record)  -> 'quality'
#   * 'customer_damaged'                  -> 'defective'
#   * 'sellable', 'wrong item'            -> 'quality'
# Any other reason value is left as-is and gets its own row.
order_status_counts = (
    joined.fillna('quality')
          .replace({'customer_damaged': 'defective',
                    'sellable': 'quality',
                    'wrong item': 'quality'})
          .groupby('reason')
          .count()
          .reset_index()
          .rename(columns={'order-id': 'order-count'})
          .drop('sku', axis=1)
)

# Look the two buckets up by label rather than by row position: groupby sorts
# reasons alphabetically, so positional access breaks as soon as another
# reason value sorts before 'defective' or between it and 'quality'.
counts_by_reason = order_status_counts.set_index('reason')['order-count']
defective_count = counts_by_reason.get('defective', 0)
quality_count = counts_by_reason.get('quality', 0)

total_orders = order_status_counts['order-count'].sum()
defective_prop = defective_count / total_orders
quality_prop = quality_count / total_orders

In [215]:
# Bar chart with one bar per reason bucket; bar height is the row count
# stored in the 'order-count' column of order_status_counts.
fig = px.bar(
    data_frame=order_status_counts,
    x='reason',
    y='order-count',
)
fig.show()

In [130]:
# Number of distinct order ids in the orders table.
total_unique_orders = len(pd.unique(orders_df['amazon-order-id']))

# Number of rows that carry a (non-null) order id, i.e. order lines.
total_orders = orders_df['amazon-order-id'].notna().sum()
total_orders

1256

In [131]:
# Number of orders whose rows span more than one distinct SKU.
# Counting duplicated order-id rows instead would over-count: an order with
# three lines contributes two duplicates but is still a single order.
orders_containing_multiple_skus = (
    orders_df.groupby('amazon-order-id')['sku'].nunique().gt(1).sum()
)

In [132]:
# Share of distinct orders that contain more than one distinct SKU.
# Both numerator and denominator are counted in orders (not rows), so the
# value is a true per-order proportion. It is a fraction in [0, 1], not a
# value scaled to 0-100.
skus_per_order = orders_df.groupby('amazon-order-id')['sku'].nunique()
percent_orders_containing_mskus = (skus_per_order > 1).mean()

In [220]:
# Full state / territory name -> two-letter postal abbreviation.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# Lower-cased lookup that accepts either the full name or the abbreviation,
# so 'texas' and 'tx' both resolve to 'TX'.
lower_state = {}

for k, v in us_state_abbrev.items():
    lower_state[k.lower()] = v
    lower_state[v.lower()] = v

# One-off alias for a non-standard spelling; set once, outside the loop,
# because it does not depend on the loop variables.
lower_state['n. j.'] = 'NJ'

# Lower-case the free-text ship-state column (overwritten in place) and map
# it to the abbreviation; values missing from lower_state become NaN.
orders_df['ship-state'] = orders_df['ship-state'].str.lower()
orders_df['ship-state-abbr'] = orders_df['ship-state'].map(lower_state)

In [221]:
# NOTE(review): `grouped` is not defined in any visible cell of this notebook,
# so this line raises NameError on a fresh kernel (Restart & Run All).
# Presumably it is a per-'ship-state-abbr' aggregation of orders_df whose
# 'shipment-id' column holds the order count -- TODO confirm and define it here.
state_df = grouped

In [136]:
# Move the index back into a regular column, then give the state and count
# columns presentation-friendly names for the map below.
state_df = state_df.reset_index()
state_df = state_df.rename(
    columns={
        'ship-state-abbr': 'State',
        'shipment-id': 'Orders Count',
    }
)

In [137]:
# Two-column view used for plotting: state code and its order count.
orders_by_state_df = state_df.loc[:, ['State', 'Orders Count']]

In [139]:
# Nested dict of the whole frame: {column name -> {row index -> value}}.
# NOTE(review): this covers every column of state_df, not just a flat
# state -> count mapping -- confirm that is what consumers expect.
order_counts_by_state_dict = state_df.to_dict(orient='dict')

In [140]:
# Choropleth of order counts per US state, shaded on a blue colour scale.
fig = go.Figure(data=go.Choropleth(
    locations=orders_by_state_df['State'],  # two-letter state codes
    z=orders_by_state_df['Orders Count'].astype(int),  # value to colour by
    locationmode='USA-states',  # interpret `locations` as US state codes
    colorscale='Blues',
    colorbar_title="Order Count",
))

fig.update_layout(
    title_text='Pale Blue Earth Order Quantities by State',
    geo_scope='usa',  # limit map scope to the USA
)

# Write next to the notebook instead of to a user-specific absolute path so
# the cell runs on any machine. The file lands in the current working
# directory (presumably the project root, as with the relative 'data/' reads).
fig.write_html('state-heatmap.html')

fig.show()