In [1]:
# import dependencies
import pandas as pd
import datetime as dt
import numpy as np

In [2]:
# read in Superstore data (all orders & countries)
super_df = pd.read_csv('resources/superstore.csv')
super_df.head()

Unnamed: 0,Order ID,index,Order Date,Ship Date,Ship Mode,Segment,City,State,Country,Region,...,Sales,Quantity,Discount,Profit,Shipping Cost,Order Priority,Days to Ship,Returned,Profit Margin Percentage,Returned $ Amount
0,CA-2014-AB10015140-41954,0,2014-11-11,2014-11-13,First Class,Consumer,Oklahoma City,Oklahoma,United States,Central US,...,221.98,2,0.0,62.15,40.77,High,2,0.0,27.998018,0.0
1,IN-2014-JR162107-41675,1,2014-02-05,2014-02-07,Second Class,Corporate,Wollongong,New South Wales,Australia,Oceania,...,3709.4,9,0.1,-288.77,923.63,Critical,2,0.0,-7.784817,0.0
2,IN-2014-CR127307-41929,2,2014-10-17,2014-10-18,First Class,Consumer,Brisbane,Queensland,Australia,Oceania,...,5175.17,9,0.1,919.97,915.49,Medium,1,0.0,17.776614,0.0
3,ES-2014-KM1637548-41667,3,2014-01-28,2014-01-30,First Class,Home Office,Berlin,Berlin,Germany,Western Europe,...,2892.51,5,0.1,-96.54,910.16,Medium,2,0.0,-3.337586,0.0
4,SG-2014-RH9495111-41948,4,2014-11-05,2014-11-06,Same Day,Consumer,Dakar,Dakar,Senegal,Western Africa,...,2832.96,8,0.0,311.52,903.04,Critical,1,0.0,10.996272,0.0


## Data Exploration - All Orders

#### Sales & Profit

In [3]:
# Average Sale Price by Category 

super_df[["Category", "Sales"]].groupby("Category").mean().reset_index()

Unnamed: 0,Category,Sales
0,Furniture,416.881646
1,Office Supplies,121.048786
2,Technology,467.859063


In [4]:
# Average Sales Price by Sub_Category

super_df.groupby(['Category', 'Sub-Category'])['Sales'].mean().reset_index()

Unnamed: 0,Category,Sub-Category,Sales
0,Furniture,Bookcases,608.283928
1,Furniture,Chairs,437.298241
2,Furniture,Furnishings,122.116725
3,Furniture,Tables,879.259199
4,Office Supplies,Appliances,580.100896
5,Office Supplies,Art,76.400843
6,Office Supplies,Binders,75.149705
7,Office Supplies,Envelopes,70.89132
8,Office Supplies,Fasteners,34.407993
9,Office Supplies,Labels,28.200903


In [5]:
# Sales totals by Sub_Category
super_df.groupby(['Category', 'Sub-Category'])['Sales'].sum().reset_index()

Unnamed: 0,Category,Sub-Category,Sales
0,Furniture,Bookcases,1466572.55
1,Furniture,Chairs,1501682.16
2,Furniture,Furnishings,385156.15
3,Furniture,Tables,757042.17
4,Office Supplies,Appliances,1010535.76
5,Office Supplies,Art,371613.7
6,Office Supplies,Binders,461870.09
7,Office Supplies,Envelopes,169217.58
8,Office Supplies,Fasteners,89495.19
9,Office Supplies,Labels,73350.55


In [6]:
# Sales totals by country
super_df.groupby(['Country'])['Sales'].sum().reset_index()

Unnamed: 0,Country,Sales
0,Afghanistan,21673.32
1,Albania,3888.12
2,Algeria,36091.59
3,Angola,25554.00
4,Argentina,57511.75
...,...,...
160,Vietnam,65210.11
161,Western Sahara,666.36
162,Yemen,2465.72
163,Zambia,26035.50


In [7]:
# Profit Margin by Sub/Category

super_df.groupby(['Category','Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases        1.488165
                 Chairs           2.477528
                 Furnishings      5.390950
                 Tables         -24.204800
Office Supplies  Appliances      -0.198507
                 Art              6.485630
                 Binders         -0.172981
                 Envelopes        8.292237
                 Fasteners        7.312973
                 Labels          11.930966
                 Paper           19.371858
                 Storage          1.261661
                 Supplies         4.412333
Technology       Accessories      8.705600
                 Copiers          7.167309
                 Machines        -4.350493
                 Phones           4.213040
Name: Profit Margin Percentage, dtype: float64

#### Order Priority & Shipping

In [8]:
# Count for each Order Priority

super_df[['Order Priority', 'Ship Mode']].groupby('Order Priority').count().reset_index()

Unnamed: 0,Order Priority,Ship Mode
0,Critical,3932
1,High,15501
2,Low,2424
3,Medium,29433


In [9]:
# Shipping Cost for each Order Priority
super_df.groupby(['Order Priority'])['Shipping Cost'].mean().reset_index()

Unnamed: 0,Order Priority,Shipping Cost
0,Critical,59.74744
1,High,32.933931
2,Low,27.139061
3,Medium,18.579989


In [10]:
# Average Profit Margin by Order Priority
super_df[['Order Priority', 'Profit Margin Percentage']].groupby('Order Priority').mean().reset_index()

Unnamed: 0,Order Priority,Profit Margin Percentage
0,Critical,5.759158
1,High,4.734895
2,Low,4.759029
3,Medium,4.60994


In [11]:
# Order Priority (Count, Profit Margin, Dollars) within each Ship Mode
super_df.groupby(['Ship Mode','Order Priority'])['Profit Margin Percentage'].agg(['count','mean','sum']).reset_index()     

Unnamed: 0,Ship Mode,Order Priority,count,mean,sum
0,First Class,Critical,1734,4.677052,8110.008639
1,First Class,High,3413,2.585397,8823.960557
2,First Class,Medium,2358,3.833527,9039.456233
3,Same Day,Critical,742,8.21702,6097.028987
4,Same Day,High,1269,3.267872,4146.929458
5,Same Day,Medium,690,5.689389,3925.678157
6,Second Class,Critical,1456,5.795311,8437.973228
7,Second Class,High,4010,6.149506,24659.520773
8,Second Class,Medium,4843,4.430822,21458.470219
9,Standard Class,High,6809,5.252636,35765.198387


#### Returns

In [12]:
# Get Count of Returns
super_df.value_counts('Returned')

Returned
0.0    49070
1.0     2220
dtype: int64

##### By Country

In [13]:
# Returns total Dollars by Country
super_df.loc[super_df.Returned==1].groupby(['Country'])[['Returned $ Amount']].sum().sort_values(by = 'Returned $ Amount', ascending = False ).reset_index()

Unnamed: 0,Country,Returned $ Amount
0,United States,108118.09
1,Australia,41023.85
2,France,39419.79
3,China,31023.19
4,Germany,25606.92
...,...,...
92,Martinique,32.20
93,United Arab Emirates,23.64
94,Denmark,21.29
95,Zimbabwe,20.16


In [14]:
# Average return by Country
super_df.loc[super_df.Returned==1].groupby(['Country'])['Sales'].mean()

Country
Afghanistan      481.710000
Albania          424.065000
Algeria          303.484286
Angola           199.462500
Argentina        154.771875
                    ...    
United States    231.021560
Uruguay          157.980000
Vietnam           93.063333
Zambia           164.826000
Zimbabwe          10.080000
Name: Sales, Length: 97, dtype: float64

In [15]:
# Average return Profit Margin Percent for returned orders
super_df.loc[super_df.Returned==1].groupby(['Category', 'Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases       -0.821275
                 Chairs           3.282007
                 Furnishings      5.090651
                 Tables         -21.763412
Office Supplies  Appliances      -4.302427
                 Art              6.234819
                 Binders          4.769743
                 Envelopes        1.845386
                 Fasteners       12.253599
                 Labels          11.382428
                 Paper           20.336994
                 Storage          1.879159
                 Supplies        -1.863690
Technology       Accessories     12.605949
                 Copiers         12.291191
                 Machines         0.493772
                 Phones           5.792309
Name: Profit Margin Percentage, dtype: float64

In [16]:
# Returns total Dollars by Sub/Category
super_df.loc[super_df.Returned==1].groupby(['Category', 'Sub-Category'])[['Returned $ Amount']].sum().sort_values(by = 'Returned $ Amount', ascending = False ).reset_index()

Unnamed: 0,Category,Sub-Category,Returned $ Amount
0,Technology,Phones,69034.53
1,Technology,Copiers,65145.87
2,Furniture,Bookcases,60710.97
3,Furniture,Chairs,57439.13
4,Office Supplies,Storage,52938.59
5,Technology,Accessories,33991.08
6,Furniture,Tables,33283.18
7,Office Supplies,Appliances,32975.13
8,Technology,Machines,32550.63
9,Office Supplies,Binders,23571.8


##### By Category & Sub-Category

In [17]:
# Average return by Category and Subcategory

super_df.loc[super_df.Returned==1].groupby(['Category', 'Sub-Category'])['Sales'].mean()

Category         Sub-Category
Furniture        Bookcases       583.759327
                 Chairs          390.742381
                 Furnishings     122.828222
                 Tables          811.784878
Office Supplies  Appliances      558.900508
                 Art              74.753502
                 Binders          87.627509
                 Envelopes        68.527374
                 Fasteners        30.376275
                 Labels           24.446569
                 Paper            66.191533
                 Storage         249.710330
                 Supplies         80.851262
Technology       Accessories     246.312174
                 Copiers         658.039091
                 Machines        516.676667
                 Phones          476.100207
Name: Sales, dtype: float64

In [18]:
# Average return Profit Margin Percent for returned orders
super_df.loc[super_df.Returned==1].groupby(['Category', 'Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases       -0.821275
                 Chairs           3.282007
                 Furnishings      5.090651
                 Tables         -21.763412
Office Supplies  Appliances      -4.302427
                 Art              6.234819
                 Binders          4.769743
                 Envelopes        1.845386
                 Fasteners       12.253599
                 Labels          11.382428
                 Paper           20.336994
                 Storage          1.879159
                 Supplies        -1.863690
Technology       Accessories     12.605949
                 Copiers         12.291191
                 Machines         0.493772
                 Phones           5.792309
Name: Profit Margin Percentage, dtype: float64

In [19]:
# Dollars in Profit Lost to Returns by Subcategory
super_df.loc[super_df.Returned==1].groupby(['Category', 'Sub-Category'])['Profit Margin Percentage'].sum()

Category         Sub-Category
Furniture        Bookcases        -85.412617
                 Chairs           482.455097
                 Furnishings      687.237875
                 Tables          -892.299893
Office Supplies  Appliances      -253.843195
                 Art             1352.955817
                 Binders         1283.060967
                 Envelopes        182.693228
                 Fasteners       1249.867123
                 Labels          1559.392569
                 Paper           3050.549110
                 Storage          398.381630
                 Supplies        -191.960024
Technology       Accessories     1739.620927
                 Copiers         1216.827898
                 Machines          31.107626
                 Phones           839.884808
Name: Profit Margin Percentage, dtype: float64