In [1]:
# import dependencies
import pandas as pd
import datetime as dt
import numpy as np

In [2]:
# read in Superstore data (all orders & countries)
super_topcountries = pd.read_csv('resources/superstore_topcountries.csv')
super_topcountries.head()

Unnamed: 0,Order ID,index,Order Date,Ship Date,Ship Mode,Segment,City,State,Country,Region,...,Quantity,Discount,Profit,Shipping Cost,Order Priority,Days to Ship,Returned,Profit Margin Percentage,Returned $ Amount,Orders per Country
0,CA-2014-AB10015140-41954,0,2014-11-11,2014-11-13,First Class,Consumer,Oklahoma City,Oklahoma,United States,Central US,...,2,0.0,62.15,40.77,High,2,0.0,27.998018,0.0,9994
1,IN-2014-JR162107-41675,1,2014-02-05,2014-02-07,Second Class,Corporate,Wollongong,New South Wales,Australia,Oceania,...,9,0.1,-288.77,923.63,Critical,2,0.0,-7.784817,0.0,2837
2,IN-2014-CR127307-41929,2,2014-10-17,2014-10-18,First Class,Consumer,Brisbane,Queensland,Australia,Oceania,...,9,0.1,919.97,915.49,Medium,1,0.0,17.776614,0.0,2837
3,ES-2014-KM1637548-41667,3,2014-01-28,2014-01-30,First Class,Home Office,Berlin,Berlin,Germany,Western Europe,...,5,0.1,-96.54,910.16,Medium,2,0.0,-3.337586,0.0,2063
4,IN-2014-JM156557-41818,5,2014-06-28,2014-07-01,Second Class,Corporate,Sydney,New South Wales,Australia,Oceania,...,5,0.1,763.28,897.35,Critical,3,0.0,26.663127,0.0,2837


In [3]:
super_topcountries.dtypes

Order ID                     object
index                         int64
Order Date                   object
Ship Date                    object
Ship Mode                    object
Segment                      object
City                         object
State                        object
Country                      object
Region                       object
Market                       object
Category                     object
Sub-Category                 object
Sales                       float64
Quantity                      int64
Discount                    float64
Profit                      float64
Shipping Cost               float64
Order Priority               object
Days to Ship                  int64
Returned                    float64
Profit Margin Percentage    float64
Returned $ Amount           float64
Orders per Country            int64
dtype: object

## Data Exploration - Orders from Top Countries

#### Sales & Profit

In [4]:
# Average Sale Price by Category 

super_topcountries[["Category", "Sales"]].groupby("Category").mean().reset_index()

Unnamed: 0,Category,Sales
0,Furniture,427.466502
1,Office Supplies,125.94128
2,Technology,487.612912


In [5]:
# Average Sales Price by Sub_Category

super_topcountries.groupby(['Category', 'Sub-Category'])['Sales'].mean().reset_index()

Unnamed: 0,Category,Sub-Category,Sales
0,Furniture,Bookcases,630.246783
1,Furniture,Chairs,454.275702
2,Furniture,Furnishings,122.435771
3,Furniture,Tables,874.658308
4,Office Supplies,Appliances,582.423241
5,Office Supplies,Art,77.712536
6,Office Supplies,Binders,84.048078
7,Office Supplies,Envelopes,74.454187
8,Office Supplies,Fasteners,35.016496
9,Office Supplies,Labels,29.476869


In [6]:
# Sales totals by Sub_Category
super_topcountries.groupby(['Category', 'Sub-Category'])['Sales'].sum().reset_index()

Unnamed: 0,Category,Sub-Category,Sales
0,Furniture,Bookcases,1136334.95
1,Furniture,Chairs,1220638.81
2,Furniture,Furnishings,304008.02
3,Furniture,Tables,625380.69
4,Office Supplies,Appliances,781611.99
5,Office Supplies,Art,266554.0
6,Office Supplies,Binders,388806.41
7,Office Supplies,Envelopes,130890.46
8,Office Supplies,Fasteners,68457.25
9,Office Supplies,Labels,57715.71


In [7]:
# Sales totals by country
super_topcountries.groupby(['Country'])['Sales'].sum().sort_values(ascending=False).reset_index()

Unnamed: 0,Country,Sales
0,United States,2297201.07
1,Australia,925236.91
2,France,858931.65
3,China,700562.09
4,Germany,628136.55
5,Mexico,621552.81
6,India,589486.12
7,United Kingdom,528576.55
8,Indonesia,404887.67
9,Brazil,359924.85


In [8]:
# Profit Margin by Sub/Category
super_topcountries.groupby(['Category','Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases        0.541087
                 Chairs           1.435187
                 Furnishings      5.026421
                 Tables         -18.744122
Office Supplies  Appliances      -2.090619
                 Art              6.287120
                 Binders         -2.270469
                 Envelopes        9.670892
                 Fasteners        7.444036
                 Labels          13.413669
                 Paper           22.434320
                 Storage         -0.383588
                 Supplies         4.090786
Technology       Accessories     10.330371
                 Copiers          8.028268
                 Machines        -7.638471
                 Phones           4.199878
Name: Profit Margin Percentage, dtype: float64

#### Order Priority & Shipping

In [9]:
# Count for each Order Priority
super_topcountries[['Order Priority', 'Ship Mode']].groupby('Order Priority').count().reset_index()

Unnamed: 0,Order Priority,Ship Mode
0,Critical,2978
1,High,11642
2,Low,1853
3,Medium,22141


In [10]:
# Shipping Cost for each Order Priority
super_topcountries.groupby(['Order Priority'])['Shipping Cost'].mean().reset_index()

Unnamed: 0,Order Priority,Shipping Cost
0,Critical,61.481847
1,High,34.453142
2,Low,28.957864
3,Medium,19.216954


In [11]:
# Shipping Cost for each Sub/Category
super_topcountries.groupby(['Category','Sub-Category'])['Shipping Cost'].mean().sort_values(ascending=False)

Category         Sub-Category
Furniture        Tables          90.591274
Technology       Copiers         76.598332
Furniture        Bookcases       66.918057
Office Supplies  Appliances      62.043388
Technology       Machines        58.481199
                 Phones          55.269026
Furniture        Chairs          49.710404
Technology       Accessories     28.051420
Office Supplies  Storage         25.130849
Furniture        Furnishings     13.190373
Office Supplies  Supplies        11.072042
                 Binders          8.975482
                 Art              8.686851
                 Envelopes        8.106120
                 Paper            7.802928
                 Fasteners        3.995608
                 Labels           3.547345
Name: Shipping Cost, dtype: float64

In [12]:
# Average Profit Margin by Order Priority
super_topcountries[['Order Priority', 'Profit Margin Percentage']].groupby('Order Priority').mean().reset_index()

Unnamed: 0,Order Priority,Profit Margin Percentage
0,Critical,5.830454
1,High,4.959978
2,Low,4.976048
3,Medium,4.373748


In [13]:
# Order Priority (Count, Profit Margin, Dollars) within each Ship Mode
super_topcountries.groupby(['Ship Mode','Order Priority'])['Profit Margin Percentage'].agg(['count','mean','sum']).reset_index()  

Unnamed: 0,Ship Mode,Order Priority,count,mean,sum
0,First Class,Critical,1283,5.12384,6573.886299
1,First Class,High,2638,3.590731,9472.348079
2,First Class,Medium,1772,4.297597,7615.342422
3,Same Day,Critical,576,9.223021,5312.460225
4,Same Day,High,925,3.66593,3390.985483
5,Same Day,Medium,500,3.384213,1692.10646
6,Second Class,Critical,1119,4.894321,5476.745041
7,Second Class,High,2910,6.38931,18592.892196
8,Second Class,Medium,3651,4.22708,15433.068225
9,Standard Class,High,5169,5.085673,26287.843896


#### Returns

In [14]:
# Get Count of Returns
super_topcountries.value_counts('Returned')

Returned
0.0    36876
1.0     1738
dtype: int64

##### By Country

In [15]:
# Returns total Dollars by Country
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Country'])[['Returned $ Amount']].sum().sort_values(by = 'Returned $ Amount', ascending = False ).reset_index()

Unnamed: 0,Country,Returned $ Amount
0,United States,108118.09
1,Australia,41023.85
2,France,39419.79
3,China,31023.19
4,Germany,25606.92
5,Brazil,22436.3
6,Indonesia,19576.87
7,Mexico,19275.77
8,India,18087.15
9,United Kingdom,16597.07


In [16]:
# Average return by Country
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Country'])['Sales'].mean()

Country
Australia             323.022441
Brazil                273.613415
China                 319.826701
Cuba                  140.978929
Dominican Republic    117.973438
El Salvador           121.628750
France                317.901532
Germany               324.138228
Guatemala             307.188750
Honduras               86.133125
India                 278.263846
Indonesia             261.024933
Iran                  128.550000
Italy                 203.390159
Mexico                196.691531
New Zealand           219.240741
Nicaragua             244.818649
Nigeria                37.720000
Philippines           271.233571
Spain                 420.590000
Turkey                 75.662031
United Kingdom        251.470758
United States         231.021560
Name: Sales, dtype: float64

In [17]:
# Average return Profit Margin Percent for returned orders
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Category', 'Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases        0.126531
                 Chairs           1.350439
                 Furnishings      2.533391
                 Tables         -17.806220
Office Supplies  Appliances      -5.454726
                 Art              7.400597
                 Binders          4.547258
                 Envelopes        1.826378
                 Fasteners       11.133885
                 Labels          14.598796
                 Paper           23.908546
                 Storage          2.234042
                 Supplies        -4.527110
Technology       Accessories     12.691426
                 Copiers         13.064029
                 Machines        -2.782203
                 Phones           5.445291
Name: Profit Margin Percentage, dtype: float64

In [18]:
# Returns total Dollars by Sub/Category
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Category', 'Sub-Category'])[['Returned $ Amount']].mean().sort_values(by = 'Returned $ Amount', ascending = False ).reset_index()

Unnamed: 0,Category,Sub-Category,Returned $ Amount
0,Furniture,Tables,752.843939
1,Technology,Copiers,725.146232
2,Office Supplies,Appliances,594.249362
3,Furniture,Bookcases,571.457654
4,Technology,Machines,570.420222
5,Technology,Phones,476.218018
6,Furniture,Chairs,374.746641
7,Technology,Accessories,262.317265
8,Office Supplies,Storage,259.553354
9,Furniture,Furnishings,124.228942


##### By Category & Sub-Category

In [19]:
# Average return by Category and Subcategory
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Category', 'Sub-Category'])['Sales'].mean()

Category         Sub-Category
Furniture        Bookcases       571.457654
                 Chairs          374.746641
                 Furnishings     124.228942
                 Tables          752.843939
Office Supplies  Appliances      594.249362
                 Art              74.076604
                 Binders          94.026498
                 Envelopes        74.544875
                 Fasteners        29.179000
                 Labels           24.516224
                 Paper            67.697970
                 Storage         259.553354
                 Supplies         82.095256
Technology       Accessories     262.317265
                 Copiers         725.146232
                 Machines        570.420222
                 Phones          476.218018
Name: Sales, dtype: float64

In [20]:
# Average return Profit Margin Percent for returned orders
super_topcountries.loc[super_topcountries.Returned==1].groupby(['Category', 'Sub-Category'])['Profit Margin Percentage'].mean()

Category         Sub-Category
Furniture        Bookcases        0.126531
                 Chairs           1.350439
                 Furnishings      2.533391
                 Tables         -17.806220
Office Supplies  Appliances      -5.454726
                 Art              7.400597
                 Binders          4.547258
                 Envelopes        1.826378
                 Fasteners       11.133885
                 Labels          14.598796
                 Paper           23.908546
                 Storage          2.234042
                 Supplies        -4.527110
Technology       Accessories     12.691426
                 Copiers         13.064029
                 Machines        -2.782203
                 Phones           5.445291
Name: Profit Margin Percentage, dtype: float64