In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff 
from plotly.grid_objs import Grid, Column 
import colorlover as cl


# Location of the StockX 2019 data-contest workbook; read_excel fetches it from this URL.
path = 'https://s3.amazonaws.com/stockx-sneaker-analysis/wp-content/uploads/2019/02/StockX-Data-Contest-2019.xlsx'

# sheet_name=1 selects the second sheet of the workbook (integer sheet positions are 0-indexed).
df = pd.read_excel(path, sheet_name=1)

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Order Date,Brand,Sneaker Name,Sale Price,Retail Price,Release Date,Shoe Size,Buyer Region
0,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-Low-V2-Beluga,1097.0,220,2016-09-24,11.0,California
1,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Copper,685.0,220,2016-11-23,11.0,California
2,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Green,690.0,220,2016-11-23,11.0,California
3,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Red,1075.0,220,2016-11-23,11.5,Kentucky
4,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Red-2017,828.0,220,2017-02-11,11.0,Rhode Island


In [2]:
# Net margin helper: simple subtraction, then a flat fee haircut.
def profit_after_fees(x, y, fee_rate=0.125):
    '''
    Return the resale margin left after marketplace fees, rounded to 2 decimals.

    The margin is ``x - y`` and the fee is taken as a fraction of that margin,
    i.e. the result is ``round((x - y) * (1 - fee_rate), 2)``.

    The default ``fee_rate`` of 0.125 is StockX's seller fee at 9.5% maximum
    plus the additional 3% transaction fee (12.5% max):
    https://help.stockx.com/selling-on-stockx/what-are-stockx-selling-fees

    NOTE(review): the fee is applied to the margin, so a loss is shrunk by the
    fee as well. If the fees are actually charged on the sale price, the
    formula should be ``x * (1 - fee_rate) - y`` -- confirm before changing,
    because every downstream number depends on it.

    Parameters
    ----------
    x : number or pandas Series
        Sale price.
    y : number or pandas Series
        Retail price.
    fee_rate : float, default 0.125
        Fraction of the margin withheld as fees.

    Returns
    -------
    Same kind as ``x - y``
        Margin after fees, rounded to two decimals.
    '''
    profit = x - y
    return round(profit * (1 - fee_rate), 2)

In [3]:
# Work on a copy so the frame loaded from the workbook (`df`) is not modified by later cells.
data = df.copy()

In [4]:
# Preview the working copy.
data.head()

Unnamed: 0,Order Date,Brand,Sneaker Name,Sale Price,Retail Price,Release Date,Shoe Size,Buyer Region
0,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-Low-V2-Beluga,1097.0,220,2016-09-24,11.0,California
1,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Copper,685.0,220,2016-11-23,11.0,California
2,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Green,690.0,220,2016-11-23,11.0,California
3,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Red,1075.0,220,2016-11-23,11.5,Kentucky
4,2017-09-01,Yeezy,Adidas-Yeezy-Boost-350-V2-Core-Black-Red-2017,828.0,220,2017-02-11,11.0,Rhode Island


In [5]:
# profit_after_fees only uses subtraction, multiplication and round(), all of
# which operate element-wise on a Series, so whole columns can be passed in
# directly instead of making one Python-level call per row via apply(axis=1).
data['Profit After Fees'] = profit_after_fees(data['Sale Price'], data['Retail Price'])


In [6]:
# Per-row popularity counts: the number of sales that share this row's sneaker
# within the same buyer region / the same shoe size.
for count_col, group_key in (
    ('Favorite_by_State', 'Buyer Region'),
    ('Favorite_by_Size', 'Shoe Size'),
):
    sales_per_group = data.groupby([group_key, 'Sneaker Name'])['Sneaker Name']
    data[count_col] = sales_per_group.transform('count')

In [7]:
# One row per (state, sneaker) pair holding that pair's sale count.
state_favorite = data.groupby(['Buyer Region','Sneaker Name'], as_index=False)['Favorite_by_State'].max()

# Reshape to states x sneakers. pivot() is called with keywords because
# pandas 2.x no longer accepts index/columns/values positionally.
# Pairs with no sales become 0.
pivot_state_fav = state_favorite.pivot(
    index='Buyer Region', columns='Sneaker Name', values='Favorite_by_State'
).fillna(0)
pivot_state_fav.head()

Sneaker Name,Adidas-Yeezy-Boost-350-Low-Moonrock,Adidas-Yeezy-Boost-350-Low-Oxford-Tan,Adidas-Yeezy-Boost-350-Low-Pirate-Black-2015,Adidas-Yeezy-Boost-350-Low-Pirate-Black-2016,Adidas-Yeezy-Boost-350-Low-Turtledove,Adidas-Yeezy-Boost-350-Low-V2-Beluga,Adidas-Yeezy-Boost-350-V2-Beluga-2pt0,Adidas-Yeezy-Boost-350-V2-Blue-Tint,Adidas-Yeezy-Boost-350-V2-Core-Black-Copper,Adidas-Yeezy-Boost-350-V2-Core-Black-Green,...,Nike-Blazer-Mid-Off-White-Wolf-Grey,Nike-React-Hyperdunk-2017-Flyknit-Off-White,Nike-Zoom-Fly-Mercurial-Off-White-Black,Nike-Zoom-Fly-Mercurial-Off-White-Total-Orange,Nike-Zoom-Fly-Off-White,Nike-Zoom-Fly-Off-White-Black-Silver,Nike-Zoom-Fly-Off-White-Pink,adidas-Yeezy-Boost-350-V2-Butter,adidas-Yeezy-Boost-350-V2-Static,adidas-Yeezy-Boost-350-V2-Static-Reflective
Buyer Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0.0,0.0,0.0,1.0,0.0,4.0,47.0,47.0,0.0,0.0,...,0.0,1.0,1.0,3.0,2.0,5.0,12.0,48.0,23.0,16.0
Alaska,0.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,4.0,5.0,0.0
Arizona,1.0,0.0,1.0,1.0,1.0,5.0,147.0,139.0,1.0,2.0,...,6.0,14.0,10.0,6.0,6.0,14.0,22.0,164.0,59.0,29.0
Arkansas,1.0,0.0,0.0,0.0,0.0,1.0,20.0,18.0,1.0,0.0,...,1.0,1.0,0.0,1.0,2.0,1.0,1.0,26.0,13.0,3.0
California,25.0,16.0,28.0,38.0,22.0,115.0,2146.0,1682.0,58.0,58.0,...,47.0,118.0,174.0,160.0,117.0,250.0,177.0,1810.0,923.0,696.0


In [34]:
# Highest per-sneaker sale count in each state.
# Every column of the pivot is a sneaker except the derived 'Favorite_Sneaker'
# column itself, so take the max over all other columns instead of spelling
# out the 50 sneaker names by hand. Dropping the derived column first (if it
# already exists) keeps the cell idempotent when it is re-run.
pivot_state_fav['Favorite_Sneaker'] = (
    pivot_state_fav
    .drop(columns='Favorite_Sneaker', errors='ignore')
    .max(axis=1)
)

In [43]:
# NOTE(review): `value_counts` is referenced but never called (no parentheses),
# so this cell displays the bound method's repr -- which embeds the Series --
# rather than a frequency table. Confirm whether `.value_counts()` or simply
# the bare Series was intended.
pivot_state_fav['Favorite_Sneaker'].value_counts

<bound method IndexOpsMixin.value_counts of Buyer Region
Alabama                   71.0
Alaska                     9.0
Arizona                  164.0
Arkansas                  26.0
California              2146.0
Colorado                 131.0
Connecticut              145.0
Delaware                 236.0
District of Columbia      36.0
Florida                  695.0
Georgia                  210.0
Hawaii                    38.0
Idaho                     14.0
Illinois                 501.0
Indiana                  130.0
Iowa                      61.0
Kansas                    46.0
Kentucky                  55.0
Louisiana                 69.0
Maine                     30.0
Maryland                 205.0
Massachusetts            342.0
Michigan                 341.0
Minnesota                 80.0
Mississippi               25.0
Missouri                  84.0
Montana                    9.0
Nebraska                  37.0
Nevada                    73.0
New Hampshire             36.0
New Jersey   

In [9]:
# Name of the best-selling sneaker in each state.
# idxmax(axis=1) returns, per row, the label of the first column holding the
# row maximum. The derived 'Favorite_Sneaker' column (the max count itself) is
# excluded so only sneaker columns can win. This replaces a loop that
# re-compared the whole frame once per row and relied on positional
# Series[int] lookups against a label index.
l = (
    pivot_state_fav
    .drop(columns='Favorite_Sneaker', errors='ignore')
    .idxmax(axis=1)
    .tolist()
)

# Column labels are passed as a dict key rather than a set: sets are unordered
# and are rejected as `columns` by recent pandas releases.
new_df = pd.DataFrame({'Favorite_Sneaker': l})
new_df['State'] = pivot_state_fav.index
new_df.head()

Unnamed: 0,Favorite_Sneaker,State
0,Adidas-Yeezy-Boost-350-V2-Zebra,Alabama
1,Adidas-Yeezy-Boost-350-V2-Cream-White,Alaska
2,adidas-Yeezy-Boost-350-V2-Butter,Arizona
3,Adidas-Yeezy-Boost-350-V2-Cream-White,Arkansas
4,Adidas-Yeezy-Boost-350-V2-Beluga-2pt0,California


In [27]:
# One row per (shoe size, sneaker) pair holding that pair's sale count.
size_favorite = data.groupby(['Shoe Size', 'Sneaker Name'], as_index=False)['Favorite_by_Size'].max()

# Reshape to sizes x sneakers. pivot() is called with keywords because
# pandas 2.x no longer accepts index/columns/values positionally.
# Pairs with no sales become 0.
pivot_size_fav = size_favorite.pivot(
    index='Shoe Size', columns='Sneaker Name', values='Favorite_by_Size'
).fillna(0)

pivot_size_fav.head()

Sneaker Name,Adidas-Yeezy-Boost-350-Low-Moonrock,Adidas-Yeezy-Boost-350-Low-Oxford-Tan,Adidas-Yeezy-Boost-350-Low-Pirate-Black-2015,Adidas-Yeezy-Boost-350-Low-Pirate-Black-2016,Adidas-Yeezy-Boost-350-Low-Turtledove,Adidas-Yeezy-Boost-350-Low-V2-Beluga,Adidas-Yeezy-Boost-350-V2-Beluga-2pt0,Adidas-Yeezy-Boost-350-V2-Blue-Tint,Adidas-Yeezy-Boost-350-V2-Core-Black-Copper,Adidas-Yeezy-Boost-350-V2-Core-Black-Green,...,Nike-Blazer-Mid-Off-White-Wolf-Grey,Nike-React-Hyperdunk-2017-Flyknit-Off-White,Nike-Zoom-Fly-Mercurial-Off-White-Black,Nike-Zoom-Fly-Mercurial-Off-White-Total-Orange,Nike-Zoom-Fly-Off-White,Nike-Zoom-Fly-Off-White-Black-Silver,Nike-Zoom-Fly-Off-White-Pink,adidas-Yeezy-Boost-350-V2-Butter,adidas-Yeezy-Boost-350-V2-Static,adidas-Yeezy-Boost-350-V2-Static-Reflective
Shoe Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4.0,2.0,0.0,0.0,0.0,0.0,12.0,296.0,233.0,1.0,1.0,...,12.0,1.0,0.0,0.0,4.0,26.0,25.0,312.0,155.0,120.0
4.5,2.0,0.0,0.0,0.0,0.0,1.0,65.0,34.0,1.0,1.0,...,13.0,0.0,0.0,0.0,0.0,14.0,30.0,221.0,135.0,92.0
5.0,1.0,0.0,0.0,1.0,1.0,16.0,437.0,428.0,3.0,5.0,...,16.0,1.0,0.0,0.0,1.0,32.0,42.0,598.0,236.0,200.0
5.5,3.0,0.0,0.0,1.0,0.0,2.0,200.0,162.0,4.0,1.0,...,7.0,2.0,0.0,0.0,2.0,25.0,28.0,586.0,232.0,212.0


In [38]:
# Highest per-sneaker sale count for each shoe size.
# Every column of the pivot is a sneaker except the derived 'Favorite_Sneaker'
# column itself, so take the max over all other columns instead of pasting the
# output of `pivot_size_fav.columns` as a hard-coded list. Dropping the derived
# column first (if it already exists) keeps the cell idempotent when re-run.
pivot_size_fav['Favorite_Sneaker'] = (
    pivot_size_fav
    .drop(columns='Favorite_Sneaker', errors='ignore')
    .max(axis=1)
)

In [54]:
# Sanity check: the size pivot and the state pivot should have the same number of columns.
len(pivot_size_fav.columns) == len(pivot_state_fav.columns)

True

In [56]:
# Number of distinct sneaker names in the data set.
data['Sneaker Name'].nunique()

50

In [40]:
# NOTE(review): `value_counts` is referenced but never called (no parentheses),
# so this cell displays the bound method's repr -- which embeds the Series --
# rather than a frequency table. Confirm whether `.value_counts()` or simply
# the bare Series was intended.
pivot_size_fav['Favorite_Sneaker'].value_counts

<bound method IndexOpsMixin.value_counts of Shoe Size
3.5        2.0
4.0      312.0
4.5      221.0
5.0      598.0
5.5      586.0
6.0      618.0
6.5      262.0
7.0      439.0
7.5      321.0
8.0      682.0
8.5      597.0
9.0     1207.0
9.5      998.0
10.0    1261.0
10.5     961.0
11.0    1028.0
11.5     539.0
12.0     780.0
12.5     106.0
13.0     469.0
13.5      30.0
14.0     238.0
14.5      20.0
15.0      18.0
16.0      19.0
17.0       3.0
Name: Favorite_Sneaker, dtype: float64>

In [44]:
# Replace the shoe-size index labels with their string form (str() applied to each label).
pivot_size_fav.index = pivot_size_fav.index.map(str)

In [46]:
# Name of the best-selling sneaker for each shoe size.
# idxmax(axis=1) returns, per row, the label of the first column holding the
# row maximum. The derived 'Favorite_Sneaker' column (the max count itself) is
# excluded so only sneaker columns can win. This replaces a loop that
# re-compared the whole frame once per row and relied on positional
# Series[int] lookups against a label index.
m = (
    pivot_size_fav
    .drop(columns='Favorite_Sneaker', errors='ignore')
    .idxmax(axis=1)
    .tolist()
)

# Column labels are passed as a dict key rather than a set: sets are unordered
# and are rejected as `columns` by recent pandas releases.
new_df2 = pd.DataFrame({'Favorite_Sneaker': m})
new_df2['Size'] = pivot_size_fav.index
new_df2.head()

Unnamed: 0,Favorite_Sneaker,Size
0,Nike-Air-VaporMax-Off-White-2018,3.5
1,adidas-Yeezy-Boost-350-V2-Butter,4.0
2,adidas-Yeezy-Boost-350-V2-Butter,4.5
3,adidas-Yeezy-Boost-350-V2-Butter,5.0
4,adidas-Yeezy-Boost-350-V2-Butter,5.5


In [53]:
# TODO: break the favorite-sneaker analysis down by shoe size.

# Unfinished draft left from an earlier attempt: sizes = []

# Unfinished draft left from an earlier attempt: for i in range(0, len

# Show the last rows, including the derived profit and popularity columns.
data.tail()

Unnamed: 0,Order Date,Brand,Sneaker Name,Sale Price,Retail Price,Release Date,Shoe Size,Buyer Region,Profit After Fees,Favorite_by_State,Favorite_by_Size
99951,2019-02-13,Yeezy,adidas-Yeezy-Boost-350-V2-Static-Reflective,565.0,220,2018-12-26,8.0,Oregon,301.88,893,183
99952,2019-02-13,Yeezy,adidas-Yeezy-Boost-350-V2-Static-Reflective,598.0,220,2018-12-26,8.5,California,330.75,696,272
99953,2019-02-13,Yeezy,adidas-Yeezy-Boost-350-V2-Static-Reflective,605.0,220,2018-12-26,5.5,New York,336.88,339,212
99954,2019-02-13,Yeezy,adidas-Yeezy-Boost-350-V2-Static-Reflective,650.0,220,2018-12-26,11.0,California,376.25,696,81
99955,2019-02-13,Yeezy,adidas-Yeezy-Boost-350-V2-Static-Reflective,640.0,220,2018-12-26,11.5,Texas,367.5,117,31


# So for more analysis

- Created two data frames, favorite by state, favorite by size
- We still can do some more time series data?
- Honestly I want to do some predictions
- Like we can do logistic regression?
- We recognize what our continuous variables are.
- We have release dates.... so we can do some type of subtraction to measure how long it takes to resell a sneaker for profit
- There are sales with NEGATIVE profit, so we can fit a logistic regression on profit vs. loss!


In [75]:
# Tally sales by whether they broke even or lost money after fees
# (True = profit of zero or less).
data['Profit After Fees'].le(0.0).value_counts()

False    99070
True       886
Name: Profit After Fees, dtype: int64

In [77]:
# Ratio of non-profitable sales (profit <= 0) to profitable ones, derived from
# the data instead of counts copied by hand from an earlier cell's output.
# NOTE(review): this is losses / non-losses, not the share of all sales; use
# loss_mask.mean() if the fraction of all sales is what is wanted.
loss_mask = data['Profit After Fees'] <= 0.0
print(loss_mask.sum() / (~loss_mask).sum())

0.008943171494902594


In [79]:
# Days between a sneaker's release and each resale order.
# Every calendar day should count, so plain datetime subtraction is enough;
# it matches np.busday_count(begindates, enddates, weekmask='1111111')
# without needing a per-row loop.
order_dates = data['Order Date']
release_dates = data['Release Date']

# pd.to_datetime guards against the columns having been loaded as strings.
# Negative values mean the order was placed before the release date.
days_since_release = (pd.to_datetime(order_dates) - pd.to_datetime(release_dates)).dt.days

len(order_dates) == len(release_dates)

True

In [50]:
# Split the raw frame by brand.
# Brand labels are stripped before comparing because the Yeezy rows only match
# with a leading space (' Yeezy'); stripping makes the filter independent of
# that stray whitespace.
brand_clean = df['Brand'].str.strip()

# Filter first, then copy: each result is an independent frame, so adding
# columns to it later does not raise SettingWithCopyWarning.
off_white = df[brand_clean == 'Off-White'].copy()
yeezy = df[brand_clean == 'Yeezy'].copy()

print('Shape of Off-White dataframe : {}'.format(off_white.shape))
print('Shape of Yeezy dataframe : {}'.format(yeezy.shape))

Shape of Off-White dataframe : (27794, 8)
Shape of Yeezy dataframe : (72162, 8)


In [51]:
# profit_after_fees works element-wise on whole columns, so pass the Series
# directly instead of making one Python-level call per row via apply(axis=1).
off_white['Profit After Fees'] = profit_after_fees(off_white['Sale Price'], off_white['Retail Price'])
yeezy['Profit After Fees'] = profit_after_fees(yeezy['Sale Price'], yeezy['Retail Price'])


In [52]:
# For each brand frame, tag every sale with the number of sales of the same
# sneaker in the same buyer region.
for brand_df in (off_white, yeezy):
    sales_per_region = brand_df.groupby(['Buyer Region','Sneaker Name'])['Sneaker Name']
    brand_df['Favorite'] = sales_per_region.transform('count')