In [2]:
import numpy as np
import pandas as pd

# Load the sneaker dataset into `ds`; the path is relative to the notebook's
# working directory.
ds = pd.read_csv("./data/cleaned_sneaks.csv")

In [22]:
# Count the missing values in every column.
ds.isna().sum()

Unnamed: 0                0
item                      0
brand                     0
retail                    0
release                  90
lowestAsk                 0
numberOfAsks              0
salesThisPeriod           0
highestBid                0
numberOfBids              0
annualHigh                0
annualLow                 0
volatility                0
deadstockSold             0
pricePremium              0
averageDeadstockPrice     0
lastSale                  0
changePercentage          0
img_path                  0
dtype: int64

In [23]:
# Fix null release dates by filling each brand's gaps with that brand's most
# common (modal) release value.
def _fill_release_with_brand_mode(release):
    """Fill the missing entries of one brand's ``release`` values with their mode.

    Parameters
    ----------
    release : pd.Series
        The ``release`` values belonging to a single brand group.

    Returns
    -------
    pd.Series
        A series with the same index as ``release``. Missing entries are
        filled with the first modal value. If the group has no non-null value
        there is no mode, and the entries are left missing instead of being
        filled with a placeholder string that is not a valid date.
    """
    modes = release.mode()  # computed once per group; nulls are ignored by default
    if modes.empty:
        return release
    return release.fillna(modes.iloc[0])


ds["release"] = ds.groupby("brand")["release"].transform(_fill_release_with_brand_mode)

In [24]:
# Convert the release column to datetimes; values that cannot be parsed are
# coerced to NaT, so count how many dates are missing afterwards.
release_as_datetime = pd.to_datetime(ds["release"], errors="coerce")
ds["release"] = release_as_datetime
missing_dates = release_as_datetime.isna().sum()

print(f"missing dates: {missing_dates}")
ds.head()

missing dates: 0


Unnamed: 0.1,Unnamed: 0,item,brand,retail,release,lowestAsk,numberOfAsks,salesThisPeriod,highestBid,numberOfBids,annualHigh,annualLow,volatility,deadstockSold,pricePremium,averageDeadstockPrice,lastSale,changePercentage,img_path
0,0,Jordan 4 Retro SB Pine Green,Jordan,225,2023-03-21,325,1995,2675,480,3697,952,280,0.061347,5408,0.542,388,347,0.0,feetflix/Jordan 4 Retro SB Pine Green.jpg
1,1,Jordan 3 Retro White Cement Reimagined,Jordan,210,2023-03-11,190,2361,1289,280,2597,757,150,0.115522,16894,0.111,297,234,0.016666,feetflix/Jordan 3 Retro White Cement Reimagine...
2,2,Nike Air Force 1 Low Tiffany & Co. 1837,Nike,400,2023-03-07,893,1310,282,1526,2436,4831,705,0.232434,4711,1.125,1160,850,-0.132653,feetflix/Nike Air Force 1 Low Tiffany & Co. 18...
3,3,Nike Air Max 1 '86 Big Bubble Sport Red,Nike,150,2023-03-26,177,704,240,200,366,550,150,0.095096,603,0.2,228,180,0.0,feetflix/Nike Air Max 1 '86 Big Bubble Sport R...
4,4,MSCHF Big Red Boot,MSCHF,350,2023-02-16,570,334,68,611,345,2445,498,0.186197,323,1.373,883,831,0.037438,feetflix/MSCHF Big Red Boot.jpg


In [26]:
# Feature extraction: discard the columns that are not used and preview the
# first ten remaining rows.
ds = ds.drop(
    columns=[
        "Unnamed: 0",
        "numberOfAsks",
        "salesThisPeriod",
        "numberOfBids",
        "changePercentage",
    ]
)

ds.head(10)

Unnamed: 0,item,brand,retail,release,lowestAsk,highestBid,annualHigh,annualLow,volatility,deadstockSold,pricePremium,averageDeadstockPrice,lastSale,img_path
0,Jordan 4 Retro SB Pine Green,Jordan,225,2023-03-21,325,480,952,280,0.061347,5408,0.542,388,347,feetflix/Jordan 4 Retro SB Pine Green.jpg
1,Jordan 3 Retro White Cement Reimagined,Jordan,210,2023-03-11,190,280,757,150,0.115522,16894,0.111,297,234,feetflix/Jordan 3 Retro White Cement Reimagine...
2,Nike Air Force 1 Low Tiffany & Co. 1837,Nike,400,2023-03-07,893,1526,4831,705,0.232434,4711,1.125,1160,850,feetflix/Nike Air Force 1 Low Tiffany & Co. 18...
3,Nike Air Max 1 '86 Big Bubble Sport Red,Nike,150,2023-03-26,177,200,550,150,0.095096,603,0.2,228,180,feetflix/Nike Air Max 1 '86 Big Bubble Sport R...
4,MSCHF Big Red Boot,MSCHF,350,2023-02-16,570,611,2445,498,0.186197,323,1.373,883,831,feetflix/MSCHF Big Red Boot.jpg
5,New Balance 990v6 Action Bronson Baklava,New Balance,220,2023-03-17,320,450,844,251,0.13197,281,0.586,403,349,feetflix/New Balance 990v6 Action Bronson Bakl...
6,Nike Dunk Low Retro White Black Panda (2021),Nike,110,2021-03-10,139,165,697,0,0.046333,233962,0.318,191,145,feetflix/Nike Dunk Low Retro White Black Panda...
7,Nike Dunk Low Retro White Black Panda (2021) (...,Nike,100,2021-03-10,130,141,308,110,0.059427,109763,0.44,179,144,feetflix/Nike Dunk Low Retro White Black Panda...
8,Nike Dunk Low Grey Fog,Nike,100,2021-09-21,145,265,569,120,0.09535,25651,0.69,216,169,feetflix/Nike Dunk Low Grey Fog.jpg
9,Nike Dunk Low Retro White Black Panda (2021) (GS),Nike,85,2021-03-10,112,126,278,88,0.042461,87660,0.529,162,130,feetflix/Nike Dunk Low Retro White Black Panda...


In [35]:
# Found an issue with a duplicated brand name: "Asics" and "ASICS" are both
# present, so fold "Asics" into "ASICS".
# Only the last expression of a cell is rendered automatically, so the "before"
# preview must be displayed explicitly (`display` is provided by the
# IPython/Jupyter kernel without an import).
display(ds[ds["brand"].isin(["ASICS", "Asics"])].head())
ds["brand"] = ds["brand"].replace({"Asics": "ASICS"})
# "After" preview: rows now carrying the unified brand label.
ds[ds["brand"].isin(["ASICS"])].head()

Unnamed: 0,item,brand,retail,release,lowestAsk,highestBid,annualHigh,annualLow,volatility,deadstockSold,pricePremium,averageDeadstockPrice,lastSale,img_path
563,ASICS ACTIBREEZE 3D Sandal Black,ASICS,80,2022-07-21,117,140,375,80,0.144152,544,0.938,158,155,feetflix/ASICS ACTIBREEZE 3D Sandal Black.jpg
926,ASICS Gel-1130 Cream Pure Silver (Women's),ASICS,70,2022-07-21,78,90,219,78,0.111731,248,0.129,121,79,feetflix/ASICS Gel-1130 Cream Pure Silver (Wom...
929,ASICS Gel-Kayano 14 White Midnight (Women's),ASICS,180,2022-07-21,145,255,381,110,0.273995,133,-0.179,192,148,feetflix/ASICS Gel-Kayano 14 White Midnight (W...
1433,ASICS Gel-Lyte III Sneaker Politics Always Ready,ASICS,150,2022-05-30,140,280,500,120,0.091679,216,0.353,195,203,feetflix/ASICS Gel-Lyte III Sneaker Politics A...
1965,ASICS Gel-Lyte III AFEW Beauty of Imperfection,ASICS,170,2022-02-26,110,185,570,101,0.123714,372,-0.353,201,110,feetflix/ASICS Gel-Lyte III AFEW Beauty of Imp...


In [40]:
# Look for extremely large values: print the min/max of each described column,
# followed by the per-column null counts.
described = ds.describe().T
print(described[["min", "max"]])
null_counts = ds.isna().sum()
print(null_counts)

                                       min                  max
retail                                 0.0               1110.0
release                2004-01-01 00:00:00  2023-04-22 00:00:00
lowestAsk                             37.0               1804.0
highestBid                            50.0               3167.0
annualHigh                            83.0               6500.0
annualLow                              0.0               1258.0
volatility                             0.0             0.754551
deadstockSold                         18.0             233962.0
pricePremium                        -0.889               16.654
averageDeadstockPrice                 50.0               2319.0
lastSale                              36.0               2295.0
item                     0
brand                    0
retail                   0
release                  0
lowestAsk                0
highestBid               0
annualHigh               0
annualLow                0
volatility      

In [38]:
# Convert positive and negative infinity to NaN throughout the frame.
ds = ds.replace(to_replace=[np.inf, -np.inf], value=np.nan)

In [None]:
# Fill missing pricePremium entries with the column median.
price_premium_median = ds["pricePremium"].median()
ds["pricePremium"] = ds["pricePremium"].fillna(price_premium_median)

In [3]:
def add_air_to_jordan(name):
    """Prefix the first "Jordan" in ``name`` with "Air " unless it already has it.

    Parameters
    ----------
    name : str
        Item name to normalise.

    Returns
    -------
    str
        ``name`` with "Air " inserted before its first "Jordan". The name is
        returned unchanged when it contains no "Jordan" or when that first
        occurrence is already preceded by "Air " anywhere in the string, not
        only at the start. This avoids producing "Air Air Jordan" for names
        such as "Travis Scott x Air Jordan 1".
    """
    first = name.find("Jordan")
    # Nothing to do: no "Jordan" at all, or it is already written "Air Jordan".
    if first == -1 or name[:first].endswith("Air "):
        return name
    return name[:first] + "Air " + name[first:]

# Normalise every item name with add_air_to_jordan, then preview the result.
ds["item"] = ds["item"].map(add_air_to_jordan)
ds.head()

Unnamed: 0,item,brand,retail,release,lowestAsk,highestBid,annualHigh,annualLow,volatility,deadstockSold,pricePremium,averageDeadstockPrice,lastSale,img_path
0,Air Jordan 4 Retro SB Pine Green,Jordan,225,2023-03-21,325,480,952,280,0.061347,5408,0.542,388,347,feetflix/Jordan 4 Retro SB Pine Green.jpg
1,Air Jordan 3 Retro White Cement Reimagined,Jordan,210,2023-03-11,190,280,757,150,0.115522,16894,0.111,297,234,feetflix/Jordan 3 Retro White Cement Reimagine...
2,Nike Air Force 1 Low Tiffany & Co. 1837,Nike,400,2023-03-07,893,1526,4831,705,0.232434,4711,1.125,1160,850,feetflix/Nike Air Force 1 Low Tiffany & Co. 18...
3,Nike Air Max 1 '86 Big Bubble Sport Red,Nike,150,2023-03-26,177,200,550,150,0.095096,603,0.2,228,180,feetflix/Nike Air Max 1 '86 Big Bubble Sport R...
4,MSCHF Big Red Boot,MSCHF,350,2023-02-16,570,611,2445,498,0.186197,323,1.373,883,831,feetflix/MSCHF Big Red Boot.jpg


In [4]:
# Write the cleaned frame to cleaned_sneaks.csv in the current working
# directory, without the index column. The file is opened in write mode (the
# default), so an existing file of that name is overwritten.
# NOTE(review): the input was read from "./data/cleaned_sneaks.csv"; confirm
# that writing to the working directory rather than ./data is intended.
ds.to_csv("cleaned_sneaks.csv", index=False)