In [121]:
# Warning
import warnings
warnings.filterwarnings("ignore")

# Main Library
import pandas as pd 
import numpy as np
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 1000)

# Visualization Library
import matplotlib.pyplot as plt
%matplotlib inline
from yellowbrick.cluster import KElbowVisualizer # cluster visualizer
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [122]:
dataset = pd.read_csv("nike_shoes_sales.csv")
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 643 entries, 0 to 642
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   product_name   643 non-null    object 
 1   product_id     643 non-null    object 
 2   listing_price  643 non-null    int64  
 3   sale_price     643 non-null    int64  
 4   discount       643 non-null    int64  
 5   brand          643 non-null    object 
 6   description    640 non-null    object 
 7   rating         643 non-null    float64
 8   reviews        643 non-null    int64  
 9   images         572 non-null    object 
dtypes: float64(1), int64(4), object(5)
memory usage: 50.4+ KB


In [123]:
print(dataset)

                             product_name  product_id  listing_price  sale_price  discount brand                                        description  rating  reviews                                             images
0          Nike Air Force 1 '07 Essential  CJ1646-600              0        7495         0  Nike  Let your shoe game shimmer in the Nike Air For...     0.0        0  ["https://static.nike.com/a/images/t_PDP_1728_...
1                    Nike Air Force 1 '07  CT4328-101              0        7495         0  Nike  The legend lives on in the Nike Air Force 1 '0...     0.0        0  ["https://static.nike.com/a/images/t_PDP_1728_...
2            Nike Air Force 1 Sage Low LX  CI3482-200              0        9995         0  Nike  Taking both height and craft to new levels, th...     0.0        0  ["https://static.nike.com/a/images/t_PDP_1728_...
3                     Nike Air Max Dia SE  CD0479-200              0        9995         0  Nike  Designed for a woman's foot, the Nike 

In [124]:
dataset.isnull().sum()

product_name      0
product_id        0
listing_price     0
sale_price        0
discount          0
brand             0
description       3
rating            0
reviews           0
images           71
dtype: int64

In [125]:
df = dataset.ffill()
dataset.isnull().sum()

product_name      0
product_id        0
listing_price     0
sale_price        0
discount          0
brand             0
description       3
rating            0
reviews           0
images           71
dtype: int64

In [126]:
filtered_df = dataset[["product_name", "rating"]].value_counts().reset_index(name="count").sort_values("rating", ascending=False)
filtered_df.head(5)

Unnamed: 0,product_name,rating,count
440,Nike x Undercover React Presto,5.0,1
9,Nike Air Max 97,5.0,3
20,Nike Mercurial Superfly 7 Academy IC,5.0,2
31,Nike Mercurial Superfly 7 Academy TF,5.0,2
1,Nike Air Max 90,5.0,5


In [127]:
df1 = df.groupby('product_name')['rating'].apply(set)
filtered_df1 = df1[df1.apply(lambda x: x == {5.0})]

In [128]:
print(filtered_df1)
print("\nProducts with 5.0 rating : ", len(filtered_df1))

product_name
Air Jordan 1 Jester XX Low Laced        {5.0}
Air Jordan 1 Nova XX                    {5.0}
Air Jordan 13 Retro Chinese New Year    {5.0}
Air Jordan 8 Retro                      {5.0}
Air Jordan XXXIV PF                     {5.0}
                                        ...  
Nike x Hawkins High Cortez              {5.0}
Nike x Olivia Kim Air Force 1 '07       {5.0}
Nike x Undercover React Presto          {5.0}
NikeCourt Royale AC                     {5.0}
Zoom Freak 1 'Employee of the Month'    {5.0}
Name: rating, Length: 71, dtype: object

Products with 5.0 rating :  71


In [129]:
df2 = df[(df['product_name'] == 'Nike Air Max 97')]
df2

Unnamed: 0,product_name,product_id,listing_price,sale_price,discount,brand,description,rating,reviews,images
17,Nike Air Max 97,921733-104,0,16995,0,Nike,The Nike Air Max 97 keeps a sneaker icon going...,4.3,16,"[""https://static.nike.com/a/images/t_PDP_1728_..."
20,Nike Air Max 97,CT4525-001,0,15995,0,Nike,Remastered from the OG that shook up the runni...,0.0,0,"[""https://static.nike.com/a/images/t_PDP_1728_..."
58,Nike Air Max 97,CI3708-700,0,14995,0,Nike,The Air Max 97 shook up the running world with...,5.0,1,"[""https://static.nike.com/a/images/t_PDP_1728_..."
131,Nike Air Max 97,CT4526-100,0,15995,0,Nike,Remastered from the OG that shook up the runni...,5.0,1,"[""https://static.nike.com/a/images/t_PDP_1728_..."
456,Nike Air Max 97,921826-016,16995,11897,0,Nike,The Nike Air Max 97 Men's Shoe keeps the sneak...,4.3,17,"[""https://static.nike.com/a/images/t_PDP_1728_..."
479,Nike Air Max 97,CT1549-001,15995,9597,0,Nike,Featuring the same ripple design of the OG tha...,0.0,0,"[""https://static.nike.com/a/images/t_PDP_1728_..."
564,Nike Air Max 97,CI7388-600,0,16995,0,Nike,The Nike Air Max 97 keeps a sneaker icon going...,5.0,1,"[""https://static.nike.com/a/images/t_PDP_1728_..."


In [130]:
filtered_df2 = df[(df['product_name'] == 'Nike Air Max 97') & (df['rating'] == 4.3)]
filtered_df2

Unnamed: 0,product_name,product_id,listing_price,sale_price,discount,brand,description,rating,reviews,images
17,Nike Air Max 97,921733-104,0,16995,0,Nike,The Nike Air Max 97 keeps a sneaker icon going...,4.3,16,"[""https://static.nike.com/a/images/t_PDP_1728_..."
456,Nike Air Max 97,921826-016,16995,11897,0,Nike,The Nike Air Max 97 Men's Shoe keeps the sneak...,4.3,17,"[""https://static.nike.com/a/images/t_PDP_1728_..."
