In [1]:
import tensorflow_datasets as tfds
import pandas as pd

# 1. Dataset Collection

### Here, we download the shoes review data and store it in a dataframe.

In [52]:
# Load the last 20,000 examples of the 'train' split of the Amazon US Shoes reviews,
# together with the dataset metadata object (`info`, returned because with_info=True).
# NOTE: despite its name, `df` is not a pandas DataFrame yet; it is converted with
# tfds.as_dataframe in the next cell, and the name `df` is later re-bound to the
# DataFrame read back from the CSV file.
df, info = tfds.load('amazon_us_reviews/Shoes_v1_00', split='train[-20000:]', with_info=True)
# Earlier experiment, kept for reference: first 1,000 examples with parallel reads.
# df, info = tfds.load('amazon_us_reviews/Shoes_v1_00', split='train[:1000]', with_info=True,num_parallel_reads=16)

# subset_dataset = tfds.Subset(df, range(1000))  # Extract first 1000 examples

In [53]:
# Convert the loaded TFDS dataset into a DataFrame (`dfr`) so it can be previewed and saved.
dfr = tfds.as_dataframe(df)

In [54]:
dfr.head(5)

Unnamed: 0,data/customer_id,data/helpful_votes,data/marketplace,data/product_category,data/product_id,data/product_parent,data/product_title,data/review_body,data/review_date,data/review_headline,data/review_id,data/star_rating,data/total_votes,data/verified_purchase,data/vine
0,b'3341504',0,b'US',b'Shoes',b'B00JAWAI6A',b'761838730',"b""Giorgio Brutini Men's 24876 Slip On Loafer""",b'My size 44 and asked for this product and yo...,b'2015-04-27',b'My size 44 and asked for this product and yo...,b'R2NE0S3W0I8R7K',1,2,0,1
1,b'7691763',2,b'US',b'Shoes',b'B007XHBN2W',b'922964218',b'Danshuz Womens Girls Purple Front Zipper Dan...,b'Light weight and adorable for my dance shoes...,b'2014-02-19',b'Adorable',b'R391UT33CBXKFF',5,2,0,1
2,b'1726917',4,b'US',b'Shoes',b'B00I9TN8LW',b'996316693',"b""RYKA Women's Fanatic Plus Running Shoe""","b""I have worn Nike's for as long as I can reme...",b'2015-02-07',"b""I had been shopping for a pair and could not...",b'R3BYN7HBR7CTA9',5,4,0,1
3,b'29482983',0,b'US',b'Shoes',b'B00HZOKDII',b'790286692',"b""UGG Women's Cozy Flannel Slipper""",b'Great fit warm and cozy slippers',b'2014-12-23',b'Five Stars',b'RHV1NO3Z3JYD9',5,1,0,1
4,b'12715156',0,b'US',b'Shoes',b'B003OYJ9LK',b'489344064',"b""ASICS Women's GEL-Resort 2 Walking Shoe""",b'I have really bad feet so I am glad when I c...,b'2013-04-28',b'Fit is good',b'R2K09Q5TORL56F',4,0,0,1


### Dataset Description

customer_id: unique identifier for each customer

helpful_votes: number of helpful votes for the review

marketplace: the Amazon marketplace where the review was written (US)

product_category: category of the product (Shoes)

product_id: product unique identifier

product_parent: parent product identifier (used to group together variations of the same product)

product_title: title of the product

review_body: body part of the review

review_date: date the review was written

review_headline: headline of the review

review_id: review unique identifier

star_rating: rating given by the customer (1-5 stars)

total_votes: total number of votes for the review (i.e., helpful and unhelpful)

verified_purchase: whether or not the product is purchased from Amazon and the review is verified

vine: whether or not the review was written as part of the Vine program (where customers receive free products in exchange for reviews)

### Storing the last 20,000 rows extracted from the shoes dataset in a CSV file.

In [55]:
# Persist the extracted rows to disk; index=False keeps the row index out of the file.
# NOTE(review): the text columns hold bytes values at this point (see the b'...' entries in
# the preview above), so they appear to be written as their textual representation. That
# would explain why the re-loaded data needs its "b" prefix cleaned later — confirm, and
# consider decoding the bytes before saving.
dfr.to_csv("shoes_dataset.csv",index=False)

### Loading data from the csv file

In [56]:
# Re-load the saved CSV. From here on `df` refers to this pandas DataFrame; it replaces
# the TFDS dataset object that was previously bound to the same name.
df = pd.read_csv("shoes_dataset.csv")

## 1.1 Querying the data / Dataset Preview

In [57]:
df.head(5)

Unnamed: 0,data/customer_id,data/helpful_votes,data/marketplace,data/product_category,data/product_id,data/product_parent,data/product_title,data/review_body,data/review_date,data/review_headline,data/review_id,data/star_rating,data/total_votes,data/verified_purchase,data/vine
0,b'3341504',0,b'US',b'Shoes',b'B00JAWAI6A',b'761838730',"b""Giorgio Brutini Men's 24876 Slip On Loafer""",b'My size 44 and asked for this product and yo...,b'2015-04-27',b'My size 44 and asked for this product and yo...,b'R2NE0S3W0I8R7K',1,2,0,1
1,b'7691763',2,b'US',b'Shoes',b'B007XHBN2W',b'922964218',b'Danshuz Womens Girls Purple Front Zipper Dan...,b'Light weight and adorable for my dance shoes...,b'2014-02-19',b'Adorable',b'R391UT33CBXKFF',5,2,0,1
2,b'1726917',4,b'US',b'Shoes',b'B00I9TN8LW',b'996316693',"b""RYKA Women's Fanatic Plus Running Shoe""","b""I have worn Nike's for as long as I can reme...",b'2015-02-07',"b""I had been shopping for a pair and could not...",b'R3BYN7HBR7CTA9',5,4,0,1
3,b'29482983',0,b'US',b'Shoes',b'B00HZOKDII',b'790286692',"b""UGG Women's Cozy Flannel Slipper""",b'Great fit warm and cozy slippers',b'2014-12-23',b'Five Stars',b'RHV1NO3Z3JYD9',5,1,0,1
4,b'12715156',0,b'US',b'Shoes',b'B003OYJ9LK',b'489344064',"b""ASICS Women's GEL-Resort 2 Walking Shoe""",b'I have really bad feet so I am glad when I c...,b'2013-04-28',b'Fit is good',b'R2K09Q5TORL56F',4,0,0,1


In [58]:
df.tail(5)

Unnamed: 0,data/customer_id,data/helpful_votes,data/marketplace,data/product_category,data/product_id,data/product_parent,data/product_title,data/review_body,data/review_date,data/review_headline,data/review_id,data/star_rating,data/total_votes,data/verified_purchase,data/vine
19995,b'11063331',0,b'US',b'Shoes',b'B005PLM016',b'804246440',"b""Fila Women's Skele-Toes EZ Slide Shoe""",b'I love them a lot and recommend them to my f...,b'2012-11-06',b'MsPrissy',b'R2DNZYMLY5JT69',5,0,0,1
19996,b'80829',0,b'US',b'Shoes',b'B00FWW0ZSI',b'734310501',"b""Very Volatile Women's Rockaway Wedge Sandal""",b'Very stylish. I can dress them up or dress ...,b'2015-03-31',b'Very stylish. I can dress them up or dress ...',b'R3DDLSHSRV1R5U',4,0,0,1
19997,b'38953442',0,b'US',b'Shoes',b'B000I6ZXTE',b'357804452',"b""Charles David Women's Elsa Ankle Boot""","b""Given that I wear high heels on a daily basi...",b'2007-10-16',b'Gorgeous and comfortable!',b'R37DEZWATTM0CA',5,0,0,1
19998,b'22683249',0,b'US',b'Shoes',b'B000EP6OUQ',b'27136041',"b""Sperry Top-Sider Men's Billfish 3-Eye Boat S...",b'Given as a gift and were just what I expected!',b'2015-03-03',b'Five Stars',b'R6JYMC8402NEB',5,0,0,1
19999,b'42820972',0,b'US',b'Shoes',b'B000WGAW50',b'514002090',b'The Original MuckBoots Daily Garden Shoe',b'My husband absolutely loves these garden boo...,b'2013-10-13',b'Awesome',b'R3DXCHSBV6PV29',5,0,0,1


## 1.2 Descriptive Analysis / Summary statistics
Checking total number of rows and columns

In [59]:
# Count rows and columns separately, then report the overall size of the dataset.
df_rows = len(df.index)
df_columns = len(df.columns)
print(f"Dataset has {df_rows} rows and {df_columns} columns.")

Dataset has 20000 rows and 15 columns.


In [60]:
df.describe()

Unnamed: 0,data/helpful_votes,data/star_rating,data/total_votes,data/verified_purchase,data/vine
count,20000.0,20000.0,20000.0,20000.0,20000.0
mean,0.83515,4.2499,1.03905,0.09805,0.9999
std,4.594315,1.15335,4.984643,0.29739,0.01
min,0.0,1.0,0.0,0.0,0.0
25%,0.0,4.0,0.0,0.0,1.0
50%,0.0,5.0,0.0,0.0,1.0
75%,1.0,5.0,1.0,0.0,1.0
max,317.0,5.0,333.0,1.0,1.0


## 1.3 Data Cleaning
 In this step, we are modifying the original column names to make them more readable and relevant to our analysis.

In [61]:
df.columns

Index(['data/customer_id', 'data/helpful_votes', 'data/marketplace',
       'data/product_category', 'data/product_id', 'data/product_parent',
       'data/product_title', 'data/review_body', 'data/review_date',
       'data/review_headline', 'data/review_id', 'data/star_rating',
       'data/total_votes', 'data/verified_purchase', 'data/vine'],
      dtype='object')

In [62]:
# Drop the 'data/' prefix from the column names. Only the columns listed here are
# renamed; the new name is whatever follows the first '/' in the old one.
prefixed_columns = [
    'data/customer_id',
    'data/helpful_votes',
    'data/marketplace',
    'data/product_category',
    'data/product_id',
    'data/product_parent',
    'data/product_title',
    'data/review_body',
    'data/review_date',
    'data/review_headline',
    'data/review_id',
    'data/star_rating',
    'data/total_votes',
    'data/verified_purchase',
    'data/vine',
]
short_names = {old_name: old_name.split('/', 1)[1] for old_name in prefixed_columns}
df = df.rename(columns=short_names)

In [63]:
df.head(2)

Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
0,b'3341504',0,b'US',b'Shoes',b'B00JAWAI6A',b'761838730',"b""Giorgio Brutini Men's 24876 Slip On Loafer""",b'My size 44 and asked for this product and yo...,b'2015-04-27',b'My size 44 and asked for this product and yo...,b'R2NE0S3W0I8R7K',1,2,0,1
1,b'7691763',2,b'US',b'Shoes',b'B007XHBN2W',b'922964218',b'Danshuz Womens Girls Purple Front Zipper Dan...,b'Light weight and adorable for my dance shoes...,b'2014-02-19',b'Adorable',b'R391UT33CBXKFF',5,2,0,1


### Here, in the dataset we can clearly see that the values in most of the columns start with a 'b' prefix. The `b'...'` / `b"..."` wrapping appears because these values were byte strings that are now stored as their textual representation (for example `b'US'`). They need to be converted to regular strings.

### Querying the columns data before cleaning

In [64]:
df[['customer_id','product_title','review_body']]

Unnamed: 0,customer_id,product_title,review_body
0,b'3341504',"b""Giorgio Brutini Men's 24876 Slip On Loafer""",b'My size 44 and asked for this product and yo...
1,b'7691763',b'Danshuz Womens Girls Purple Front Zipper Dan...,b'Light weight and adorable for my dance shoes...
2,b'1726917',"b""RYKA Women's Fanatic Plus Running Shoe""","b""I have worn Nike's for as long as I can reme..."
3,b'29482983',"b""UGG Women's Cozy Flannel Slipper""",b'Great fit warm and cozy slippers'
4,b'12715156',"b""ASICS Women's GEL-Resort 2 Walking Shoe""",b'I have really bad feet so I am glad when I c...
...,...,...,...
19995,b'11063331',"b""Fila Women's Skele-Toes EZ Slide Shoe""",b'I love them a lot and recommend them to my f...
19996,b'80829',"b""Very Volatile Women's Rockaway Wedge Sandal""",b'Very stylish. I can dress them up or dress ...
19997,b'38953442',"b""Charles David Women's Elsa Ankle Boot""","b""Given that I wear high heels on a daily basi..."
19998,b'22683249',"b""Sperry Top-Sider Men's Billfish 3-Eye Boat S...",b'Given as a gift and were just what I expected!'


### Checking data types for each column

In [65]:
print(df.dtypes)

customer_id          object
helpful_votes         int64
marketplace          object
product_category     object
product_id           object
product_parent       object
product_title        object
review_body          object
review_date          object
review_headline      object
review_id            object
star_rating           int64
total_votes           int64
verified_purchase     int64
vine                  int64
dtype: object


### Applying a regular expression to clean every row of the dataset, in all columns except the five integer columns 'helpful_votes', 'star_rating', 'total_votes', 'verified_purchase', and 'vine'

In [66]:
import ast
import re

# RE to match the textual form of a bytes literal, i.e. b'...' or b"..."
pattern = re.compile(r"^b['\"](.*)['\"]$")

# column names that do not have the 'b' prefix (integer columns)
int_columns = ['helpful_votes', 'star_rating', 'total_votes', 'verified_purchase', 'vine']


def _decode_bytes_repr(value):
    """Convert the text form of a bytes literal (e.g. "b'US'") into a regular string.

    Only stripping the leading b' and the trailing quote leaves Python escape
    sequences in the text: an apostrophe stays as \\' and every non-ASCII
    character stays as a run of \\xNN escapes. Parsing the literal and decoding
    it as UTF-8 restores the real characters.

    Values that are not strings, or that do not look like a bytes literal, are
    returned unchanged. If the literal cannot be parsed, the text between the
    quotes is returned, which is what the plain regular expression would give.
    """
    if not isinstance(value, str):
        return value
    found = pattern.match(value)
    if found is None:
        return value
    try:
        # literal_eval only evaluates Python literals, so it is safe on data values.
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, bytes):
        # errors='replace' keeps the cleaning step from failing on a malformed byte sequence.
        return parsed.decode('utf-8', errors='replace')
    return found.group(1)


# Apply the decoder to each element of every non-integer column of the dataframe
for col in df.columns:
    if col not in int_columns:
        df[col] = df[col].apply(_decode_bytes_repr)


In [67]:
# df = df.applymap(lambda x: x.decode('utf-8').strip("b'") if isinstance(x, bytes) else str(x).strip("b''"))

In [68]:
df.head(5)

Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
0,3341504,0,US,Shoes,B00JAWAI6A,761838730,Giorgio Brutini Men's 24876 Slip On Loafer,My size 44 and asked for this product and you ...,2015-04-27,My size 44 and asked for this product and you ...,R2NE0S3W0I8R7K,1,2,0,1
1,7691763,2,US,Shoes,B007XHBN2W,922964218,Danshuz Womens Girls Purple Front Zipper Dance...,Light weight and adorable for my dance shoes.....,2014-02-19,Adorable,R391UT33CBXKFF,5,2,0,1
2,1726917,4,US,Shoes,B00I9TN8LW,996316693,RYKA Women's Fanatic Plus Running Shoe,I have worn Nike's for as long as I can rememb...,2015-02-07,I had been shopping for a pair and could not f...,R3BYN7HBR7CTA9,5,4,0,1
3,29482983,0,US,Shoes,B00HZOKDII,790286692,UGG Women's Cozy Flannel Slipper,Great fit warm and cozy slippers,2014-12-23,Five Stars,RHV1NO3Z3JYD9,5,1,0,1
4,12715156,0,US,Shoes,B003OYJ9LK,489344064,ASICS Women's GEL-Resort 2 Walking Shoe,I have really bad feet so I am glad when I can...,2013-04-28,Fit is good,R2K09Q5TORL56F,4,0,0,1


In [69]:
df['product_title']

0               Giorgio Brutini Men's 24876 Slip On Loafer
1        Danshuz Womens Girls Purple Front Zipper Dance...
2                   RYKA Women's Fanatic Plus Running Shoe
3                         UGG Women's Cozy Flannel Slipper
4                  ASICS Women's GEL-Resort 2 Walking Shoe
                               ...                        
19995                Fila Women's Skele-Toes EZ Slide Shoe
19996          Very Volatile Women's Rockaway Wedge Sandal
19997                Charles David Women's Elsa Ankle Boot
19998      Sperry Top-Sider Men's Billfish 3-Eye Boat Shoe
19999             The Original MuckBoots Daily Garden Shoe
Name: product_title, Length: 20000, dtype: object

## 1.4 Check for null Values

**Compute Ratio**

$$ R_{m} = \frac{Number \, of \, missing \, values}{ Total \, number \, of \, values} $$

<br>

In this step, a verification of NaN values must be done. According to the previous formula, if the $R_{m}$ is too high, it is better to delete the entire column. If $R_{m}$ is low, it is recommended to impute NaN values with the mean, median or mode.

In [70]:
df.isnull().sum()

customer_id          0
helpful_votes        0
marketplace          0
product_category     0
product_id           0
product_parent       0
product_title        0
review_body          0
review_date          0
review_headline      0
review_id            0
star_rating          0
total_votes          0
verified_purchase    0
vine                 0
dtype: int64

There are no null values in this dataset. Applying the compute-ratio formula confirms that $R_{m} = 0$ for every feature.

In [71]:
def compute_ratio(dataset, columns):
  """Compute the missing-value ratio Rm for each of the given columns.

  Rm = (number of missing values) / (total number of values), so the ratio is
  always between 0 and 1. The denominator is the total number of rows, not
  the number of non-missing values.

  Args:
    dataset: pandas DataFrame to inspect.
    columns: iterable of column names in `dataset` to evaluate.

  Returns:
    A DataFrame with one row per column and two columns: 'Feature' (the
    column name) and 'Rm' (the missing-value ratio). For an empty dataset
    the ratio is undefined and reported as NaN.
  """
  total = len(dataset)

  Rms = {}
  for col in columns:
    missing = dataset[col].isnull().sum()
    # Guard the 0 / 0 case of an empty dataset explicitly.
    Rms[col] = missing / total if total else float('nan')

  Rms = pd.DataFrame(list(Rms.items()), columns=['Feature', 'Rm'])

  return Rms

In [72]:
# Compute the missing-value ratio for every column of df and display the resulting table.
Rms = compute_ratio(df, df.columns)
Rms

Unnamed: 0,Feature,Rm
0,customer_id,0.0
1,helpful_votes,0.0
2,marketplace,0.0
3,product_category,0.0
4,product_id,0.0
5,product_parent,0.0
6,product_title,0.0
7,review_body,0.0
8,review_date,0.0
9,review_headline,0.0


# 2 Data Preprocessing

## 2.1 Check Duplicates

In [73]:
# Flag every row that has an exact copy elsewhere in the frame
# (keep=False marks all occurrences, not only the repeats), then show those rows.
is_repeated_row = df.duplicated(keep=False)
duplicate_df = df[is_repeated_row]
print(duplicate_df)

Empty DataFrame
Columns: [customer_id, helpful_votes, marketplace, product_category, product_id, product_parent, product_title, review_body, review_date, review_headline, review_id, star_rating, total_votes, verified_purchase, vine]
Index: []


### Checking the whole dataframe, there are no duplicate rows.

In [74]:
# Select the rows whose product_id occurs more than once (all occurrences are kept),
# report how many there are, and preview the first few.
shares_product_id = df.duplicated(subset=['product_id'], keep=False)
df_duplicates = df[shares_product_id]
print(f"Total number of duplicated items is :{len(df_duplicates)}")
df_duplicates.head(5)

Total number of duplicated items is :1019


Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
81,14988518,14,US,Shoes,B00H3QNDEE,149312772,Skechers Women's Reggae-Zig Swag Flip-Flop,I love these shoes-the sole is great for walki...,2014-04-09,My new favorite shoes!,R2C6I4U5MQG7EA,5,14,0,1
83,47755919,0,US,Shoes,B004DEPOI2,582094940,ASICS Little Kid/Big Kid Gel-Matflex GS 3 Wres...,Had to return this. Shoe was way too narrow an...,2013-03-28,"Shoe was too narrow, not enough padding",R143NPUAHYNGXV,1,0,0,1
94,47602539,0,US,Shoes,B002NU6MM0,620625631,Sperry Top-Sider Women's Angelfish Oat Slip-On...,Love my sperries,2015-04-09,Five Stars,R4IX0BNMHANU8,5,0,0,1
106,26312958,0,US,Shoes,B0041FI6OC,79816154,Kamik Women's Jennifer Rain Boot,These boots are stylish and very good looking ...,2012-12-09,Good product,R3ES5D2177OC0F,4,0,0,1
130,4979793,0,US,Shoes,B005BQBMO6,71299635,DC Men's Pure Action Sport Sneaker,good item,2015-05-12,Five Stars,R2UBOI4PT6SEHT,5,0,0,1


In [77]:
df[df['product_id'] == 'B00H3QNDEE']

Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
81,14988518,14,US,Shoes,B00H3QNDEE,149312772,Skechers Women's Reggae-Zig Swag Flip-Flop,I love these shoes-the sole is great for walki...,2014-04-09,My new favorite shoes!,R2C6I4U5MQG7EA,5,14,0,1
5963,36653573,0,US,Shoes,B00H3QNDEE,149312772,Skechers Women's Reggae-Zig Swag Flip-Flop,I love these sandals. I bought another pair! ...,2015-02-13,Love them!,R2ATKSY67CW6ZK,5,0,0,1


### Although the data seems to be duplicated because several rows share the same product id, these rows are actually reviews of the same product written by different customers. So we do not need to remove these rows.

In [80]:
# Inspect the reviews that received more than 150 votes in total.
df[df['total_votes']>150]

Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
14275,26341811,177,US,Shoes,B00MK5425W,794738828,Skechers Women's Reggae-Rasta Thong Sandal,I love these sandals! Let me start by saying ...,2015-06-14,Great choice for Plantar fasciitis sufferers,RXZI6QK52X00J,5,178,0,1
14712,47439977,317,US,Shoes,B001OQUKV8,21905829,Ray Ban RB 2140 Original Wayfarer 901 Black/Cr...,"Without a doubt, the 2140 Original Wayfarers a...",2009-03-27,"The Coolest Sunglasses, But Be Sure to Order t...",RD0I0IV80YWGC,5,333,0,1
19294,12684301,164,US,Shoes,B0097O0FZ0,915657171,FitFlop Women's Lulu Thong Sandal,FitFlop seems to offer 2 different footpads wh...,2013-06-09,Disappointed,R1VABBDP4IWAAU,2,168,0,1


In [81]:
# Preview the first rows whose verified_purchase flag equals 1.
# NOTE(review): in the summary statistics `vine` is 1 for ~99.99% of rows and
# `verified_purchase` is 1 for only ~9.8%, which suggests that 1 may encode "N" (no)
# rather than "Y" (yes). Confirm the label encoding (e.g. via info.features) before
# reading these rows as verified purchases.
df[df['verified_purchase']==1].head(5)

Unnamed: 0,customer_id,helpful_votes,marketplace,product_category,product_id,product_parent,product_title,review_body,review_date,review_headline,review_id,star_rating,total_votes,verified_purchase,vine
21,904155,0,US,Shoes,B00QQ93KPG,113155012,Elegent Women's Plush HandBag Faux Fur Clutch ...,Was a little bit of a wait but well worth the ...,2015-04-20,Adorable crossbody bag,RH8DMSBBL65UL,5,0,1,1
30,6612747,1,US,Shoes,B0083DMUYA,663068936,New Balance Women's WT610 Trail Running Shoe,I gave this shoe a 5 star because it was exact...,2013-03-20,Great shoe,R16RFN90XBXBYS,5,2,1,1
35,31932745,2,US,Shoes,B008F0LORK,911090892,KEEN Nashoba CNX Athletic Sandal (Toddler/Litt...,Keen is a great sturdy shoe but for some reaso...,2013-08-21,Good Shoe,R3GFZ346ROXHPL,4,2,1,1
49,31714180,36,US,Shoes,B001CEBNGS,602363633,AmeriBag Zena Shoulder Bag,I bought this bag (in black) from ebags in Mar...,2010-08-08,Great kindle carrier,R1MOSBWPE831ND,5,37,1,1
66,42808640,8,US,Shoes,B00008CF4W,590078338,Women's Summer Athletic Slides,"My feet are just a little bit wide, size 6 1/2...",2003-04-14,Comfy and coral,R31FTXQC0IMMCU,4,8,1,1


# 3. Feature Selection

### Checking each column to decide whether the feature is required or not

In [82]:
df.columns

Index(['customer_id', 'helpful_votes', 'marketplace', 'product_category',
       'product_id', 'product_parent', 'product_title', 'review_body',
       'review_date', 'review_headline', 'review_id', 'star_rating',
       'total_votes', 'verified_purchase', 'vine'],
      dtype='object')

In [83]:
df['marketplace'].unique()

array(['US'], dtype=object)

### Since this dataset only contains data from the US marketplace, we don't need this column.

In [84]:
df['helpful_votes'].unique()

array([  0,   2,   4,   1,   3,  36,   8,  14,  16,   5,   9,   7,  45,
         6,  17,  11,  19,  12,  10,  18,  32,  15,  23,  37,  38,  13,
        21,  79,  30,  91,  71,  43,  22,  20,  81,  28,  35,  64,  33,
        58,  53,  25,  29, 114,  24,  72,  31,  27,  57,  26, 104,  34,
        83, 102,  39,  89,  56, 177, 317,  90,  41,  70,  40,  51, 164,
        78, 107], dtype=int64)

In [85]:
df['product_category'].unique()

array(['Shoes'], dtype=object)

### The product category is Shoes for all rows in this dataset, so we don't need this column either.

In [86]:
df[['review_body','review_headline']].head(5)

Unnamed: 0,review_body,review_headline
0,My size 44 and asked for this product and you ...,My size 44 and asked for this product and you ...
1,Light weight and adorable for my dance shoes.....,Adorable
2,I have worn Nike's for as long as I can rememb...,I had been shopping for a pair and could not f...
3,Great fit warm and cozy slippers,Five Stars
4,I have really bad feet so I am glad when I can...,Fit is good


### The review_headline column contains a heading that summarises the review_body column. So we can consider only review_body for further analysis.
### Similarly, removing all the unnecessary columns that are not relevant for our analysis.

In [87]:
# Keep only the columns retained for the rest of the analysis, then display the result.
selected_columns = [
    'customer_id',
    'helpful_votes',
    'product_id',
    'product_title',
    'review_body',
    'review_date',
    'star_rating',
    'total_votes',
    'verified_purchase',
]
df = df[selected_columns]
df

Unnamed: 0,customer_id,helpful_votes,product_id,product_title,review_body,review_date,star_rating,total_votes,verified_purchase
0,3341504,0,B00JAWAI6A,Giorgio Brutini Men's 24876 Slip On Loafer,My size 44 and asked for this product and you ...,2015-04-27,1,2,0
1,7691763,2,B007XHBN2W,Danshuz Womens Girls Purple Front Zipper Dance...,Light weight and adorable for my dance shoes.....,2014-02-19,5,2,0
2,1726917,4,B00I9TN8LW,RYKA Women's Fanatic Plus Running Shoe,I have worn Nike's for as long as I can rememb...,2015-02-07,5,4,0
3,29482983,0,B00HZOKDII,UGG Women's Cozy Flannel Slipper,Great fit warm and cozy slippers,2014-12-23,5,1,0
4,12715156,0,B003OYJ9LK,ASICS Women's GEL-Resort 2 Walking Shoe,I have really bad feet so I am glad when I can...,2013-04-28,4,0,0
...,...,...,...,...,...,...,...,...,...
19995,11063331,0,B005PLM016,Fila Women's Skele-Toes EZ Slide Shoe,I love them a lot and recommend them to my fri...,2012-11-06,5,0,0
19996,80829,0,B00FWW0ZSI,Very Volatile Women's Rockaway Wedge Sandal,Very stylish. I can dress them up or dress th...,2015-03-31,4,0,0
19997,38953442,0,B000I6ZXTE,Charles David Women's Elsa Ankle Boot,"Given that I wear high heels on a daily basis,...",2007-10-16,5,0,0
19998,22683249,0,B000EP6OUQ,Sperry Top-Sider Men's Billfish 3-Eye Boat Shoe,Given as a gift and were just what I expected!,2015-03-03,5,0,0


### After performing data cleaning and preprocessing, 6 columns are removed from the dataframe.