In [151]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [153]:
data = pd.read_csv('Watches Bags Accessories.csv', encoding='ISO-8859-1')

print(data.head())

                                               Title Rating in Stars  \
0  Yfashion Cartoon  Electronic  Watch Life Water...           4.6/5   
1  100% Imported LED watch for men , boys and Kid...           4.3/5   
2  1 Cartoon Characters Analog Wrist Watch For Ki...           4.5/5   
3  M3 Touch LED Bracelet Digital Watch Band Good ...           4.6/5   
4            led Watches For Boys and Girls and kids           4.4/5   

   Rating Count Sold Count Voucher       Delivery Currency Current Price  \
0           707    6K Sold       0  Free Delivery      Rs.           287   
1           293    1K Sold       0              0      Rs.           270   
2            57   452 Sold       0  Free Delivery      Rs.           225   
3            51   332 Sold       0  Free Delivery      Rs.           160   
4            54   476 Sold       0  Free Delivery      Rs.           198   

  Original Price                                               Link  \
0        Rs. 520  //www.daraz.pk/produc

In [154]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1078 entries, 0 to 1077
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Title            1078 non-null   object
 1   Rating in Stars  1078 non-null   object
 2   Rating Count     1078 non-null   int64 
 3   Sold Count       1044 non-null   object
 4   Voucher          1078 non-null   object
 5   Delivery         1078 non-null   object
 6   Currency         1078 non-null   object
 7   Current Price    1078 non-null   object
 8   Original Price   1010 non-null   object
 9   Link             1078 non-null   object
 10  Category         1078 non-null   object
dtypes: int64(1), object(10)
memory usage: 92.8+ KB
None


In [155]:
print(data.isnull().sum())

Title               0
Rating in Stars     0
Rating Count        0
Sold Count         34
Voucher             0
Delivery            0
Currency            0
Current Price       0
Original Price     68
Link                0
Category            0
dtype: int64


In [156]:
data['Sold Count'] = data['Sold Count'].str.replace('K Sold', '000').str.replace(' Sold', '')
data['Sold Count'] = data['Sold Count'].astype(float)

In [158]:
data['Current Price'] = data['Current Price'].str.replace('Rs.', '').str.replace(',', '').astype(float)
data['Original Price'] = data['Original Price'].str.replace('Rs.', '').str.replace(',', '').astype(float)

In [159]:
print(data[['Current Price', 'Original Price']].head())

   Current Price  Original Price
0          287.0           520.0
1          270.0           999.0
2          225.0           750.0
3          160.0           299.0
4          198.0           300.0


In [160]:
data['Original Price'] = data['Original Price'].fillna(data['Original Price'].mean())
data['Sold Count'] = data['Sold Count'].fillna(data['Sold Count'].mean())

In [162]:
print(data['Rating in Stars'].unique())

['4.6/5' '4.3/5' '4.5/5' '4.4/5' '3.8/5' '5-Apr' '3.6/5' '3.9/5' '4.8/5'
 '4.2/5' '5-May' '4.7/5' '3.4/5' '3.3/5' '3.5/5' '2.8/5' '5-Mar' '4.1/5'
 '3.7/5' '4.9/5' '0' '3.2/5' '5-Jan' '2.3/5' '3.1/5']


In [172]:
def clean_rating_stars(value):
    try:
        cleaned_value = value.replace('/5', '')
        return float(cleaned_value)
    except (ValueError, AttributeError):
        return np.nan

data['Rating in Stars'] = data['Rating in Stars'].apply(clean_rating_stars)

print(data['Rating in Stars'].isna().sum())

data['Rating in Stars'].fillna(data['Rating in Stars'].mean(), inplace=True)

1078


In [176]:
# Feature Engineering: Create new features

# 1. Discount Amount
data['Discount Amount'] = data['Original Price'] - data['Current Price']

# 2. Discount Percentage
data['Discount Percentage'] = (data['Discount Amount'] / data['Original Price']) * 100

# 3. Price to Sold Ratio
data['Price to Sold Ratio'] = data['Current Price'] / data['Sold Count']

# 4. Rating Effectiveness
data['Rating Effectiveness'] = data['Rating in Stars'] * data['Rating Count']

# Display the dataset with new features
data.head()

Unnamed: 0,Title,Rating in Stars,Rating Count,Sold Count,Voucher,Delivery,Currency,Current Price,Original Price,Link,Category,Discount Amount,Discount Percentage,Price to Sold Ratio,Rating Effectiveness
0,Yfashion Cartoon Electronic Watch Life Water...,,707,6000.0,0,Free Delivery,Rs.,287.0,520.0,//www.daraz.pk/products/y-led-3-i258744118-s14...,Kids Watches,233.0,44.807692,0.047833,
1,"100% Imported LED watch for men , boys and Kid...",,293,1000.0,0,0,Rs.,270.0,999.0,//www.daraz.pk/products/100-led-2023-i40167553...,Kids Watches,729.0,72.972973,0.27,
2,1 Cartoon Characters Analog Wrist Watch For Ki...,,57,452.0,0,Free Delivery,Rs.,225.0,750.0,//www.daraz.pk/products/1-i423737473-s20070599...,Kids Watches,525.0,70.0,0.497788,
3,M3 Touch LED Bracelet Digital Watch Band Good ...,,51,332.0,0,Free Delivery,Rs.,160.0,299.0,//www.daraz.pk/products/m3-led-led-i432456407-...,Kids Watches,139.0,46.488294,0.481928,
4,led Watches For Boys and Girls and kids,,54,476.0,0,Free Delivery,Rs.,198.0,300.0,//www.daraz.pk/products/led-i398157365-s192654...,Kids Watches,102.0,34.0,0.415966,
