In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the raw export and keep only its first 50,000 rows.
# .copy() makes the subset an independent frame: the later cells add columns to
# `df`, and an explicit copy avoids mutating a slice that is still tied to the
# full frame (and lets the rows beyond 50,000 be released).
df = pd.read_csv('myntra_data.csv').iloc[:50000].copy()

# Persist the truncated subset, without the index column.
df.to_csv('myntra_dataset.csv', index=False)

In [3]:
# Synthetic sales: one Poisson draw per row whose mean is
# 100 + 10 * rating + 0.5 * discount.
# NumPy's global RNG is seeded first so the draws are repeatable.
np.random.seed(42)
df['sales'] = np.random.poisson(
    lam=df['rating'].mul(10).add(100).add(df['discount'].mul(0.5)),
    size=df.shape[0],
)


In [4]:
# Pool of canned review sentences; generate_reviews samples from it
# (with replacement) to build synthetic review text.
reviews = [
    "Great product, highly recommend!",
    "Decent quality, could be better.",
    "Not satisfied with the product.",
    "Excellent value for money.",
    "Would buy again!",
    "Poor quality, not as described.",
    "Loved it, very stylish!",
    "Product arrived damaged, not happy.",
    "Fantastic purchase, very happy with it.",
    "The product is just okay, nothing special."
]


def generate_reviews(row):
    """Build the synthetic review text for one row.

    Draws ``row['ratingTotal']`` sentences, with replacement, from the
    module-level ``reviews`` pool and joins them with single spaces.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'ratingTotal'`` entry holding the number of
        sentences to draw (truncated to an integer).

    Returns
    -------
    str
        The space-joined sentences, or ``''`` when the count is missing
        (None/NaN) or not positive.
    """
    count = row['ratingTotal']
    # int() raises on NaN/None, so a missing count is treated as "no reviews".
    if pd.isna(count):
        return ''
    num_reviews = int(count)
    if num_reviews <= 0:
        return ''
    return ' '.join(random.choices(reviews, k=num_reviews))

In [5]:
# Give every row a random review count in [1, 99] (np.random.randint excludes
# the upper bound).
# NOTE(review): this replaces any 'ratingTotal' values already present in df -
# confirm that overwriting them is intended.
df['ratingTotal'] = np.random.randint(1, 100, size=len(df))

# generate_reviews samples with the stdlib `random` module, which
# np.random.seed does not affect; seed it so the review text is reproducible.
random.seed(42)
df['reviews'] = df.apply(generate_reviews, axis=1)

In [6]:
# Compute a weighted popularity score, then normalize it to the 1-100 range
import math
def calculate_popularity_score(row, weight_rating=0.4, weight_reviews=0.3,
                               weight_discount=0.2, weight_price=0.1):
    """Return the raw (un-normalized) popularity score for one row.

    The score is a weighted sum of four fields of ``row``:
    ``rating``, ``ratingTotal``, ``discount`` and ``price``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'rating'``, ``'ratingTotal'``, ``'discount'`` and
        ``'price'`` entries.
    weight_rating, weight_reviews, weight_discount, weight_price : float
        Multipliers applied to the respective field. The defaults
        (0.4, 0.3, 0.2, 0.1) reproduce the original fixed weighting.

    Returns
    -------
    The weighted sum. The fields are combined as-is, without rescaling,
    so fields with larger magnitudes contribute more to the result.
    """
    score = (weight_rating * row['rating'] +
             weight_reviews * row['ratingTotal'] +
             weight_discount * row['discount'] +
             weight_price * row['price'])
    return score

# Raw weighted score per row. Intermediates stay in local variables so that
# `df` never carries scratch columns, even if a later step in this cell fails.
popularity_raw = df.apply(calculate_popularity_score, axis=1)

# Rescale the raw scores linearly onto [1, 100]. The scaler expects 2-D input,
# so the Series is wrapped in a one-column frame.
scaler = MinMaxScaler(feature_range=(1, 100))
popularity_scaled = scaler.fit_transform(popularity_raw.to_frame('popularity_score_raw'))

# Floor each scaled value to an integer score.
df['popularity_score'] = [math.floor(value) for value in np.asarray(popularity_scaled).ravel()]

In [7]:
# Write the augmented frame to a new CSV file, leaving out the index column.
df.to_csv(path_or_buf='synthetic_myntra_dataset.csv', index=False)