In [7]:
# Install libraries if not already
!pip install pandas openpyxl tqdm

# Imports
import pandas as pd
from tqdm import tqdm
from google.colab import files
import random
from datetime import datetime, timedelta




In [8]:
# How many synthetic review rows to produce
num_reviews = 50

# Pools of candidate values; each generated review samples one entry per pool.
titles = [
    "Great product!",
    "Very satisfied",
    "Not as expected",
    "Value for money",
    "Excellent design",
    "Poor battery",
    "Sound quality amazing",
    "Packaging could be better",
]

reviews = [
    "I love the sound quality, highly recommend",
    "Amazing features for this price",
    "The product stopped working in a week",
    "Good device for daily use",
    "Looks amazing and works perfectly",
    "Battery drains quickly",
    "Best speaker in this price range",
    "Box was slightly damaged",
]

# Possible values of the "Verified" flag
verified_options = ["Yes", "No"]

# Generate dataframe
# Seed the RNG so a fresh "Restart & Run All" draws the same titles, reviews,
# ratings, verified flags and day offsets (and the same values for any later
# draws from the `random` module in this run). The formatted dates still shift
# with the day the notebook is executed, because they are offsets from today.
random.seed(42)

# Read the clock once: every review date is then measured from the same
# reference moment instead of a new datetime.today() per iteration.
reference_date = datetime.today()

data = []
for i in tqdm(range(num_reviews), desc="Generating Dummy Reviews"):
    title = random.choice(titles)
    review = random.choice(reviews)
    # Rating is a float in [1.0, 5.0], rounded to one decimal place
    rating = round(random.uniform(1.0, 5.0), 1)
    verified = random.choice(verified_options)
    # Review date: 0-365 days before the reference date, formatted like "27 Jan 2025"
    date = (reference_date - timedelta(days=random.randint(0, 365))).strftime("%d %b %Y")
    # Placeholder image whose caption is numbered from 1
    image_url = f"https://dummyimage.com/150x150/000/fff&text=Img{i+1}"

    data.append([title, review, rating, verified, date, image_url])

# One row per generated review, columns in the same order as each appended record
df = pd.DataFrame(data, columns=["Title","Review","Rating","Verified","Date","Image_URL"])


# --- Derived columns, all added to df in place ---

# 1. Number of characters in each review text
df['Review_Length'] = df['Review'].map(len)

# 2. Length bucket: below 50 chars -> "Short", below 100 -> "Medium", otherwise "Long"
df['Review_Length_Category'] = df['Review_Length'].map(
    lambda n_chars: "Short" if n_chars < 50 else ("Medium" if n_chars < 100 else "Long")
)

# 3. Sentiment label taken from the rating alone:
#    >= 4 -> "Positive", >= 2.5 -> "Neutral", anything lower -> "Negative"
df['Sentiment'] = df['Rating'].map(
    lambda stars: "Positive" if stars >= 4 else ("Neutral" if stars >= 2.5 else "Negative")
)

# 4. Helpful votes: one random integer in 0..100 (inclusive) per row
df['Helpful_Votes'] = [random.randint(0, 100) for _ in df.index]

# 5. Product name, identical for every row (vary it to simulate several products)
df['Product'] = "Echo Dot (4th Gen)"



Generating Dummy Reviews: 100%|██████████| 50/50 [00:00<00:00, 17895.32it/s]


In [9]:
# Character count of every review text. The generation cell already computes
# this same column, so running this cell again simply rewrites identical values.
df['Review_Length'] = df['Review'].map(len)


In [10]:
# Write the dataset to disk in two formats, omitting the row index from both files
df.to_csv(path_or_buf="dummy_reviews_advanced.csv", index=False)
df.to_excel(excel_writer="dummy_reviews_advanced.xlsx", index=False)

# Confirmation message with the number of rows exported
print("✅ Dummy dataset generated successfully! Total reviews:", df.shape[0])


✅ Dummy dataset generated successfully! Total reviews: 50


In [11]:
# Download the exported CSV from the Colab runtime to the local machine.
# `files` is the google.colab helper imported at the top, so this cell only works in Colab.
files.download("dummy_reviews_advanced.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
# Preview the first rows of the generated dataset, including the derived columns
# (head() shows 5 rows by default).
df.head()


Unnamed: 0,Title,Review,Rating,Verified,Date,Image_URL,Review_Length,Review_Length_Category,Sentiment,Helpful_Votes,Product
0,Value for money,Best speaker in this price range,4.2,Yes,27 Jan 2025,https://dummyimage.com/150x150/000/fff&text=Img1,32,Short,Positive,43,Echo Dot (4th Gen)
1,Packaging could be better,Box was slightly damaged,3.0,No,04 Mar 2025,https://dummyimage.com/150x150/000/fff&text=Img2,24,Short,Neutral,65,Echo Dot (4th Gen)
2,Excellent design,Best speaker in this price range,1.6,No,04 Aug 2025,https://dummyimage.com/150x150/000/fff&text=Img3,32,Short,Negative,16,Echo Dot (4th Gen)
3,Sound quality amazing,The product stopped working in a week,1.5,Yes,26 Oct 2025,https://dummyimage.com/150x150/000/fff&text=Img4,37,Short,Negative,89,Echo Dot (4th Gen)
4,Not as expected,Looks amazing and works perfectly,1.5,Yes,15 Jul 2025,https://dummyimage.com/150x150/000/fff&text=Img5,33,Short,Negative,11,Echo Dot (4th Gen)
