In [None]:
# data_analysis.ipynb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cleaned data.
# The path is relative, so the CSV is looked up in the kernel's current
# working directory.
# Later cells read the "Brand", "Selling Price", "Rating", "Reviews" and
# "Title" columns from this frame.
df = pd.read_csv("amazon_soft_toys_clean.csv")
# Bare last expression: the notebook renders the first five rows as the
# cell output.
df.head()


In [None]:
# Count how often each brand occurs and keep the five most frequent ones.
brand_counts = df["Brand"].value_counts().head(5)
print("Top 5 Brands by Frequency:")
print(brand_counts)

# Tidy two-column frame (Brand, Count) for seaborn; the row order follows
# brand_counts, i.e. the most frequent brand comes first.
top_brands = brand_counts.rename_axis("Brand").reset_index(name="Count")

# Bar Chart: Top 5 Brands by Frequency
fig, ax = plt.subplots(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 (removal
# announced for 0.14). Mapping hue to the x variable gives the same
# one-colour-per-bar result; dodge=False keeps the bars full width on older
# seaborn releases as well.
sns.barplot(
    data=top_brands,
    x="Brand",
    y="Count",
    hue="Brand",
    dodge=False,
    palette="viridis",
    ax=ax,
)
# The hue mapping may add a legend that only repeats the x tick labels.
bar_legend = ax.get_legend()
if bar_legend is not None:
    bar_legend.remove()
ax.set_title("Top 5 Brands by Frequency")
ax.set_xlabel("Brand")
ax.set_ylabel("Count")
fig.savefig("brand_frequency_bar_chart.png")
plt.show()

# Pie Chart: Share of Top 5 Brands
# The percentages are relative to the combined count of these five brands,
# not to every row in df.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    brand_counts.values,
    labels=brand_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
ax.set_title("Percentage Share of Top 5 Brands")
fig.savefig("brand_share_pie_chart.png")
plt.show()


In [None]:
# Scatter Plot: Selling Price vs. Rating
# One point per row of df, coloured by brand.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x="Selling Price",
    y="Rating",
    hue="Brand",
    palette="deep",
    ax=ax,
)
ax.set_title("Selling Price vs Rating")
ax.set_xlabel("Selling Price")
ax.set_ylabel("Rating")
fig.savefig("price_vs_rating_scatter.png")
plt.show()

# Average Selling Price by Rating Range
# Rating bins are right-closed: [0, 2], (2, 3], (3, 4], (4, 5].
# include_lowest=True keeps a rating of exactly 0 in the first bin; without
# it pd.cut leaves such rows as NaN and they drop out of the average.
# Explicit labels give readable tick text for the bins.
df["Rating_bin"] = pd.cut(
    df["Rating"],
    bins=[0, 2, 3, 4, 5],
    labels=["0-2", "2-3", "3-4", "4-5"],
    include_lowest=True,
)
# observed=False is spelled out so that every bin stays in the result (an
# empty bin gets a NaN mean) and pandas does not warn about the changing
# default for categorical group keys.
avg_price_by_rating = (
    df.groupby("Rating_bin", observed=False)["Selling Price"]
    .mean()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 (removal
# announced for 0.14). Mapping hue to the x variable gives the same
# one-colour-per-bar result; dodge=False keeps the bars full width on older
# seaborn releases as well.
sns.barplot(
    data=avg_price_by_rating,
    x="Rating_bin",
    y="Selling Price",
    hue="Rating_bin",
    dodge=False,
    palette="magma",
    ax=ax,
)
# The hue mapping may add a legend that only repeats the x tick labels.
bin_legend = ax.get_legend()
if bin_legend is not None:
    bin_legend.remove()
ax.set_title("Average Selling Price by Rating Range")
ax.set_xlabel("Rating Range")
ax.set_ylabel("Average Selling Price")
fig.savefig("avg_price_by_rating_bar_chart.png")
plt.show()


In [None]:
def _plot_top_products(frame, metric, title, ylabel, palette, filename, n=5):
    """Plot the ``n`` rows of ``frame`` with the largest ``metric`` as bars.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain a "Title" column and the ``metric`` column.
    metric : str
        Column to rank by (descending) and to draw on the y axis.
    title, ylabel : str
        Figure title and y-axis label.
    palette : str
        Seaborn palette name used to colour the bars.
    filename : str
        Path the figure is written to.
    n : int, default 5
        Number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        The selected rows, in descending ``metric`` order.
    """
    top = frame.sort_values(by=metric, ascending=False).head(n)

    fig, ax = plt.subplots(figsize=(10, 6))
    # Passing `palette` without `hue` is deprecated in seaborn 0.13 (removal
    # announced for 0.14). Mapping hue to the x variable gives the same
    # one-colour-per-bar result; dodge=False keeps the bars full width on
    # older seaborn releases as well.
    # NOTE(review): if one Title occurs more than once among the selected
    # rows, seaborn aggregates them into a single bar -- confirm titles are
    # unique in the cleaned data.
    sns.barplot(
        data=top,
        x="Title",
        y=metric,
        hue="Title",
        dodge=False,
        palette=palette,
        ax=ax,
    )
    # The hue mapping may add a legend that only repeats the x tick labels.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(title)
    ax.set_xlabel("Product Title")
    ax.set_ylabel(ylabel)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    # bbox_inches="tight" grows the saved area to include the rotated product
    # titles, which would otherwise be cut off at the edge of the PNG.
    fig.savefig(filename, bbox_inches="tight")
    plt.show()
    return top


# Top 5 Products by Reviews
top_reviews = _plot_top_products(
    df,
    metric="Reviews",
    title="Top 5 Products by Reviews",
    ylabel="Number of Reviews",
    palette="coolwarm",
    filename="top_products_by_reviews.png",
)

# Top 5 Products by Rating
top_ratings = _plot_top_products(
    df,
    metric="Rating",
    title="Top 5 Products by Rating",
    ylabel="Rating",
    palette="summer",
    filename="top_products_by_rating.png",
)
