In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the cleaned eBay deals export into `df`, the frame the cells below work on.
df = pd.read_csv(filepath_or_buffer="cleaned_ebay_deals.csv")


## Time Series Analysis


In [None]:
# Parse the timestamp column and order the frame chronologically; `df` stays
# sorted (and keeps the derived `hour` column) for the cells that follow.
df["timestamp"] = pd.to_datetime(df["timestamp"])
df = df.sort_values("timestamp")
df["hour"] = df["timestamp"].dt.hour

# Count deals per hour of day. Reindexing onto 0-23 keeps hours with no deals
# as explicit zero-height bars: a bare groupby omits them, and because a bar
# chart places bars by position rather than by value, the gaps would silently
# close up and the x-axis would no longer be a contiguous clock.
deals_per_hour = df.groupby("hour").size().reindex(range(24), fill_value=0)

fig, ax = plt.subplots(figsize=(12, 6))
# rot=0 keeps the hour labels horizontal (pandas rotates bar labels by default).
deals_per_hour.plot(kind="bar", ax=ax, rot=0)
ax.set_title("Number of Deals per Hour")
ax.set_xlabel("Hour")
ax.set_ylabel("Number of Deals")
fig.tight_layout()
plt.show()


## Price and Discount Analysis


In [None]:
# Two side-by-side views of the non-null prices:
# a 50-bin histogram on the left, a boxplot on the right.
prices = df["price"].dropna()
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(15, 5))

hist_ax.hist(prices, bins=50, edgecolor="black")
hist_ax.set_title("Distribution of Product Prices")
hist_ax.set_xlabel("Price")
hist_ax.set_ylabel("Frequency")

box_ax.boxplot(prices)
box_ax.set_title("Boxplot of Product Prices")
box_ax.set_ylabel("Price")

fig.tight_layout()
plt.show()


In [None]:
# Scatter each deal's original price against its current price. The dashed red
# y = x diagonal spans the observed original-price range.
lowest_original = df["original_price"].min()
highest_original = df["original_price"].max()
diagonal = [lowest_original, highest_original]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df["original_price"], df["price"], alpha=0.5)
ax.set_xlabel("Original Price")
ax.set_ylabel("Discounted Price")
ax.set_title("Original Price vs Discounted Price")
ax.plot(diagonal, diagonal, "r--", linewidth=2)
fig.tight_layout()
plt.show()


In [None]:
# Histogram (50 bins) of the non-null discount percentages.
discounts = df["discount_percentage"].dropna()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(discounts, bins=50, edgecolor="black")
ax.set_title("Distribution of Discount Percentage")
ax.set_xlabel("Discount Percentage (%)")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()


## Shipping Information Analysis


In [None]:
# Tally how often each shipping option appears (value_counts sorts by
# descending frequency and leaves out missing values).
shipping_counts = df["shipping"].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
shipping_counts.plot(kind="bar", ax=ax)
ax.set_title("Frequency of Shipping Options")
ax.set_xlabel("Shipping Option")
ax.set_ylabel("Frequency")
# Slant the category labels and right-align them under their bars.
plt.xticks(rotation=45, ha="right")
fig.tight_layout()
plt.show()


## Text Analysis on Product Titles


In [None]:
# Terms to look for in the listing titles.
keywords = ["Apple", "Samsung", "Laptop", "iPhone", "Tablet", "Gimbal"]
keyword_counts = {}

for keyword in keywords:
    # Case-insensitive substring match, so "Apple" also counts titles such as
    # "Pineapple". regex=False makes the keyword a literal string: by default
    # str.contains treats it as a regular expression, so a keyword containing
    # metacharacters (e.g. "C++") would raise or match the wrong text.
    # na=False counts rows with a missing title as non-matches.
    count = df["title"].str.contains(keyword, case=False, na=False, regex=False).sum()
    keyword_counts[keyword] = count

plt.figure(figsize=(10, 6))
# Materialise the dict views so matplotlib receives plain sequences.
plt.bar(list(keyword_counts.keys()), list(keyword_counts.values()))
plt.title("Keyword Frequency in Product Titles")
plt.xlabel("Keyword")
plt.ylabel("Frequency")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


## Price Difference Analysis


In [None]:
# Absolute saving per deal: original price minus current price, stored as a new
# column on `df`.
df["price_difference"] = df["original_price"].sub(df["price"])

# Histogram (50 bins) of the non-null savings.
savings = df["price_difference"].dropna()
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(savings, bins=50, edgecolor="black")
ax.set_title("Distribution of Price Differences")
ax.set_xlabel("Price Difference (Original - Discounted)")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()


## Top 5 Discounts


In [None]:
# The five rows with the largest discount percentage, trimmed to the columns
# worth showing.
top_discounts = (
    df.nlargest(5, "discount_percentage")
    .loc[:, ["title", "price", "original_price", "discount_percentage"]]
)
# Heading followed by the table rendered as plain text without the row index.
print(
    "Top 5 Deals with Highest Discounts:",
    top_discounts.to_string(index=False),
    sep="\n",
)
