# Sales Data Analysis

This notebook performs exploratory data analysis (EDA) on the Sample Superstore sales dataset. It examines sales trends, category performance, and top products to provide insights for decision-making.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the Superstore CSV into a DataFrame. The encoding is passed
# explicitly as ISO-8859-1 rather than relying on read_csv's UTF-8 default.
df = pd.read_csv(
    'Sample_Superstore.csv',
    encoding='ISO-8859-1',
)

# Preview the first five rows (five is the default for head()).
df.head(n=5)


In [None]:
# Parse 'Order Date' into datetimes and derive a monthly period column
# ('Month') from the parsed values.
order_dates = pd.to_datetime(df['Order Date'])
df['Order Date'] = order_dates
df['Month'] = order_dates.dt.to_period('M')

# Quick overview: dimensions, column names, then per-column statistics
# (all dtypes included) transposed so each column becomes a row.
print('Dataset shape:', df.shape)
print('Columns:', list(df.columns))
df.describe(include='all').transpose()


In [None]:
# Monthly sales trend: total 'Sales' per 'Month', drawn as a line chart
# with a marker at every data point.
monthly_sales = (
    df.groupby('Month')['Sales']
    .sum()
)

fig, ax = plt.subplots(figsize=(10, 4))
monthly_sales.plot(ax=ax, marker='o')
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
# Slant the (major) x tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()


In [None]:
# Sales by category: total 'Sales' per 'Category', largest first,
# shown as a vertical bar chart.
category_sales = (
    df.groupby('Category')['Sales']
    .sum()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=category_sales.index, y=category_sales.values, ax=ax)
ax.set_title('Sales by Category')
ax.set_xlabel('Category')
ax.set_ylabel('Sales')
fig.tight_layout()
plt.show()


In [None]:
# Top 10 products by sales: total 'Sales' per 'Product Name', sorted in
# descending order and truncated to the first ten entries.
product_sales = (
    df.groupby('Product Name')['Sales']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bars: sales on the x axis, product names on the y axis.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=product_sales.values, y=product_sales.index, orient='h', ax=ax)
ax.set_title('Top 10 Products by Sales')
ax.set_xlabel('Sales')
ax.set_ylabel('Product Name')
fig.tight_layout()
plt.show()
