# 🛒 Sales Data Analysis: Sample Superstore Dataset
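This notebook performs an exploratory data analysis (EDA) of the Sample Superstore sales data: it loads the dataset, checks it for missing values and duplicate rows, and then charts total sales by category and by region, the relationship between discount and profit, and total profit by sub-category.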

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the dataset.
# NOTE(review): the encoding of this CSV is not visible from the notebook;
# exports of spreadsheet data are sometimes saved in a legacy single-byte
# encoding rather than UTF-8 -- confirm against the actual file.
# The fallback below only runs when the default UTF-8 decode fails, so a
# valid UTF-8 file is loaded exactly as before.
DATA_FILE = "Sample - Superstore.csv"
try:
    df = pd.read_csv(DATA_FILE)
except UnicodeDecodeError:
    # latin-1 maps every byte value to a character, so this decode cannot
    # itself raise UnicodeDecodeError.
    df = pd.read_csv(DATA_FILE, encoding="latin-1")

# Display first few rows (kept as the cell's last expression so the
# notebook renders it)
df.head()


In [None]:

# Print a concise summary of the frame (columns, dtypes, non-null counts)
df.info()

# Count the missing entries in each column; left as the final expression
# so the notebook displays the resulting Series
df.isna().sum()


In [None]:

# Remove fully duplicated rows, keeping the first occurrence of each;
# the original index labels of the surviving rows are preserved
df = df.drop_duplicates()

# Summary statistics for the numeric columns (final expression, so the
# notebook displays the table)
df.describe()


In [None]:

# Total sales per product category, ordered from largest to smallest
sales_per_category = df.groupby('Category')['Sales'].sum()
category_sales = sales_per_category.sort_values(ascending=False)

# Draw the bar chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 5))
category_sales.plot.bar(ax=ax, color='skyblue')
ax.set_title("Total Sales by Category")
ax.set_ylabel("Sales")
# Tilt the category names on the x axis
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
fig.tight_layout()
plt.show()


In [None]:

# Total sales per region, ordered from largest to smallest
sales_per_region = df.groupby('Region')['Sales'].sum()
region_sales = sales_per_region.sort_values(ascending=False)

# Draw the bar chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 5))
region_sales.plot.bar(ax=ax, color='orange')
ax.set_title("Total Sales by Region")
ax.set_ylabel("Sales")
# Tilt the region names on the x axis
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
fig.tight_layout()
plt.show()


In [None]:

# Scatter of profit against discount, one colour per product category
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='Discount',
    y='Profit',
    hue='Category',
    ax=ax,
)
ax.set_title("Profit vs Discount")
fig.tight_layout()
plt.show()


In [None]:

# Total profit per sub-category, sorted ascending so the horizontal bars
# run from the lowest total at the bottom to the highest at the top
profit_per_subcategory = df.groupby('Sub-Category')['Profit'].sum()
subcategory_profit = profit_per_subcategory.sort_values()

# Horizontal bar chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
subcategory_profit.plot.barh(ax=ax, color='green')
ax.set_title("Profit by Sub-Category")
ax.set_xlabel("Profit")
fig.tight_layout()
plt.show()
