# 📒 Exploratory Data Analysis (EDA): Superstore Dataset

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme globally so every plot below shares the same look.
sns.set_theme(style="whitegrid")


In [None]:

# Load the dataset.
# NOTE(review): some distributions of this CSV are reportedly saved in a
# Windows/Latin-1 style encoding rather than UTF-8, in which case the default
# read raises UnicodeDecodeError -- confirm for your copy of the file.
# UTF-8 is tried first so a correctly encoded file is read exactly as before;
# Latin-1 is used only as a fallback because it can decode any byte sequence.
csv_path = 'Sample - Superstore.csv'
try:
    df = pd.read_csv(csv_path)
except UnicodeDecodeError:
    df = pd.read_csv(csv_path, encoding='latin-1')

# Preview the first rows (kept as the last expression so the notebook displays it).
df.head()


## Basic Information

In [None]:

# Print a structural summary of the frame: column names, non-null counts, dtypes.
df.info()


In [None]:

# Summary statistics (count, mean, std, min, quartiles, max) using pandas' defaults.
# Kept as the last expression so the notebook renders the resulting table.
df.describe()


## Checking Missing Values

In [None]:

# Count missing (null/NaN) entries per column; a column of zeros means no gaps.
df.isnull().sum()


## Univariate Analysis

In [None]:

# Columns whose values are tabulated as categorical labels below.
categorical_cols = [
    'Ship Mode',
    'Segment',
    'Country',
    'City',
    'State',
    'Region',
    'Category',
    'Sub-Category',
]

# For each column, print a header line followed by its frequency table
# (value_counts orders the table by descending frequency).
for col in categorical_cols:
    print(f"\nValue counts for {col}:")
    frequency_table = df[col].value_counts()
    print(frequency_table)


In [None]:

# Numeric columns reused by the distribution and correlation plots.
numerical_cols = ['Sales', 'Quantity', 'Discount', 'Profit']

# One histogram per numeric column, 30 bins each, drawn on a shared 12x8 figure.
numeric_frame = df[numerical_cols]
numeric_frame.hist(figsize=(12, 8), bins=30)

# Tighten subplot spacing so titles and tick labels do not overlap.
plt.tight_layout()
plt.show()


## Bivariate Analysis

In [None]:

# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(10, 6))
correlation_matrix = df[numerical_cols].corr()
sns.heatmap(
    correlation_matrix,
    cmap='coolwarm',  # diverging palette: negative and positive values get opposite hues
    annot=True,       # write each coefficient inside its cell
)
plt.title('Correlation Heatmap')
plt.show()


In [None]:

# Grid of pairwise plots for every combination of the numeric columns.
numeric_subset = df[numerical_cols]
sns.pairplot(numeric_subset)
plt.show()


In [None]:

# Scatter plot with one point per row: Sales on the x-axis, Profit on the y-axis.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='Sales',
    y='Profit',
)
plt.title('Sales vs Profit')
plt.show()


In [None]:

# Box plot of the per-row Profit distribution, one box per Region.
plt.figure(figsize=(8, 6))
sns.boxplot(
    data=df,
    x='Region',
    y='Profit',
)
plt.title('Profit by Region')
plt.show()


In [None]:

# Bar charts of per-category totals, one figure per metric.
# estimator=sum makes each bar the sum of the metric within the category
# rather than seaborn's default aggregate.
category_charts = (
    ('Sales', 'Total Sales by Category'),
    ('Profit', 'Total Profit by Category'),
)

for metric, chart_title in category_charts:
    plt.figure(figsize=(10, 6))
    sns.barplot(data=df, x='Category', y=metric, estimator=sum)
    plt.title(chart_title)
    plt.show()



## Observations
- No missing values in the dataset.
- Technology has the highest total sales and total profit of the three categories.
- The Furniture category shows much lower total profit despite significant sales, i.e. a low profit margin.
- Discount is negatively correlated with profit.
- High sales don't always guarantee high profits.
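- The West region appears to be the most profitable, while the South region appears less profitable (judging from the per-order profit distributions in the box plot; regional totals are not computed in this notebook).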



## Conclusion
The Superstore dataset reveals important patterns for business decision-making:
- Focus on profitable regions and categories (Technology, West region).
- Investigate the reasons for weak profitability in some categories (Furniture) and regions (South).
- Analyze discount strategies carefully to avoid profit erosion.
