# Exploratory Data Analysis (EDA) on Retail Sales Data

This project analyzes retail sales data to uncover trends, patterns, and insights that help the business make better decisions.

In [None]:
# Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Global plot defaults: seaborn's white-grid style first, then the default
# figure size (width, height in inches) for every figure created afterwards.
sns.set(style='whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

In [None]:
# Read the retail sales CSV into a DataFrame and preview the first rows
DATA_FILE = 'retail_sales.csv'  # Replace with your dataset filename
df = pd.read_csv(DATA_FILE)
df.head()

## Basic Info and Data Quality Checks

In [None]:
# Display basic info: prints a concise summary of the DataFrame
# (column names, non-null counts, dtypes and memory usage).
df.info()

In [None]:
# Count the missing (NaN/None) entries in each column
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max).
# With default arguments, describe() reports on the numeric columns only.
df.describe()

## Data Visualization

In [None]:
# Draw one histogram per numeric column on a shared grid of subplots
hist_options = {'bins': 30, 'figsize': (14, 10), 'color': 'skyblue'}
df.hist(**hist_options)
plt.tight_layout()  # keep subplot titles and tick labels from overlapping
plt.show()

In [None]:
# Correlation heatmap of the numeric columns.
# Select numeric columns explicitly: on pandas >= 2.0, DataFrame.corr() no
# longer drops non-numeric columns by default and raises when the frame still
# contains text columns (e.g. a category label).
numeric_df = df.select_dtypes(include='number')

# A correlation matrix needs at least two numeric columns to be informative;
# skip the plot otherwise, like the other "if available" cells do.
if numeric_df.shape[1] >= 2:
    plt.figure(figsize=(10, 8))
    sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
    plt.title('Correlation Matrix')
    plt.show()

In [None]:
# Total sales per category, drawn as bars from highest to lowest.
# The cell is skipped when either required column is missing.
if all(column in df.columns for column in ('Category', 'Sales')):
    sales_by_category = df.groupby('Category')['Sales'].sum()
    top_categories = sales_by_category.sort_values(ascending=False)
    top_categories.plot.bar(color='orange')
    plt.title('Top Selling Categories')
    plt.ylabel('Total Sales')
    plt.show()

## Time Series Analysis (if a `Date` column is available)

In [None]:
# Monthly sales trend: total 'Sales' per calendar month.
# Requires both a 'Date' and a 'Sales' column; the cell is skipped otherwise.
if 'Date' in df.columns and 'Sales' in df.columns:
    # Parse the dates in place; repeating this on an already-parsed column
    # leaves it unchanged, so the cell is safe to re-run.
    df['Date'] = pd.to_datetime(df['Date'])

    # Index a temporary frame instead of df itself. An in-place set_index
    # would remove 'Date' from df.columns, so a second run of this cell would
    # silently skip the plot and other cells could no longer see the column.
    # NOTE(review): 'M' is the month-end alias; pandas 2.2+ prefers 'ME' and
    # warns on 'M' - switch once older pandas no longer needs to be supported.
    monthly_sales = df.set_index('Date')['Sales'].resample('M').sum()

    monthly_sales.plot()
    plt.title('Monthly Sales Trend')
    plt.ylabel('Sales')
    plt.xlabel('Month')
    plt.grid(True)
    plt.show()

## Recommendations
- Focus on high-performing categories for promotions.
- Monitor time series patterns to plan stock levels and campaigns.
- Address missing data and clean inconsistencies before final modeling or dashboarding.