In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Optional: apply seaborn's "whitegrid" look to every figure drawn below.
sns.set_style(style="whitegrid")


In [None]:
# Read the sales records from a CSV file (adjust the path to point at your
# own copy of the data).
sales_data = pd.read_csv(filepath_or_buffer="sales_data.csv")

# Print the first five rows as a quick sanity check of the load.
print(sales_data.head(n=5))


In [None]:
# Report how many missing entries each column contains.
missing_per_column = sales_data.isna().sum()
print(missing_per_column)

# Keep only rows with no missing value in any column. The result is bound
# back to `sales_data` because the cells below read from that name.
sales_data = sales_data.dropna()


In [None]:
# Plain histogram of SalesAmount: 30 bins, no density curve.
ax = sns.histplot(x=sales_data['SalesAmount'], bins=30, kde=False)
ax.set_title('Sales Amount Distribution')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Same column with finer binning (50 bins) and a KDE curve overlaid.
ax = sns.histplot(data=sales_data, x='SalesAmount', bins=50, kde=True)
ax.set(
    title='Sales Amount Distribution (50 Bins)',
    xlabel='Sales Amount',
    ylabel='Frequency',
)
plt.show()


In [None]:
# One histogram (plus KDE curve) per value of the Region column, drawn on
# shared axes and distinguished by colour.
ax = sns.histplot(
    data=sales_data,
    x='SalesAmount',
    hue='Region',
    bins=30,
    kde=True,
)
ax.set_title('Sales Amount Distribution by Region')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# displot is figure-level: it creates its own figure and returns a FacetGrid.
# With no faceting variables the grid holds a single axes, which is labelled
# through the grid object here.
grid = sns.displot(data=sales_data, x='SalesAmount', kde=True, bins=30)
grid.ax.set_title('Sales Amount Distribution (Dist Plot)')
grid.set_axis_labels('Sales Amount', 'Frequency')
plt.show()


In [None]:
# Compare two specific regions by overlaying one density curve per region.
# The mapping fixes both the drawing order (North, then South) and the
# colour used for each curve.
region_colors = {'North': 'blue', 'South': 'red'}

for region_name, curve_color in region_colors.items():
    # Restrict the frame to the rows belonging to this region.
    region_rows = sales_data.loc[sales_data['Region'] == region_name]
    sns.kdeplot(
        data=region_rows,
        x='SalesAmount',
        label=region_name,
        color=curve_color,
    )

# Both curves were drawn on the current axes; annotate that axes.
ax = plt.gca()
ax.set_title('KDE Comparison: North vs South')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Density')
ax.legend()
plt.show()


In [None]:
# Histogram with KDE, using more descriptive axis labels.
ax = sns.histplot(x=sales_data['SalesAmount'], bins=30, kde=True)
ax.set(
    title='Distribution of Sales Amounts',
    xlabel='Sales Amount in USD',
    ylabel='Number of Transactions',
)
plt.show()


In [None]:
# Per-region histograms again, this time with the 'Set1' palette choosing
# the colour of each region.
histogram_options = dict(
    x='SalesAmount',
    hue='Region',
    kde=True,
    bins=30,
    palette='Set1',
)
ax = sns.histplot(data=sales_data, **histogram_options)
ax.set_title('Sales Amount Distribution by Region (Color-Coded)')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Draw the histogram on an explicitly created 8x5 inch figure, write it to
# disk as a 300-dpi PNG, then close the figure instead of displaying it.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=sales_data, x='SalesAmount', kde=True, bins=30, ax=ax)
ax.set(
    title='Sales Amount Distribution with KDE',
    xlabel='Sales Amount',
    ylabel='Frequency',
)
fig.tight_layout()
fig.savefig('sales_amount_distribution.png', dpi=300)
plt.close(fig)


In [None]:
# Final on-screen histogram of SalesAmount (30 bins, KDE overlay).
ax = sns.histplot(data=sales_data, x='SalesAmount', bins=30, kde=True)
ax.set_title('Final Sales Amount Distribution')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Frequency')
plt.show()
