In [None]:
# --- Imports and Settings ---
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set seaborn style for better aesthetics
sns.set_style("whitegrid")

# --- Load the Dataset ---
# Every later step needs `df`. A failed load is therefore reported and then
# re-raised, instead of letting the script continue into an unrelated
# NameError (or, in a notebook, silently reusing a stale `df`).
try:
    df = pd.read_csv('Superstore-Sales.csv')
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise
else:
    print("Dataset loaded successfully.")

# --- Task 1: Explore the Dataset ---
# Print a sample of rows, the dtype of each column, and the number of
# missing entries per column, each under its own heading.
print("First few rows:", df.head(), sep="\n")
print("\nData types:", df.dtypes, sep="\n")
print("\nMissing values:", df.isnull().sum(), sep="\n")

# --- Handle Missing Values ---
# float64/int64 columns: fill gaps with the column mean.
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# object columns: fill gaps with the most frequent value.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    col_modes = df[col].mode()
    # mode() is empty when the column has no non-missing values; indexing its
    # first element would raise, so such a column is left as it is.
    if not col_modes.empty:
        df[col] = df[col].fillna(col_modes.iloc[0])

print("\nMissing values after handling:")
print(df.isnull().sum())

# --- Task 2: Basic Data Analysis ---
# Summary statistics as produced by DataFrame.describe().
print("\nBasic statistics:", df.describe(), sep="\n")

# Mean of 'Sales' within each 'Customer Segment'.
segment_sales = df['Sales'].groupby(df['Customer Segment']).mean()
print("\nAverage Sales per Customer Segment:", segment_sales, sep="\n")

# Mean of 'Sales' within each 'Product Category'.
category_sales = df['Sales'].groupby(df['Product Category']).mean()
print("\nAverage Sales per Product Category:", category_sales, sep="\n")

# --- Task 3: Data Visualization ---

# Parse 'Order Date' and make it the index so 'Sales' can be resampled by
# calendar month.
df['Order Date'] = pd.to_datetime(df['Order Date'])
df.set_index('Order Date', inplace=True)

# Line chart: Monthly Sales
# The month-end frequency alias is spelled 'ME' from pandas 2.2 on, where the
# older 'M' spelling is deprecated (slated for removal). pandas < 2.2 only
# understands 'M' and raises ValueError for 'ME', so try the current alias
# first and fall back to the legacy one.
try:
    monthly_sales = df['Sales'].resample('ME').sum()
except ValueError:
    monthly_sales = df['Sales'].resample('M').sum()

plt.figure(figsize=(12, 6))
sns.lineplot(x=monthly_sales.index, y=monthly_sales.values)
plt.title('Monthly Sales Over Time')
plt.xlabel('Month')
plt.ylabel('Total Sales')
plt.show()

# Bar chart: Average Sales per Customer Segment
# One bar per segment, bar height = the mean 'Sales' held in segment_sales.
plt.figure(figsize=(8, 6))
segment_axes = sns.barplot(x=segment_sales.index, y=segment_sales.values)
segment_axes.set_title('Average Sales per Customer Segment')
segment_axes.set_xlabel('Customer Segment')
segment_axes.set_ylabel('Average Sales')
plt.show()

# Histogram: Distribution of Sales
# 30-bin histogram of 'Sales' with a kernel density estimate drawn on top.
hist_fig, hist_ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['Sales'], bins=30, kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of Sales')
hist_ax.set_xlabel('Sales')
hist_ax.set_ylabel('Frequency')
plt.show()

# Scatter plot: Sales vs Profit
# One point per row of df, 'Sales' on the x-axis and 'Profit' on the y-axis.
plt.figure(figsize=(8, 6))
scatter_axes = sns.scatterplot(data=df, x='Sales', y='Profit')
scatter_axes.set_title('Sales vs Profit')
scatter_axes.set_xlabel('Sales')
scatter_axes.set_ylabel('Profit')
plt.show()

# Optional: Regression line for Sales vs Profit
# Same Sales/Profit scatter, drawn with seaborn's regplot so that a fitted
# regression line is overlaid on the points.
reg_fig, reg_ax = plt.subplots(figsize=(8, 6))
sns.regplot(data=df, x='Sales', y='Profit', ax=reg_ax)
reg_ax.set_title('Sales vs Profit with Regression Line')
reg_ax.set_xlabel('Sales')
reg_ax.set_ylabel('Profit')
plt.show()

: 