# In [None]:
# Step 1: Import pandas and load CSV
import pandas as pd

# Point the path below at your own CSV export before running.
df = pd.read_csv('sales_data.csv')

# Step 2: Preview both ends of the table (head()/tail() return 5 rows by default)
first_rows = df.head()
print("First 5 Rows:\n", first_rows)
last_rows = df.tail()
print("\nLast 5 Rows:\n", last_rows)

# Step 3: Schema overview - column labels, then the dtype pandas inferred for each
column_labels = df.columns
print("\nColumn Names:\n", column_labels)
column_dtypes = df.dtypes
print("\nData Types:\n", column_dtypes)

# Step 4: Size of the table, per-column count of missing cells, and describe() summary
n_rows_cols = df.shape
print("\nShape of dataset:", n_rows_cols)
missing_per_column = df.isnull().sum()
print("\nMissing Values:\n", missing_per_column)
summary_stats = df.describe()
print("\nSummary Statistics:\n", summary_stats)

# Step 5: Keep only the rows whose Revenue is strictly greater than ₹10,000
above_10k_mask = df['Revenue'] > 10000
high_sales = df.loc[above_10k_mask]
print("\nSales above ₹10,000:\n", high_sales)

# Step 6: Add Profit Margin column = (Revenue - Cost Price) / Revenue, as a percentage.
# Rows with zero Revenue have no defined margin: dividing by them would yield
# +/-inf (or NaN for 0/0). Mask zero revenue to NaN first so the margin is
# simply missing for those rows instead of infinite.
nonzero_revenue = df['Revenue'].where(df['Revenue'] != 0)
df['Profit Margin'] = ((df['Revenue'] - df['Cost Price']) / nonzero_revenue) * 100
print("\nData with Profit Margin:\n", df.head())

# Step 7: Sum revenue per product, rank high-to-low, and keep the single leader.
# top_product stays a one-row Series (index = product name, value = total revenue).
revenue_by_product = df.groupby('Product')['Revenue'].sum()
ranked_products = revenue_by_product.sort_values(ascending=False)
top_product = ranked_products.head(1)
print("\nTop Selling Product by Revenue:\n", top_product)

# Step 8: Total revenue per month.
# 'Order Date' is converted to datetime in place, and a numeric 'Month' column
# is added to df as the grouping key.
# NOTE(review): the key is the month number only, so the same month from
# different years is summed together - confirm the data covers a single year.
order_dates = pd.to_datetime(df['Order Date'])
df['Order Date'] = order_dates
df['Month'] = order_dates.dt.month
revenue_grouped_by_month = df.groupby('Month')['Revenue']
monthly_sales = revenue_grouped_by_month.sum()
print("\nTotal Sales per Month:\n", monthly_sales)

# Step 9: Order every row by Revenue, largest first, and show the first five
rows_by_revenue_desc = df.sort_values(by='Revenue', ascending=False)
top_revenue = rows_by_revenue_desc.head(5)
print("\nTop 5 Entries by Revenue:\n", top_revenue)

# Step 10: Mean Revenue of the rows in each Category
revenue_grouped_by_category = df.groupby('Category')['Revenue']
avg_rev_category = revenue_grouped_by_category.mean()
print("\nAverage Revenue per Category:\n", avg_rev_category)

# Step 11: Flag underperformers - rows whose Revenue is strictly below ₹2000
below_2k_mask = df['Revenue'] < 2000
underperformers = df.loc[below_2k_mask]
print("\nUnderperforming Products (Revenue < ₹2000):\n", underperformers)

# Step 12: Write the working frame to disk without the row index.
# At this point df is the loaded data plus the 'Profit Margin' and 'Month'
# columns, with 'Order Date' converted to datetime.
output_csv = 'cleaned_sales_data.csv'
df.to_csv(output_csv, index=False)
print("\nCleaned data saved to 'cleaned_sales_data.csv'.")

# Step 13: Describe key sales trends
# Prints headline figures from df, top_product (one-row Series from Step 7)
# and monthly_sales (revenue per month from Step 8).
print("\n--- Key Insights ---")
print(f"Total Revenue: ₹{df['Revenue'].sum():,.2f}")
print(f"Total Orders: {len(df)}")

# With no rows (or no usable Product values) top_product is empty and
# indexing position 0 would raise IndexError, so report "n/a" instead.
if top_product.empty:
    print("Top Product: n/a (no product revenue available)")
else:
    print(f"Top Product: {top_product.index[0]} with ₹{top_product.values[0]:,.2f} in revenue")

# idxmax()/idxmin() raise ValueError on an empty Series (no rows, or no
# parseable order dates), so guard the month summary the same way.
if monthly_sales.empty:
    print("Best Month (₹): n/a (no monthly sales available)")
    print("Worst Month (₹): n/a (no monthly sales available)")
else:
    print(f"Best Month (₹): {monthly_sales.idxmax()} with ₹{monthly_sales.max():,.2f}")
    print(f"Worst Month (₹): {monthly_sales.idxmin()} with ₹{monthly_sales.min():,.2f}")
