In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the sales data
df = pd.read_csv('sales_data.csv')

# 2. Display the first and last five rows
print("First 5 rows:\n", df.head())
print("\nLast 5 rows:\n", df.tail())

# 3. Column names and data types
print("\nColumn Names:", df.columns.tolist())
print("\nData Types:\n", df.dtypes)

# 4. Shape, missing values, summary statistics
print("\nShape:", df.shape)
print("\nMissing Values:\n", df.isnull().sum())
print("\nSummary Statistics:\n", df.describe(include='all'))

# 5. Filter out sales above ₹10,000
high_sales = df[df['Revenue'] > 10000]
print("\nSales above ₹10,000:\n", high_sales)

# 6. Add "Profit Margin" column (Profit Margin = (Revenue - Cost) / Revenue)
df['Profit Margin'] = ((df['Revenue'] - df['Cost Price']) / df['Revenue']).round(2)

# 7. Top-selling product based on total revenue
top_product = df.groupby('Product')['Revenue'].sum().idxmax()
print("\nTop-Selling Product by Revenue:", top_product)

# 8. Group sales by month
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Month'] = df['Order Date'].dt.to_period('M')
monthly_sales = df.groupby('Month')['Revenue'].sum()
print("\nMonthly Sales:\n", monthly_sales)

# 9. Top 5 entries by highest revenue
top_5_sales = df.sort_values(by='Revenue', ascending=False).head(5)
print("\nTop 5 Sales by Revenue:\n", top_5_sales)

# 10. Average revenue per product category
avg_revenue_category = df.groupby('Category')['Revenue'].mean()
print("\nAverage Revenue per Category:\n", avg_revenue_category)

# 11. Underperforming products (Revenue < ₹1000)
underperformers = df[df['Revenue'] < 1000]
print("\nUnderperforming Products:\n", underperformers[['Product', 'Revenue']])

# 12. Save cleaned DataFrame
df.to_csv('cleaned_sales_data.csv', index=False)

# 13. Visualizations
plt.figure(figsize=(12, 6))
# Monthly Sales Trend
monthly_sales.plot(kind='line', marker='o', title='Monthly Sales Trend')
plt.ylabel('Revenue')
plt.grid(True)
plt.tight_layout()
plt.show()

# Top Product Categories
plt.figure(figsize=(10, 5))
sns.barplot(x=avg_revenue_category.index, y=avg_revenue_category.values)
plt.title('Average Revenue per Category')
plt.ylabel('Revenue')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


First 5 rows:
       Product     Category  Units Sold  Price per Unit  Total Sales
0      Laptop  Electronics          10           50000       500000
1      Mobile  Electronics          25           20000       500000
2      Tablet  Electronics          15           30000       450000
3  Headphones  Accessories          50            2000       100000
4       Mouse  Accessories          40            1500        60000

Last 5 rows:
       Product     Category  Units Sold  Price per Unit  Total Sales
0      Laptop  Electronics          10           50000       500000
1      Mobile  Electronics          25           20000       500000
2      Tablet  Electronics          15           30000       450000
3  Headphones  Accessories          50            2000       100000
4       Mouse  Accessories          40            1500        60000

Column Names: ['Product', 'Category', 'Units Sold', 'Price per Unit', 'Total Sales']

Data Types:
 Product           object
Category          object
Unit

KeyError: 'Revenue'

In [None]:
!pip install matplotlib seaborn


Collecting matplotlib
  Downloading matplotlib-3.10.6-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.3-cp313-cp313-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.59.2-cp313-cp313-win_amd64.whl.metadata (111 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.9-cp313-cp313-win_amd64.whl.metadata (6.4 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-11.3.0-cp313-cp313-win_amd64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.6-cp313-cp313-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? e


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
