In [1]:
# For data manipulation and numerical computations

import numpy as np
import pandas as pd
%matplotlib inline        

In [2]:
# `%matplotlib inline` is used in Jupyter notebooks to display Matplotlib plots directly within the notebook interface.

In [3]:
# For data visualization

import matplotlib.pyplot as plt
import seaborn as sns

# For interactive visualization (Plotly)

import plotly.express as px
import plotly.graph_objects as go   
import plotly.io as pio
# Make 'plotly_white' the default template applied to Plotly figures created after this point.
pio.templates.default = 'plotly_white'

In [4]:
# Case Study 1: Sales Analysis
# Problem Statement:
# You have a dataset containing sales data with information like product ID, sales quantity, and revenue. 
# Perform the following tasks:

# Load the dataset and inspect its structure.
# Clean the data (handle missing values, duplicates, etc.).
# Calculate total revenue and quantity sold for each product.
# Find the top-selling products.
# Analyze monthly sales trends.

# Sales Analysis

# Load the dataset and inspect its structure.

In [5]:
# Build a reproducible synthetic sales table with one row per record.
N_ROWS = 1000

# Seed NumPy's global (legacy) RNG so every run draws the same values.
np.random.seed(5)

# The three draws share one RNG stream, so their order must not change.
# `high` is exclusive: IDs and quantities fall in 1-9, sales in 100-499.
product_id = np.random.randint(low=1, high=10, size=N_ROWS)
quantity = np.random.randint(low=1, high=10, size=N_ROWS)
sales = np.random.randint(low=100, high=500, size=N_ROWS)

# Element-wise product of the two arrays.
# NOTE(review): this makes 'Sales' look like a per-unit price - confirm.
revenue = sales * quantity

sales_columns = {
    'Product ID': product_id,
    'Quantity': quantity,
    'Sales': sales,
    'Revenue': revenue,
}
df_sales = pd.DataFrame(sales_columns)

# Preview the first five rows.
df_sales.head()

Unnamed: 0,Product ID,Quantity,Sales,Revenue
0,4,2,371,742
1,7,4,180,720
2,7,5,244,1220
3,1,4,231,924
4,9,7,399,2793


In [6]:
# Show the table's dimensions as a (rows, columns) tuple.
df_sales.shape

(1000, 4)

In [7]:
# List the column labels of the sales table.
df_sales.columns

Index(['Product ID', 'Quantity', 'Sales', 'Revenue'], dtype='object')

# Clean the data (handle missing values, duplicates, etc.).


In [8]:
# Count missing values per column (`isna` is the same check as `isnull`).
df_sales.isna().sum()

Product ID    0
Quantity      0
Sales         0
Revenue       0
dtype: int64

In [9]:
# Flag every row that exactly repeats an earlier row, then count the flags.
# NOTE(review): the duplicates are only counted here, not removed. The table has
# no transaction key, so identical rows may be legitimate separate sales - confirm
# whether they should be dropped before aggregating.
duplicate_mask = df_sales.duplicated()
duplicate_mask.sum()

14

# Calculate total revenue and quantity sold for each product.


In [10]:
# Add up the 'Revenue' column over all rows.
# NOTE(review): this is the grand total across every product, not a per-product
# breakdown - confirm that this is what the task intends.
revenue_column = df_sales['Revenue']
total_revenue = revenue_column.sum()
print('Total Revenue :- ', total_revenue)

Total Revenue :-  1526645


In [24]:
# Total quantity sold per product: group rows by 'Product ID' and sum 'Quantity'.
quantity_by_product = df_sales.groupby('Product ID')['Quantity']
product = quantity_by_product.sum()

# Display the resulting Series (indexed by 'Product ID').
product

Product ID
1    618
2    764
3    445
4    521
5    491
6    562
7    568
8    538
9    507
Name: Quantity, dtype: int32

In [25]:
# Bar chart of the total quantity sold per product.
#
# For wide-form input (a Series/DataFrame passed without x/y), Plotly Express
# names the x column after the index *name* and only falls back to "index" when
# the index is unnamed. `product` comes from a groupby on 'Product ID', so its
# index is named; a label keyed on "index" would be silently ignored. Key the
# x-axis label on the actual index name instead.
x_column = product.index.name or "index"
fig = px.bar(
    product,
    labels={x_column: "Products", "value": "Quantity Sold"},
    title="Quantity Sold For Each Product",
)
fig.show()

# Find the top-selling products.


In [26]:
# Sum the 'Sales' column per product, then report the product with the largest sum.
# NOTE(review): the ranking uses summed 'Sales', which appears to be a per-unit
# price (Revenue = Sales * Quantity elsewhere in this notebook). Confirm whether
# 'Quantity' or 'Revenue' is the intended measure of "top-selling".
sales_by_product = df_sales.groupby('Product ID')['Sales']
top = sales_by_product.sum()

# Label of the largest per-product total, and the total itself.
name, sell = top.idxmax(), top.max()

print('Top Selling Product')
print('Product Name :- ',name)
print('Total Sell :- ',sell)

Top Selling Product
Product Name :-  2
Total Sell :-  42300


# Analyze monthly sales trends.
