# Real‑World Use Cases

Applying Pandas patterns to realistic, end‑to‑end data analysis workflows.


In [1]:
import pandas as pd
import numpy as np

## Exploratory Data Analysis

In [2]:
df = pd.DataFrame({
    'category': ['A', 'B', 'A', 'C', 'B', 'A'],
    'sales': [100, 150, 120, 90, 160, 200],
    'profit': [20, 35, 25, 10, 40, 60]
})

df

Unnamed: 0,category,sales,profit
0,A,100,20
1,B,150,35
2,A,120,25
3,C,90,10
4,B,160,40
5,A,200,60


In [3]:
df.describe()

Unnamed: 0,sales,profit
count,6.0,6.0
mean,136.666667,31.666667
std,41.311822,17.511901
min,90.0,10.0
25%,105.0,21.25
50%,135.0,30.0
75%,157.5,38.75
max,200.0,60.0


In [4]:
df.groupby('category')['sales'].mean()

category
A    140.0
B    155.0
C     90.0
Name: sales, dtype: float64

## Business reporting

In [6]:
report = (
    df.groupby('category')
      .agg(total_sales=('sales', 'sum'), avg_profit=('profit', 'mean'))
      .sort_values('total_sales', ascending=False)
)

report

Unnamed: 0_level_0,total_sales,avg_profit
category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,420,35.0
B,310,37.5
C,90,10.0


In [7]:
report['margin'] = report['avg_profit'] / report['total_sales']
report

Unnamed: 0_level_0,total_sales,avg_profit,margin
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,420,35.0,0.083333
B,310,37.5,0.120968
C,90,10.0,0.111111


## Financial data analysis

In [8]:
prices = pd.Series(
    [100, 102, 101, 105, 110],
    index=pd.date_range('2023-01-01', periods=5)
)

prices

2023-01-01    100
2023-01-02    102
2023-01-03    101
2023-01-04    105
2023-01-05    110
Freq: D, dtype: int64

In [9]:
returns = prices.pct_change()
returns

2023-01-01         NaN
2023-01-02    0.020000
2023-01-03   -0.009804
2023-01-04    0.039604
2023-01-05    0.047619
Freq: D, dtype: float64

In [10]:
returns.rolling(3).mean()

2023-01-01         NaN
2023-01-02         NaN
2023-01-03         NaN
2023-01-04    0.016600
2023-01-05    0.025806
Freq: D, dtype: float64

## Time series forecasting prep

In [11]:
ts = pd.DataFrame({
    'date': pd.date_range('2023-01-01', periods=10),
    'sales': [100,120,130,125,140,160,155,170,180,190]
}).set_index('date')

ts

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2023-01-01,100
2023-01-02,120
2023-01-03,130
2023-01-04,125
2023-01-05,140
2023-01-06,160
2023-01-07,155
2023-01-08,170
2023-01-09,180
2023-01-10,190


In [12]:
ts['lag_1'] = ts['sales'].shift(1)
ts['lag_7'] = ts['sales'].shift(7)
ts

Unnamed: 0_level_0,sales,lag_1,lag_7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,100,,
2023-01-02,120,100.0,
2023-01-03,130,120.0,
2023-01-04,125,130.0,
2023-01-05,140,125.0,
2023-01-06,160,140.0,
2023-01-07,155,160.0,
2023-01-08,170,155.0,100.0
2023-01-09,180,170.0,120.0
2023-01-10,190,180.0,130.0


In [None]:
ts.dropna()

## Data preprocessing pipelines

In [13]:
clean = (
    df
    .assign(margin=lambda x: x['profit'] / x['sales'])
    .query('sales > 100')
    .groupby('category', as_index=False)
    .mean()
)

clean

Unnamed: 0,category,sales,profit,margin
0,A,160.0,42.5,0.254167
1,B,155.0,37.5,0.241667


## Kaggle‑style datasets

In [14]:
kaggle_like = pd.DataFrame({
    'Age': [22, 38, 26, 35, None],
    'Fare': [7.25, 71.83, 7.92, 53.10, 8.05],
    'Sex': ['male', 'female', 'female', 'female', 'male']
})

kaggle_like

Unnamed: 0,Age,Fare,Sex
0,22.0,7.25,male
1,38.0,71.83,female
2,26.0,7.92,female
3,35.0,53.1,female
4,,8.05,male


In [15]:
kaggle_like['Age'] = kaggle_like['Age'].fillna(kaggle_like['Age'].median())
kaggle_like

Unnamed: 0,Age,Fare,Sex
0,22.0,7.25,male
1,38.0,71.83,female
2,26.0,7.92,female
3,35.0,53.1,female
4,30.5,8.05,male


In [16]:
pd.get_dummies(kaggle_like, columns=['Sex'], drop_first=True)

Unnamed: 0,Age,Fare,Sex_male
0,22.0,7.25,True
1,38.0,71.83,False
2,26.0,7.92,False
3,35.0,53.1,False
4,30.5,8.05,True
