# Numerical Operations & Statistics

Numerical analysis patterns commonly used in data analysis and time-series work.


In [2]:
import pandas as pd
import numpy as np

# Toy dataset: six consecutive daily observations of sales and profit.
# The 'date' column is promoted to the index so every later cell works on a
# date-indexed frame.
df = (
    pd.DataFrame(
        {
            'date': pd.date_range('2023-01-01', periods=6, freq='D'),
            'sales': [100, 120, 130, 125, 160, 170],
            'profit': [20, 25, 30, 28, 40, 45],
        }
    )
    .set_index('date')
)
df

Unnamed: 0_level_0,sales,profit
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,100,20
2023-01-02,120,25
2023-01-03,130,30
2023-01-04,125,28
2023-01-05,160,40
2023-01-06,170,45


## Descriptive statistics

In [3]:
# Summary table per column: count, mean, sample std, min, quartiles, max.
# The percentiles listed are the defaults, spelled out for clarity.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,sales,profit
count,6.0,6.0
mean,134.166667,31.333333
std,26.15658,9.416298
min,100.0,20.0
25%,121.25,25.75
50%,127.5,29.0
75%,152.5,37.5
max,170.0,45.0


In [4]:
# Column-wise arithmetic mean (axis=0 reduces over the rows).
df.mean(axis=0)

sales     134.166667
profit     31.333333
dtype: float64

In [5]:
# Column-wise *sample* standard deviation: ddof=1 divides by (n - 1).
df.std(ddof=1)

sales     26.156580
profit     9.416298
dtype: float64

## Correlation & covariance

In [6]:
# Pairwise Pearson correlation between the columns.
df.corr(method='pearson')

Unnamed: 0,sales,profit
sales,1.0,0.996083
profit,0.996083,1.0


In [7]:
# Pairwise sample covariance (ddof=1); the diagonal holds each column's variance.
df.cov(ddof=1)

Unnamed: 0,sales,profit
sales,684.166667,245.333333
profit,245.333333,88.666667


## Quantiles

In [8]:
# First quartile of every column; a scalar q returns a Series named after q.
# Values falling between two observations are linearly interpolated.
df.quantile(q=0.25, interpolation='linear')

sales     121.25
profit     25.75
Name: 0.25, dtype: float64

In [9]:
# A list of quantiles returns a DataFrame with one row per requested q.
df.quantile(q=[0.25, 0.5, 0.75], interpolation='linear')

Unnamed: 0,sales,profit
0.25,121.25,25.75
0.5,127.5,29.0
0.75,152.5,37.5


## Rolling statistics

In [10]:
# Trailing 2-row moving average of every column.
# min_periods equals the window size, so the first row (incomplete window) is NaN.
df.rolling(window=2, min_periods=2).mean()

Unnamed: 0_level_0,sales,profit
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,,
2023-01-02,110.0,22.5
2023-01-03,125.0,27.5
2023-01-04,127.5,29.0
2023-01-05,142.5,34.0
2023-01-06,165.0,42.5


In [11]:
# Trailing 3-row sum of sales; the first two rows are NaN until the window fills.
df['sales'].rolling(window=3, min_periods=3).sum()

date
2023-01-01      NaN
2023-01-02      NaN
2023-01-03    350.0
2023-01-04    375.0
2023-01-05    415.0
2023-01-06    455.0
Name: sales, dtype: float64

## Expanding windows

In [12]:
# Running mean from the first row up to (and including) each row.
# min_periods=1 means a value is produced from the very first observation.
df.expanding(min_periods=1).mean()

Unnamed: 0_level_0,sales,profit
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,100.0,20.0
2023-01-02,110.0,22.5
2023-01-03,116.666667,25.0
2023-01-04,118.75,25.75
2023-01-05,127.0,28.6
2023-01-06,134.166667,31.333333


In [13]:
# Running total of profit from the first row through each row.
df['profit'].expanding(min_periods=1).sum()

date
2023-01-01     20.0
2023-01-02     45.0
2023-01-03     75.0
2023-01-04    103.0
2023-01-05    143.0
2023-01-06    188.0
Name: profit, dtype: float64

## Window functions

In [14]:
# Rank each day's sales in ascending order (1.0 = lowest sales).
# Tied values would share the average of their ranks, hence the float result.
# NOTE: this adds a column to df in place; cells run after this one see it.
df['sales_rank'] = df['sales'].rank(method='average', ascending=True)
df

Unnamed: 0_level_0,sales,profit,sales_rank
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,100,20,1.0
2023-01-02,120,25,2.0
2023-01-03,130,30,4.0
2023-01-04,125,28,3.0
2023-01-05,160,40,5.0
2023-01-06,170,45,6.0


In [15]:
# Z-score of each day's sales relative to its trailing 3-row window.
# The window includes the current row, so each value is scored against a mean
# and sample std (ddof=1) that it contributes to; with only 3 points |z| can
# never exceed 2 / sqrt(3) (about 1.155). The first two rows are NaN because
# the window is not yet full.
sales_window = df['sales'].rolling(3)
df['rolling_zscore'] = (df['sales'] - sales_window.mean()) / sales_window.std()
df

Unnamed: 0_level_0,sales,profit,sales_rank,rolling_zscore
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-01-01,100,20,1.0,
2023-01-02,120,25,2.0,
2023-01-03,130,30,4.0,0.872872
2023-01-04,125,28,3.0,0.0
2023-01-05,160,40,5.0,1.144586
2023-01-06,170,45,6.0,0.77588
