## Pandas Windowing Functions
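* 4 windowing functions are available in pandas:
    * rolling: a fixed- or variable-size sliding window (`.rolling()`)
    * expanding: a window that grows to include every value up to the current row (`.expanding()`)
    * ewm: an exponentially weighted window in which older values get smaller weights (`.ewm()`)
    * weighted: a rolling window with non-uniform weights (`.rolling(win_type=...)`, requires SciPy)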

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

### Expanding window
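* Baştan itibaren o satıra kadarki tüm değerler üzerinden birikimli (kümülatif) hesaplama yapar; örneğin `sum()` ile kümülatif toplamı verir.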

In [7]:
# Small demo frame: group key 'A' (labels a/b/c) and integer values 'B' (0..5).
df = pd.DataFrame(
    {
        "A": list("abcaba"),
        "B": range(6),
    }
)

# Per-group total of B; not the last expression, so the notebook does not display it.
df.groupby("A").sum()

# Per-group summary statistics of B; this is the cell's displayed result.
df.groupby("A").agg(["sum", "max", "min", "mean", "std", "count"])

Unnamed: 0_level_0,B,B,B,B,B,B
Unnamed: 0_level_1,sum,max,min,mean,std,count
A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
a,8,5,0,2.666667,2.516611,3
b,5,4,1,2.5,2.12132,2
c,2,2,2,2.0,,1


In [8]:
# Cumulative (expanding) sum of B inside each 'A' group.
# Not the last expression, so the notebook does not display this result.
df.groupby('A').expanding().sum()

# Cumulative sum and cumulative mean side by side; this is the displayed output.
df.groupby('A').expanding().agg(['sum', 'mean'])

Unnamed: 0_level_0,Unnamed: 1_level_0,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,0,0.0,0.0
a,3,3.0,1.5
a,5,8.0,2.666667
b,1,1.0,1.0
b,4,5.0,2.5
c,2,2.0,2.0


### Rolling Window
* Sabit veya değişken kayan pencere oluşturma imkanı verir.

In [10]:
# Integer series 0..4 used to illustrate a fixed-size rolling window.
seri = pd.Series(range(0, 5))
seri

# Sum over a sliding window of 2 rows (the cell's displayed result);
# the first row has no complete window, so its sum is NaN.
seri.rolling(2).sum()

0    NaN
1    1.0
2    3.0
3    5.0
4    7.0
dtype: float64

In [14]:
# Show the contents of every rolling window next to its sum.
for window in seri.rolling(window=2):
    values = window.tolist()
    print(f"{values} -> {sum(values)}")

[0] -> 0
[0, 1] -> 1
[1, 2] -> 3
[2, 3] -> 5
[3, 4] -> 7


#### Zaman bazlı pencere

In [22]:
# Five daily observations (values 0..4) starting on 2020-01-01.
sr = pd.Series(
    range(5),
    index=pd.date_range('2020-01-01', periods=5, freq='D'),
)

# '3D' requests a 3-day time-based window; center=True centres it on each timestamp.
# Print the contents of every window as plain lists.
print([pencere.tolist() for pencere in sr.rolling('3D', center=True)])

# Sum over each centred 3-day window (the cell's displayed result).
sr.rolling('3D', center=True).sum()

[[0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4]]


2020-01-01    1.0
2020-01-02    3.0
2020-01-03    6.0
2020-01-04    9.0
2020-01-05    7.0
Freq: D, dtype: float64

#### Minimum gözlem sayısı (`min_periods`)

In [26]:
# Series with missing values at positions 0 and 3.
s = pd.Series([np.nan, 1, 2, np.nan, 4, 5, 6, 7])

s

# Default: for an integer window, min_periods equals the window size,
# so any NaN inside the window produces NaN.
s.rolling(3).sum()
# min_periods=1: a single non-missing value in the window is enough to compute.
s.rolling(3, min_periods=1).sum()
# min_periods=2: at least two non-missing values are required (displayed result).
s.rolling(3, min_periods=2).sum()

0     NaN
1     NaN
2     3.0
3     3.0
4     6.0
5     9.0
6    15.0
7    18.0
dtype: float64

#### Pencerelerin Uç Noktaları
* matematikteki parantezler gibi düşünülebilir
  * `right (]` sağ uç dahil
  * `left [)` sol uç dahil
  * `both []` her iki uç da dahil
  * `neither ()` her iki uç da dahil değil

In [3]:
# Five observations with x = 1 at 09:00:01..09:00:04 and 09:00:06 (09:00:05 is absent).
df = pd.DataFrame(
    {'x': 1},
    index=[pd.Timestamp(f'20130101 09:00:0{sec}') for sec in (1, 2, 3, 4, 6)],
)

# One column per `closed` option: the 2-second rolling sum of x under that endpoint rule.
for closed in ('right', 'both', 'left', 'neither'):
    df[closed] = df.rolling('2s', closed=closed).x.sum()

df

Unnamed: 0,x,right,both,left,neither
2013-01-01 09:00:01,1,1.0,1.0,,
2013-01-01 09:00:02,1,2.0,2.0,1.0,1.0
2013-01-01 09:00:03,1,2.0,3.0,2.0,1.0
2013-01-01 09:00:04,1,2.0,3.0,2.0,1.0
2013-01-01 09:00:06,1,1.0,2.0,1.0,


### Özel Pencere Oluşturma
* Sabit olmayan uzaklıklar için de pencereleme yapılabilir.

In [6]:
from pandas.api.indexers import VariableOffsetWindowIndexer

# Ten consecutive calendar days starting 2020-01-01, with values 0..9 in column 'A'.
df = pd.DataFrame(range(10), index=pd.date_range('2020', periods=10), columns=['A'])

# Variable-width windows: each row's window starts one business day before that row.
indexer = VariableOffsetWindowIndexer(index=df.index, offset=pd.offsets.BDay(1))

# Select column 'A' explicitly so that a Series (not a one-column DataFrame) is stored.
# The windows for Saturday, Sunday and Monday all start just after the preceding
# Friday, so their sums accumulate (3, 3+4, 3+4+5); every other row only contains
# its own value.
df["Rolling"] = df["A"].rolling(window=indexer).sum()
df["GünAdi"] = df.index.day_name()  # weekday name of each date

df

Unnamed: 0,A,Rolling,GünAdi
2020-01-01,0,0.0,Wednesday
2020-01-02,1,1.0,Thursday
2020-01-03,2,2.0,Friday
2020-01-04,3,3.0,Saturday
2020-01-05,4,7.0,Sunday
2020-01-06,5,12.0,Monday
2020-01-07,6,6.0,Tuesday
2020-01-08,7,7.0,Wednesday
2020-01-09,8,8.0,Thursday
2020-01-10,9,9.0,Friday
