In [316]:
import numpy as np
import pandas as pd

import datetime

# 1. Create synthetic time series

In [317]:
np.random.seed(0)

step = 10  # minutes
size = 10000  # rows

df = pd.DataFrame(
    {
        "datetime":[
            (
                datetime.datetime(2020, 10, 5, 0, 0, 0)
                + datetime.timedelta(minutes=x)
            )
            .strftime("%d.%m.%Y %H:%M:%S")
            for x in np.arange(0, size*step, step).tolist()
        ],
         "value1": np.random.randint(1, 100, size),
         "value2": np.random.randint(100, 200, size)
    }
)

df = (
    pd.concat(
        [df.loc[:, "datetime"].str.split(pat=" ", expand=True), df],
        axis=1
    )
    .drop("datetime", axis=1)
    .rename({0: "date", 1: "time"}, axis=1)
)

print(df.dtypes)
df.head()

date      object
time      object
value1     int64
value2     int64
dtype: object


Unnamed: 0,date,time,value1,value2
0,05.10.2020,00:00:00,45,103
1,05.10.2020,00:10:00,48,119
2,05.10.2020,00:20:00,65,114
3,05.10.2020,00:30:00,68,151
4,05.10.2020,00:40:00,68,189


# 2. Process created time series

In [318]:
df = (
    pd.concat(
        [
            pd.to_datetime(
                df.iloc[:, 0] + " " + df.iloc[:, 1],
                format="%d.%m.%Y %H:%M:%S"
            ),
            df.loc[:, "value1":"value2"]
        ], axis=1
    )
    .rename({0: "datetime"}, axis=1)
)

print(df.dtypes)
df.head()

datetime    datetime64[ns]
value1               int64
value2               int64
dtype: object


Unnamed: 0,datetime,value1,value2
0,2020-10-05 00:00:00,45,103
1,2020-10-05 00:10:00,48,119
2,2020-10-05 00:20:00,65,114
3,2020-10-05 00:30:00,68,151
4,2020-10-05 00:40:00,68,189


In [319]:
def group(df, freq_value):
    """
    Calculate weighted something by time periods.
    
    :param df:           Pandas DataFrame with columns: datetime, value1, value2
    :param freq_value:   time series frequency, str()
        (https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
    :return:             processed Pandas DataFrame
    """
    
    def custom_agg_func(data):
        result_agg = (
            data["value1"].agg(['min', 'max', 'std', 'var', 'count'])
        )
        return result_agg       
        
    result = (
        df
        .groupby(pd.Grouper(key="datetime", freq=freq_value, origin="start"))
        .apply(custom_agg_func)
        .reset_index(name="result")
    )
    
    return result

In [321]:
%time

df_processed = group(df, "30min")
df_processed.head(20)

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 9.78 µs


Unnamed: 0,datetime,result
0,2020-10-05 00:00:00,17.616071
1,2020-10-05 00:30:00,18.205128
2,2020-10-05 01:00:00,16.829885
3,2020-10-05 01:30:00,28.101056
4,2020-10-05 02:00:00,16.176812
5,2020-10-05 02:30:00,20.952627
6,2020-10-05 03:00:00,23.957447
7,2020-10-05 03:30:00,15.304348
8,2020-10-05 04:00:00,11.000898
9,2020-10-05 04:30:00,25.610762
