In [12]:
import pandas as pd
from datetime import timedelta
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
import pandas as pd
import re
import numpy as np

In [15]:
df = pd.read_excel("Ventas x producto 0116 al 0418.xlsx")
df.head()

Unnamed: 0,Fecha,Local,Turno,Producto ID,Codigo,Producto,Subcategoria,Cantidad,1/2 Porciones
0,2016-01-01 17:19:00,Soler,Noche,66,169.0,HOT Bologna,Rolls,0.5,1
1,2016-01-01 17:19:00,Soler,Noche,55,163.0,HOT Firenze,Rolls,1.0,2
2,2016-01-01 17:19:00,Soler,Noche,520,,HOT New York Philadelphia,Rolls,4.0,2
3,2016-01-01 17:19:00,Soler,Noche,67,172.0,Roll Bs As,Rolls,2.5,1
4,2016-01-01 17:19:00,Soler,Noche,1227,,Roll Calafate,Rolls,2.0,0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92209 entries, 0 to 92208
Columns: 405 entries, Fecha to Subcategoria_Rolls
dtypes: float64(2), int64(2), object(1), uint8(400)
memory usage: 38.7+ MB


In [36]:
# functions

def targetToInt(data, target):
    """Convert a discrete target into a numeric one.

    Parameters
    ----------
    data : pandas.Dataframe
        The entire dataset

    target : str
        The label corresponding to the target feature

    Returns
    -------
    data : pandas.Dataframe
        Entire dataset with the target feature modified

    """
    label_encoder = LabelEncoder()
    label_encoder = label_encoder.fit(data.loc[:, target])
    data.loc[:, target] = label_encoder.transform(data.loc[:, target])
    return data


def oneHotEncoder(data, subdata=None, target=None):
    """Create dummy variables of discrete features.

    Parameters
    ----------
    data : pandas.Dataframe
        The entire dataset

    subdata: str, optional
        If provided, only that columns of the entire dataframe will
        be encoded (target parameter will be ignored).

    target : str, optional
        The label corresponding to the target feature in order to
        avoid creating dummies for it.

    Returns
    -------
    data : pandas.Dataframe
        Entire dataset with dummies variables created.

    NOTE: beware of date features. Use expandDatatime before.

    See also
    --------
    expandDatatime

    """
    if (subdata is not None):
        # if(not isinstance(subdata, pd.DataFrame)):
        #   raise Exception("subdata must be a DataFrame object.")
        subdata = data[subdata].copy()
        new_data = pd.get_dummies(subdata)
        data = data.drop(columns=subdata)
        new_data = data.join(new_data).copy()
    elif(target):
        y = data.loc[:, target].copy()
        new_data = pd.get_dummies(data.drop(columns=target))
        new_data[target] = y
    else:
        new_data = pd.get_dummies(data)
    return new_data


def expandDatetime(data, datefields, drop=True, time=False):
    """Create several features from every datetime column.

    Add new columns to the Dataframe('Year', 'Month', 'Week',
    'Day', 'Dayofweek', 'Dayofyear', 'Is_month_end',
    'Is_month_start', 'Is_quarter_end', 'Is_quarter_start',
    'Is_year_end' and 'Is_year_start') for every feature
    containing the word "Date".

    This method is adapted from fastai.structured.add_datepart.

    Parameters
    ----------
    data: pandas.Dataframe
        The entire working dataset

    drop: boolean, optional
        Determines whether to drop the original datetime columns
        or not.

    time: boolean, optional
        If True adds aditional columns (Hour, Min and Sec).

    Returns
    -------
    new_data: pandas.Dataframe
        The entire dataframe with the new columns.

    """
    new_data = data.copy()
    fields_list = list(new_data)
    for field in fields_list:
        if field in datefields:
            fld = new_data[field]
            if not np.issubdtype(fld.dtype, np.datetime64):
                fld = pd.to_datetime(fld, infer_datetime_format=True)
                new_data[field] = fld
            targ_pre = re.sub('[Dd]ate$', '', field)
            attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek',
                    'Dayofyear', 'Is_month_end', 'Is_month_start',
                    'Is_quarter_end', 'Is_quarter_start', 'Is_year_end',
                    'Is_year_start']
            if time:
                attr = attr + ['Hour', 'Minute', 'Second']
            for n in attr:
                new_data[targ_pre + n] = getattr(fld.dt, n.lower()).astype(int)
            new_data[targ_pre + 'Elapsed'] = fld.astype(np.int64) // 10 ** 9
            if drop:
                new_data.drop(field, axis=1, inplace=True)
    return new_data


def manageNulls(data):
    newData = data.copy()
    null_columns = newData.columns[newData.isnull().any()]
    if(not null_columns.empty):
        print("WARNING: {} columns contain some null\
               content".format(null_columns))
        impNumeric = SimpleImputer(strategy='mean')
        impCategorical = SimpleImputer(strategy='most_frequent')
        for null_column in null_columns:
            if isinstance(null_column, str):
                newData[null_column] = \
                    impCategorical.fit_transform(newData[[null_column]])
            else:
                newData[null_column] = \
                    impNumeric.fit_transform(newData[[null_column]])
    return newData

interval_time = {
    "HOUR": 60,
    "DAY": (24 * 60),
}

def time_normalizer(column, interval=interval_time["DAY"]):
    """
    Convert datetime from continous time to range according to the interval.

    Parameters
    ----------
    column: pandas.core.series.Series (Dataframe column)
        datetime column to apply transformation.

    interval: int (minutes)
        amount of minutes group the datetime column.

    """
    if not isinstance(column, pd.core.series.Series):
        raise Exception("<<column>> is not pd.core.series.Series class." +
                        "It's: " + str(type(column)))

    if(interval == interval_time["DAY"]):
        return column.dt.strftime('%Y/%m/%d')

    # TODO: extends behaviour (not only day)
    return column


def simple_pivot_transform(dataset, target, groupby):
    """Pivot only on time feature counting or adding the target column."""
    if not isinstance(dataset, pd.DataFrame):
        raise Exception("<<df>> is not pandas.Dataframe class. It's: " +
                        str(type(dataset)))
    df = dataset.copy()
    oper = "count"
    if(False):
        df_piv = df.groupby(groupby).count()
    else:
        oper = "sum"
        df_piv = df.groupby(groupby).sum()

    return df_piv.reset_index(), oper
    


In [108]:
tdata = pd.read_excel("Ventas x producto 0116 al 0418.xlsx")
tdata.head()

Unnamed: 0,Fecha,Local,Turno,Producto ID,Codigo,Producto,Subcategoria,Cantidad,1/2 Porciones
0,2016-01-01 17:19:00,Soler,Noche,66,169.0,HOT Bologna,Rolls,0.5,1
1,2016-01-01 17:19:00,Soler,Noche,55,163.0,HOT Firenze,Rolls,1.0,2
2,2016-01-01 17:19:00,Soler,Noche,520,,HOT New York Philadelphia,Rolls,4.0,2
3,2016-01-01 17:19:00,Soler,Noche,67,172.0,Roll Bs As,Rolls,2.5,1
4,2016-01-01 17:19:00,Soler,Noche,1227,,Roll Calafate,Rolls,2.0,0


In [109]:
tdata = manageNulls(tdata)



In [110]:
groupby = "Fecha"
target = "Cantidad"
# pivoting numeric variables
if(groupby):
    pivot = groupby+"_normalized"
    # normalize in temporal data
    tdata[pivot] = time_normalizer(tdata[groupby])
    # create pivot df (from the normalized column)
    data_piv, oper = simple_pivot_transform(tdata, target=target,
                                            groupby=pivot)
data_piv.head()

Unnamed: 0,Fecha_normalized,Producto ID,Codigo,Cantidad,1/2 Porciones
0,2016/01/01,48195,7081.0,333.0,22
1,2016/01/02,49409,6407.0,305.0,33
2,2016/01/03,50708,6953.0,282.5,23
3,2016/01/04,36325,3679.0,202.5,9
4,2016/01/05,45373,6414.0,265.5,50


In [111]:
ohedata = ["Local", "Turno", "Producto", "Subcategoria"]
if(ohedata):
        # make disc dummies with datetime field (foreign key)
        disc_data_ohe = oneHotEncoder(
            data=tdata[ohedata + [groupby+"_normalized"]],
            subdata=ohedata,
            target=target)
        # pivot by date and totalize
        disc_data_ohe_tot = disc_data_ohe.groupby(groupby+"_normalized").sum()
        # binarize (optionally)
        disc_data_ohe_tot = (disc_data_ohe_tot > 0).astype(int).reset_index()


In [112]:
print(disc_data_ohe_tot["Fecha_normalized"].nunique(), len(disc_data_ohe_tot["Fecha_normalized"]))
disc_data_ohe_tot.head()

846 846


Unnamed: 0,Fecha_normalized,Local_Soler,Turno_Dia,Turno_Noche,Producto_$319 Toronto XL Groupon!,Producto_1/2 Geishas Salmon,Producto_1/2 Sashimis Salmon,Producto_1/2 Valencia+1/2 Delhi+1/2 Chop suey,Producto_2 Alaska XL,Producto_2 Alaska xl fiestas,...,Subcategoria_Fiestas,Subcategoria_Langostino,Subcategoria_Makis,Subcategoria_Nigiris y Sashimis,Subcategoria_Otro,Subcategoria_POP SALE,Subcategoria_PedidosYa,Subcategoria_Postres,Subcategoria_Promos,Subcategoria_Rolls
0,2016/01/01,1,0,1,0,0,0,0,0,0,...,0,1,1,1,1,0,1,0,1,1
1,2016/01/02,1,1,1,0,0,0,0,0,0,...,0,1,1,1,1,0,1,0,1,1
2,2016/01/03,1,1,1,0,0,0,0,0,0,...,0,1,1,1,1,0,1,0,1,1
3,2016/01/04,1,1,1,0,0,0,0,0,0,...,0,1,1,0,0,0,1,0,1,1
4,2016/01/05,1,0,1,0,0,0,0,0,0,...,0,1,1,1,1,0,1,0,1,1


In [120]:
df_int = disc_data_ohe_tot.merge(data_piv,
                         left_on=groupby+"_normalized",
                         right_on=groupby+"_normalized",
                         suffixes=('', '_'+oper))
df_int[groupby+"_normalized"] = pd.to_datetime(df_int[groupby+"_normalized"])
df_int.head()

Unnamed: 0,Fecha_normalized,Local_Soler,Turno_Dia,Turno_Noche,Producto_$319 Toronto XL Groupon!,Producto_1/2 Geishas Salmon,Producto_1/2 Sashimis Salmon,Producto_1/2 Valencia+1/2 Delhi+1/2 Chop suey,Producto_2 Alaska XL,Producto_2 Alaska xl fiestas,...,Subcategoria_Otro,Subcategoria_POP SALE,Subcategoria_PedidosYa,Subcategoria_Postres,Subcategoria_Promos,Subcategoria_Rolls,Producto ID,Codigo,Cantidad,1/2 Porciones
0,2016-01-01,1,0,1,0,0,0,0,0,0,...,1,0,1,0,1,1,48195,7081.0,333.0,22
1,2016-01-02,1,1,1,0,0,0,0,0,0,...,1,0,1,0,1,1,49409,6407.0,305.0,33
2,2016-01-03,1,1,1,0,0,0,0,0,0,...,1,0,1,0,1,1,50708,6953.0,282.5,23
3,2016-01-04,1,1,1,0,0,0,0,0,0,...,0,0,1,0,1,1,36325,3679.0,202.5,9
4,2016-01-05,1,0,1,0,0,0,0,0,0,...,1,0,1,0,1,1,45373,6414.0,265.5,50


In [121]:
datefields = ["Fecha"]
dates = pd.DataFrame(tdata[groupby+"_normalized"].unique(),
                             columns=[groupby+"_normalized"])
tsdata = expandDatetime(data=dates, datefields=[groupby+"_normalized"], drop=False)
tsdata.head()

Unnamed: 0,Fecha_normalized,Fecha_normalizedYear,Fecha_normalizedMonth,Fecha_normalizedWeek,Fecha_normalizedDay,Fecha_normalizedDayofweek,Fecha_normalizedDayofyear,Fecha_normalizedIs_month_end,Fecha_normalizedIs_month_start,Fecha_normalizedIs_quarter_end,Fecha_normalizedIs_quarter_start,Fecha_normalizedIs_year_end,Fecha_normalizedIs_year_start,Fecha_normalizedElapsed
0,2016-01-01,2016,1,53,1,4,1,0,1,0,1,0,1,1451606400
1,2016-01-02,2016,1,53,2,5,2,0,0,0,0,0,0,1451692800
2,2016-01-03,2016,1,53,3,6,3,0,0,0,0,0,0,1451779200
3,2016-01-04,2016,1,1,4,0,4,0,0,0,0,0,0,1451865600
4,2016-01-05,2016,1,1,5,1,5,0,0,0,0,0,0,1451952000


In [122]:
df_int[["Fecha_normalized", "Local_Soler"]].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 846 entries, 0 to 845
Data columns (total 2 columns):
Fecha_normalized    846 non-null datetime64[ns]
Local_Soler         846 non-null int64
dtypes: datetime64[ns](1), int64(1)
memory usage: 19.8 KB


In [123]:
df_int_2 = tsdata.merge(df_int,
                         left_on=groupby+"_normalized",
                         right_on=groupby+"_normalized",
                         suffixes=('', '_'+oper))
df_int_2.head()

Unnamed: 0,Fecha_normalized,Fecha_normalizedYear,Fecha_normalizedMonth,Fecha_normalizedWeek,Fecha_normalizedDay,Fecha_normalizedDayofweek,Fecha_normalizedDayofyear,Fecha_normalizedIs_month_end,Fecha_normalizedIs_month_start,Fecha_normalizedIs_quarter_end,...,Subcategoria_Otro,Subcategoria_POP SALE,Subcategoria_PedidosYa,Subcategoria_Postres,Subcategoria_Promos,Subcategoria_Rolls,Producto ID,Codigo,Cantidad,1/2 Porciones
0,2016-01-01,2016,1,53,1,4,1,0,1,0,...,1,0,1,0,1,1,48195,7081.0,333.0,22
1,2016-01-02,2016,1,53,2,5,2,0,0,0,...,1,0,1,0,1,1,49409,6407.0,305.0,33
2,2016-01-03,2016,1,53,3,6,3,0,0,0,...,1,0,1,0,1,1,50708,6953.0,282.5,23
3,2016-01-04,2016,1,1,4,0,4,0,0,0,...,0,0,1,0,1,1,36325,3679.0,202.5,9
4,2016-01-05,2016,1,1,5,1,5,0,0,0,...,1,0,1,0,1,1,45373,6414.0,265.5,50


In [124]:
print(df_int_2["Fecha_normalized"].nunique(), len(df_int_2["Fecha_normalized"]))


846 846


In [4]:
df = pd.get_dummies(df)

In [5]:
df["Fecha"].nunique()

1665

In [6]:
# without pivot only (date, target).
df["Fecha"] = df["Fecha"].dt.strftime('%Y/%m/%d')
tdf = df.groupby("Fecha").sum().reset_index()
tdf.head()

Unnamed: 0,Fecha,Producto ID,Codigo,Cantidad,1/2 Porciones,Local_Soler,Turno_Dia,Turno_Noche,Producto_$319 Toronto XL Groupon!,Producto_1/2 Geishas Salmon,...,Subcategoria_Fiestas,Subcategoria_Langostino,Subcategoria_Makis,Subcategoria_Nigiris y Sashimis,Subcategoria_Otro,Subcategoria_POP SALE,Subcategoria_PedidosYa,Subcategoria_Postres,Subcategoria_Promos,Subcategoria_Rolls
0,2016/01/01,48195,4431.0,333.0,22,70.0,0.0,70.0,0.0,0.0,...,0.0,3.0,2.0,2.0,2.0,0.0,19.0,0.0,2.0,15.0
1,2016/01/02,49409,3607.0,305.0,33,71.0,2.0,69.0,0.0,0.0,...,0.0,2.0,1.0,2.0,2.0,0.0,22.0,0.0,2.0,15.0
2,2016/01/03,50708,4253.0,282.5,23,74.0,14.0,60.0,0.0,0.0,...,0.0,3.0,1.0,2.0,2.0,0.0,24.0,0.0,2.0,20.0
3,2016/01/04,36325,1679.0,202.5,9,48.0,6.0,42.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,19.0,0.0,2.0,11.0
4,2016/01/05,45373,3964.0,265.5,50,66.0,0.0,66.0,0.0,0.0,...,0.0,2.0,1.0,2.0,1.0,0.0,21.0,0.0,2.0,19.0


In [24]:
len(df), len(tdf)

(92209, 846)

In [32]:
df.dtypes == "object"

Fecha             True
Local             True
Turno             True
Producto ID      False
Codigo           False
Producto          True
Subcategoria      True
Cantidad         False
1/2 Porciones    False
dtype: bool

In [22]:
df.head()
# agarrar discretas +  fecha

# dummy
df_dum = pd.get_dummies(df) # sin la fecha!!
# pivot por fecha y totalizar

# binarizo? > 0 (optional)
df_dum

Unnamed: 0,Producto ID,Codigo,Cantidad,1/2 Porciones,Fecha_2016/01/01,Fecha_2016/01/02,Fecha_2016/01/03,Fecha_2016/01/04,Fecha_2016/01/05,Fecha_2016/01/06,...,Subcategoria_Fiestas,Subcategoria_Langostino,Subcategoria_Makis,Subcategoria_Nigiris y Sashimis,Subcategoria_Otro,Subcategoria_POP SALE,Subcategoria_PedidosYa,Subcategoria_Postres,Subcategoria_Promos,Subcategoria_Rolls
0,66,169.0,0.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,55,163.0,1.0,2,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,520,,4.0,2,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,67,172.0,2.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1227,,2.0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,1195,,1.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,56,,3.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
7,69,164.0,1.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,9,158.0,4.5,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9,62,,1.0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [21]:
tdf.head()

Unnamed: 0,Fecha,Producto ID,Codigo,Cantidad,1/2 Porciones
0,2016/01/01,48195,4431.0,333.0,22
1,2016/01/02,49409,3607.0,305.0,33
2,2016/01/03,50708,4253.0,282.5,23
3,2016/01/04,36325,1679.0,202.5,9
4,2016/01/05,45373,3964.0,265.5,50


In [11]:
mdf = df.merge(tdf, left_on="Fecha", right_on="Fecha")
len(mdf)

182613

In [14]:
len(df)

92209

In [39]:
type(df["Fecha"])

pandas.core.series.Series

In [31]:
df.sort_values("Fecha").head(2)

Unnamed: 0,Fecha,Local,Turno,Producto ID,Codigo,Producto,Subcategoria,Cantidad,1/2 Porciones
0,2016-01-01 17:19:00,Soler,Noche,66,169.0,HOT Bologna,Rolls,0.5,1
50,2016-01-01 17:19:00,Soler,Noche,869,,Pollo Tandoori,Langostino,2.0,0


In [38]:
df["Fecha"].dt.strftime('%Y/%m/%d').head()

0    2016/01/01
1    2016/01/01
2    2016/01/01
3    2016/01/01
4    2016/01/01
Name: Fecha, dtype: object

In [28]:
df.groupby(["Fecha"]).count().head()

Unnamed: 0_level_0,Local,Turno,Producto ID,Codigo,Producto,Subcategoria,Cantidad,1/2 Porciones
Fecha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 17:19:00,70,70,70,17,70,70,70,70
2016-01-02 11:12:00,2,2,2,0,2,2,2,2
2016-01-02 17:37:00,69,69,69,15,69,69,69,69
2016-01-03 11:03:00,14,14,14,3,14,14,14,14
2016-01-03 17:36:00,60,60,60,17,60,60,60,60


In [13]:
# define a delta
minutes = 60 * 24  # a day
delta = timedelta(minutes=minutes)
category = "Producto ID"
time = "Fecha"
target = "Cantidad"

In [8]:
rsuffix='_next'

# step 1: sort rows
df = df.sort_values([category, time])
# step 2: add next record to previous column
df_shift = df[[category, time]].shift(1)
df_int = df.join(df_shift, lsuffix='', rsuffix=rsuffix)
# if category is different, put date as None (last record)
df_int.loc[df_int[category] != df_int[category+rsuffix], time+rsuffix] = None

In [29]:
# step 3: group (sum or count??) records between the delta
df_int[df_int[time] <= (df_int[time+rsuffix] + delta)]
# df_time = df_int.set_index(time)
# df_time.rolling(2).sum()

Unnamed: 0,Fecha,Local,Turno,Producto ID,Codigo,Producto,Subcategoria,Cantidad,1/2 Porciones,Producto ID_next,Fecha_next
2224,2016-02-03 17:32:00,Soler,Noche,1,150.0,Roll Salmon Skin,Rolls,1.0,0,1.0,2016-02-02 17:49:00
158,2016-01-03 17:36:00,Soler,Noche,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-01-03 11:03:00
660,2016-01-11 17:38:00,Soler,Noche,2,152.0,Roll California,Rolls,1.5,1,2.0,2016-01-10 17:43:00
1067,2016-01-17 17:32:00,Soler,Noche,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-01-16 17:37:00
1332,2016-01-21 17:22:00,Soler,Noche,2,152.0,Roll California,Rolls,1.0,0,2.0,2016-01-20 17:30:00
2903,2016-02-12 10:09:00,Soler,Dia,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-02-11 17:26:00
3077,2016-02-14 17:31:00,Soler,Noche,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-02-13 17:35:00
4511,2016-03-01 16:15:00,Soler,Noche,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-02-29 17:37:00
4551,2016-03-02 09:52:00,Soler,Dia,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-03-01 16:15:00
4920,2016-03-06 09:56:00,Soler,Dia,2,152.0,Roll California,Rolls,0.5,1,2.0,2016-03-05 17:32:00


In [16]:
df_int[target].rolling(2).sum()

531      NaN
860      2.0
1002     2.0
1739     2.0
1862     2.0
2151     1.5
2224     1.5
2366     1.5
2853     1.5
3019     1.5
3891     1.0
142      1.0
158      1.0
336      1.5
456      2.0
523      2.0
593      1.5
660      2.0
920      2.5
991      2.5
1067     2.0
1256     1.0
1332     1.5
1486     1.5
1617     1.0
1852     1.5
2086     1.5
2217     1.5
2511     2.0
2843     2.0
        ... 
91245    9.0
91343    3.0
91436    3.0
91535    6.0
91646    8.0
91780    5.0
91923    5.0
91988    6.0
92047    5.0
92159    4.0
84678    3.0
85615    4.0
86463    3.0
87350    3.0
88146    4.0
88221    3.0
89157    2.0
90866    2.0
91644    2.0
85408    2.0
85453    2.0
85536    3.0
86163    3.0
86389    5.0
87274    5.0
88129    3.0
88428    3.0
89081    2.0
90035    2.0
90792    3.0
Name: Cantidad, Length: 92209, dtype: float64

In [21]:
test = pd.DataFrame({'B': [0, 1, 2, None, 4]},
                   index = [pd.Timestamp('20130101 09:00:00'),
                            pd.Timestamp('20130101 09:00:02'),
                            pd.Timestamp('20130101 09:00:03'),
                            pd.Timestamp('20130101 09:00:05'),
                            pd.Timestamp('20130101 09:00:06')])

In [24]:
test.rolling("2s").sum()

Unnamed: 0,B
2013-01-01 09:00:00,0.0
2013-01-01 09:00:02,1.0
2013-01-01 09:00:03,3.0
2013-01-01 09:00:05,
2013-01-01 09:00:06,4.0


In [25]:
test.index

DatetimeIndex(['2013-01-01 09:00:00', '2013-01-01 09:00:02',
               '2013-01-01 09:00:03', '2013-01-01 09:00:05',
               '2013-01-01 09:00:06'],
              dtype='datetime64[ns]', freq=None)

TypeError: '>' not supported between instances of 'str' and 'int'