In [133]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

In [134]:
# Reference instant for the timestamp experiments: midnight on 9 January 2022, no tzinfo.
x = datetime(year=2022, month=1, day=9)

In [135]:
# POSIX timestamp of x. Because x carries no tzinfo, datetime.timestamp() interprets it
# in the machine's local timezone, so the recorded value depends on where this cell ran.
x.timestamp()

1641666600.0

In [136]:
# Ten-minute offset, spelled out in seconds.
y = timedelta(seconds=600)

In [137]:
# Shift the timestamp by the whole duration of y. total_seconds() is used instead of
# .seconds because .seconds is only the seconds component of the timedelta and silently
# drops whole days; for the 600-second offset both give the same result.
x.timestamp() + y.total_seconds()


1641667200.0

In [138]:
def round_dt(dt, delta):
    """Snap ``dt`` to the nearest whole multiple of ``delta``.

    Multiples are counted from ``datetime.min``. The number of steps is the
    float quotient of the elapsed time by ``delta`` passed through the built-in
    ``round``, so exact halves follow ``round``'s tie-breaking.

    Args:
        dt: The ``datetime`` to snap.
        delta: The ``timedelta`` grid spacing.

    Returns:
        The ``datetime`` on the grid closest to ``dt``.
    """
    origin = datetime.min
    elapsed = dt - origin
    whole_steps = round(elapsed / delta)
    return origin + whole_steps * delta

In [139]:
def impute_time_axis(time_axis: pd.Series, freq: timedelta = timedelta(days=1)) -> pd.Series:
    """Fill the missing entries of a time axis and return it as a datetime Series.

    Known timestamps are kept as parsed by ``pd.to_datetime``. Missing ones are
    estimated and then snapped to the ``freq`` grid with ``round_dt``:

    * a gap between two known timestamps is filled with evenly spaced points
      between them;
    * a gap at the very start or very end has only one known neighbour, so it is
      filled by stepping away from that neighbour in increments of ``freq``.

    Values are handled as timezone-naive; timezone-aware input is not supported.

    Args:
        time_axis: Series of datetimes or datetime-like strings, possibly with nulls.
        freq: Grid spacing that imputed values are rounded to; also the step used
            for gaps at either end of the series.

    Returns:
        A datetime Series with the same index and name as ``time_axis``. If no
        entry is missing, or every entry is missing (nothing to anchor on), the
        parsed series is returned unchanged.
    """
    parsed = pd.to_datetime(time_axis)
    # Detect gaps on the parsed values so detection and filling can never disagree.
    missing = parsed.isna().to_list()
    if not any(missing) or all(missing):
        return parsed

    epoch = datetime(1970, 1, 1)

    def to_naive(stamp):
        # A naive pandas Timestamp reports seconds since the epoch without any
        # local-timezone shift, so adding them to the naive epoch round-trips.
        return epoch + timedelta(seconds=float(stamp.timestamp()))

    stamps = parsed.to_list()
    filled = list(stamps)
    known = [pos for pos, is_missing in enumerate(missing) if not is_missing]
    first, last = known[0], known[-1]

    # Leading gap: walk backwards from the first known timestamp.
    for pos in range(first):
        filled[pos] = round_dt(to_naive(stamps[first]) - (first - pos) * freq, freq)

    # Trailing gap: walk forwards from the last known timestamp.
    for pos in range(last + 1, len(stamps)):
        filled[pos] = round_dt(to_naive(stamps[last]) + (pos - last) * freq, freq)

    # Interior gaps: evenly spaced points between the two bracketing timestamps.
    for left, right in zip(known, known[1:]):
        if right - left == 1:
            continue
        grid = np.linspace(
            stamps[left].timestamp(), stamps[right].timestamp(), right - left + 1
        )
        # grid[0] and grid[-1] are the known endpoints; only the interior is new.
        for offset, seconds in enumerate(grid[1:-1], start=1):
            filled[left + offset] = round_dt(
                epoch + timedelta(seconds=float(seconds)), freq
            )

    # Keep the caller's index and name so assigning the result back into a
    # DataFrame lines up row by row.
    return pd.Series(pd.to_datetime(filled), index=time_axis.index, name=time_axis.name)
    

In [140]:

# Toy dataset: one reading per row with its city and timestamp string. Three
# timestamps are left as np.nan so the imputation further down has gaps to fill.
df = pd.DataFrame(
    data=[
        [10, "Mumbai", "2015-12-02 05:10:00"],
        [1, "Bangalore", "2015-12-02 05:20:00"],
        [12, "Pune", "2015-12-02 05:40:00"],
        [4, "Mumbai", np.nan],
        [15, "Delhi", "2015-12-02 06:00:00"],
        [6, "Mumbai", np.nan],
        [14, "Bangalore", np.nan],
        [5, "Pune", "2015-12-02 06:40:00"],
        [8, "Mumbai", "2015-12-02 07:10:00"],
    ],
    columns=["value", "city", "date"],
)
df

Unnamed: 0,value,city,date
0,10,Mumbai,2015-12-02 05:10:00
1,1,Bangalore,2015-12-02 05:20:00
2,12,Pune,2015-12-02 05:40:00
3,4,Mumbai,
4,15,Delhi,2015-12-02 06:00:00
5,6,Mumbai,
6,14,Bangalore,
7,5,Pune,2015-12-02 06:40:00
8,8,Mumbai,2015-12-02 07:10:00


In [141]:

# Replace the string/NaN 'date' column with the imputed datetimes, asking for the
# filled-in values to be rounded to a 10-minute grid.
# NOTE(review): this overwrites a column of the frame displayed in the previous cell,
# so that earlier view of df is stale once this cell has run.
df['date'] = impute_time_axis(df['date'], freq = timedelta(minutes=10))

10


In [142]:
# Show the frame again to check that the 'date' column no longer has gaps.
df

Unnamed: 0,value,city,date
0,10,Mumbai,2015-12-02 05:10:00
1,1,Bangalore,2015-12-02 05:20:00
2,12,Pune,2015-12-02 05:40:00
3,4,Mumbai,2015-12-02 05:50:00
4,15,Delhi,2015-12-02 06:00:00
5,6,Mumbai,2015-12-02 06:10:00
6,14,Bangalore,2015-12-02 06:30:00
7,5,Pune,2015-12-02 06:40:00
8,8,Mumbai,2015-12-02 07:10:00


In [143]:
from tsa.interpolation import interpolate_dates as interpolate

In [144]:
# Rebind df to the result of tsa's interpolate_dates, called with the 'date' column,
# the ['value'] column list and a 10-minute timedelta. Judging by the recorded output,
# rows end up on a 10-minute grid, 'value' is filled for the added rows
# (e.g. 05:30 -> 6.5) and 'city' is no longer present — confirm against tsa.interpolation.
# NOTE(review): the cell overwrites its own input, so re-running it on its own feeds the
# already-interpolated frame back in; consider binding the result to a new name.
df = interpolate(df, 'date', ['value'], timedelta(minutes=10))
df

Unnamed: 0,value,date
0,10.0,2015-12-02 05:10:00
1,1.0,2015-12-02 05:20:00
2,6.5,2015-12-02 05:30:00
3,12.0,2015-12-02 05:40:00
4,4.0,2015-12-02 05:50:00
5,15.0,2015-12-02 06:00:00
6,6.0,2015-12-02 06:10:00
7,10.0,2015-12-02 06:20:00
8,14.0,2015-12-02 06:30:00
9,5.0,2015-12-02 06:40:00


In [145]:
from tsa.properties import Stationarity

# Build tsa's Stationarity helper from the frame, naming 'date' and 'value' as the
# columns to use (presumably the time axis and the tested series — confirm against
# tsa.properties).
s = Stationarity(df, 'date', 'value')

In [146]:
# Run the stationarity check. Per the recorded output it prints the ADF statistic,
# p-value, lag count, number of observations and critical values, then a verdict line.
s.test_stationarity()

1. ADF :  -4.654173074863313
2. P-Value :  0.00010251499296778517
3. Num Of Lags :  0
4. Num Of Observations Used For ADF Regression and Critical Values Calculation : 12
5. Critical Values :
	 1% :  -4.137829282407408
	 5% :  -3.1549724074074077
	 10% :  -2.7144769444444443


As p-value is inside the confidence interval of 95%, series is stationary.


In [148]:
from tsa.visualization import waterfall_plot
# Call tsa's waterfall_plot with the 'date' and 'value' columns; the final argument
# 'waterfall' comes back under the 't' key of the recorded result dict, alongside
# 'x', 'y', 'i' and 'd'.
# NOTE(review): the recorded 'x' values run to 07:10 (13 points) while the interpolated
# frame displayed earlier stops at 06:40 (10 rows), and execution count 147 is missing —
# the saved outputs look like they come from different kernel states; re-run top to bottom.
waterfall_plot(df, 'date','value', 'waterfall')

{'t': 'waterfall',
 'x': ['2015-12-02 05:10:00',
  '2015-12-02 05:20:00',
  '2015-12-02 05:30:00',
  '2015-12-02 05:40:00',
  '2015-12-02 05:50:00',
  '2015-12-02 06:00:00',
  '2015-12-02 06:10:00',
  '2015-12-02 06:20:00',
  '2015-12-02 06:30:00',
  '2015-12-02 06:40:00',
  '2015-12-02 06:50:00',
  '2015-12-02 07:00:00',
  '2015-12-02 07:10:00'],
 'y': [0, 1.0, 1.0, 6.5, 4.0, 4.0, 6.0, 6.0, 10.0, 5.0, 5.0, 6.0, 7.0],
 'i': [10.0, '-', 5.5, 5.5, '-', 11.0, '-', 4.0, 4.0, '-', 1.0, 1.0, 1.0],
 'd': ['-', 9.0, '-', '-', 8.0, '-', 9.0, '-', '-', 9.0, '-', '-', '-']}