In [None]:
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
import multiprocessing
from datetime import datetime
import pandas as pd
from functools import partial
from typing import Optional, Union
from __future__ import annotations
import scipy.stats as st
from datetime import datetime, timedelta
import staircase as sc
from numba import jit, prange
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange

class Basis():
    """Hold the callable that gives a step its shape.

    The callable is applied to offsets (``x - start``) and returns the
    unweighted step value for each offset. When no callable is supplied,
    the Heaviside function with ``H(0) = 1`` is used.
    """
    T = Union[int,float]

    # One-element tuple; ``('_base')`` without the comma is just a string.
    __slots__ = ('_base',)

    def _default_base(self, x):
        """Return ``np.heaviside(x, 1)``: 0 below zero, 1 at and above zero."""
        return np.heaviside(x, 1)

    def __init__(self, bfunc=None) -> None:
        """Store *bfunc*, falling back to the Heaviside default when it is None."""
        if bfunc is None:
            bfunc = self._default_base

        self._base = bfunc

    def base(self):
        """Return the stored callable (a bound method for the default basis)."""
        return self._base


class Step():
    """A single weighted step located at ``start``.

    ``start`` may be a number or a ``pd.Timestamp``; ``start_ts`` holds the
    numeric position used for ordering and evaluation (the POSIX timestamp
    for a ``pd.Timestamp``, otherwise ``start`` itself). When ``end`` is
    given, a companion step with the negated weight is created at ``end``
    and is available through :meth:`end`.
    """
    T = Union[int,float,pd.Timestamp]

    __slots__ = ('start','start_ts','weight','_end','basis','_base')

    def __init__(self, start:T, end:T = None, weight:T = 1, basis:Basis = None) -> None:
        """Create the step.

        Args:
            start: position of the step (number or ``pd.Timestamp``).
            end: optional position of the cancelling step.
            weight: height of the step.
            basis: shape provider; a fresh ``Basis()`` is used when omitted.
        """
        # Build the default per call rather than sharing one instance that
        # was created when the class body was executed.
        if basis is None:
            basis = Basis()

        self.start = start
        self.start_ts = self._as_number(start)
        self.weight = weight
        self.basis = basis
        self._base = self.basis.base()

        if end is None:
            self._end = None
        else:
            # The closing step cancels this one from ``end`` onwards.
            self._end = Step(end, end=None, weight=-1*self.weight, basis=self.basis)

    @staticmethod
    def _as_number(value):
        """Return the POSIX timestamp of a ``pd.Timestamp``; pass anything else through."""
        if isinstance(value, pd.Timestamp):
            return value.timestamp()
        return value

    def _position_of(self, other):
        """Return the numeric position of *other* (a Step, a Timestamp or a number)."""
        if isinstance(other, Step):
            return other.start_ts
        return self._as_number(other)

    def rebase(self,new_basis:Basis) -> None:
        """Replace the basis and refresh the cached base callable."""
        self.basis = new_basis
        self._base = self.basis.base()

    def __lt__(self, other) -> bool:
        """Order by position; *other* may be a Step, a Timestamp or a number."""
        return self.start_ts < self._position_of(other)

    def __gt__(self, other) -> bool:
        """Order by position; *other* may be a Step, a Timestamp or a number."""
        return self.start_ts > self._position_of(other)

    def step(self, x) -> np.ndarray:
        """Evaluate the weighted step at every point of *x*.

        *x* is a sized, indexable sequence of numbers or of datetime-like
        values; the type of its first element selects the conversion. An
        empty *x* yields an empty result.
        """
        if len(x) > 0 and isinstance(x[0], datetime):
            # pd.Timestamp subclasses datetime, so both take this branch.
            xf = np.asarray([t.timestamp()-self.start_ts for t in x])
        else:
            # Numeric grid: subtract in one vectorised operation.
            xf = np.asarray(x) - self.start_ts
        return self.weight*self._base(xf)

    def end(self) -> Step:
        """Return the closing step, or None when no end was given."""
        return self._end

    def __add__(self,other:Step) -> Step:
        """Return a new default-weight Step at the sum of both start values."""
        return Step(self.start + other.start)

    def __sub__(self,other:Step) -> Step:
        """Return a new default-weight Step at the difference of both start values."""
        return Step(self.start - other.start)

    def __repr__(self) -> str:
        return str(self.start)


class Steps():
    """A collection of Step objects whose sum forms a step function.

    The steps are kept in ``_steps``, sorted by position after every
    ``add`` or ``+``.
    """
    T = Union[int,float,pd.Timestamp]
    # Forward references keep this alias independent of definition order.
    V = Union['Step','Optional[Steps]',int,float]

    def __init__(self) -> None:
        self._steps = []

    @staticmethod
    def _fill_missing(dt, fill):
        """Return *fill* when *dt* is null (``pd.isnull``), otherwise *dt*."""
        if pd.isnull(dt):
            return fill
        else:
            return dt

    @staticmethod
    def stepify(data,start='start',end=None,weight=None) -> pd.Series:
        """Build one Step per row of a DataFrame.

        Args:
            data: source ``pd.DataFrame``.
            start: name of the column holding step starts.
            end: optional name of the column holding step ends.
            weight: optional name of the column holding step weights.

        Returns:
            The result of ``data.apply(..., axis=1)``: one Step per row.

        Raises:
            TypeError: if *data* is not a DataFrame, if the start column is
                neither datetime nor integer/float, or if the end column is
                not of the same kind as the start column.

        For datetime data a missing start is replaced by midnight of the
        earliest end date (of the earliest start date when no end column is
        given), and a missing end by midnight of the latest start date.
        """
        if not isinstance(data, pd.DataFrame):
            raise TypeError("input data must be a Dataframe")

        kinds = pd.api.types

        def is_number(dtype):
            return kinds.is_integer_dtype(dtype) or kinds.is_float_dtype(dtype)

        start_dtype = data[start].dtype
        if kinds.is_datetime64_any_dtype(start_dtype):
            same_kind = kinds.is_datetime64_any_dtype
        elif is_number(start_dtype):
            same_kind = is_number
        else:
            raise TypeError("start data can only be intger, float or datetime")

        if end is not None and not same_kind(data[end].dtype):
            raise TypeError("end data must be same type as start data")

        if same_kind is is_number:
            def start_value(row):
                return row[start]

            def end_value(row):
                return row[end]
        else:
            # Fill values are loop invariants: compute them once, not per row.
            # Without an end column there is no end date to fall back on, so
            # the earliest start date is used for missing starts.
            fill_column = start if end is None else end
            start_fill = pd.Timestamp(data[fill_column].min().date())
            end_fill = pd.Timestamp(data[start].max().date())

            def start_value(row):
                return Steps._fill_missing(pd.Timestamp(row[start]), start_fill)

            def end_value(row):
                return Steps._fill_missing(pd.Timestamp(row[end]), end_fill)

        def build(row):
            step_end = None if end is None else end_value(row)
            if weight is None:
                return Step(start_value(row), step_end)
            return Step(start_value(row), step_end, row[weight])

        return data.apply(build, axis=1)

    def add(self,steps:[Step]) -> Steps:
        """Add deep copies of *steps* and of their closing steps, then sort.

        Mutates this object and returns it so calls can be chained.
        """
        steps = list(steps)
        end_steps = []
        for s in steps:
            closing = s.end()
            if closing is not None:
                end_steps.append(closing)

        self._steps.extend(copy.deepcopy(steps))
        self._steps.extend(copy.deepcopy(end_steps))
        self._steps.sort()
        return self

    def __add__(self,b:V) -> Steps:
        """Return a new Steps holding this collection plus *b*.

        *b* may be a Step (its closing step is included), another Steps, or
        a plain number, which is added as a constant offset by placing a
        step of that weight at minus infinity.
        """
        combine = self.copy()
        if isinstance(b, Step):
            combine._steps.append(copy.deepcopy(b))
            if b.end() is not None:
                combine._steps.append(copy.deepcopy(b.end()))
        elif isinstance(b, Steps):
            combine._steps.extend(copy.deepcopy(b.steps()))
        else:
            combine._steps.append(Step(-np.inf,weight=b))

        # Keep the same ordering guarantee that add() gives.
        combine._steps.sort()
        return combine

    def copy(self) -> Steps:
        """Return a new Steps holding deep copies of the current steps."""
        new_instance = Steps()
        new_instance._steps.extend(copy.deepcopy(self._steps))
        return new_instance

    def steps(self) -> [Step]:
        """Return the internal list of steps (not a copy)."""
        return self._steps

    def rebase(self,new_basis:Basis = None) -> None:
        """Apply *new_basis* to every step; a fresh ``Basis()`` when omitted."""
        if new_basis is None:
            new_basis = Basis()
        for s in self._steps:
            s.rebase(new_basis)

    def direct_step_function(self):
        """Return a DataFrame with one ``ts``/``step`` row per finite step.

        Steps located at plus or minus infinity (constant offsets) are left out.
        """
        data = [
            {'ts': s.start, 'step': s.weight}
            for s in self._steps
            if abs(s.start_ts) < np.inf
        ]
        return pd.DataFrame.from_dict(data)

    def _pcalc(self, st,val):
        """Evaluate the single step *st* at *val*."""
        return st.step(val)

    #@jit(parallel=True)
    def step_function(self, x) -> np.ndarray:
        """Return the sum of all steps evaluated at every point of *x*."""
        # Accumulate in place instead of materialising an
        # n_steps x len(x) matrix first.
        total = np.zeros(len(x))
        for s in self._steps:
            # A custom basis may return an object array; force float.
            total += np.asarray(s.step(x), dtype=float)
        return total

def generate_stuff():
    """Build a few sample steps and combine them; nothing is returned."""
    rise_a, fall_a, rise_b, rise_c, fall_b, fall_c = (
        Step(3.75),
        Step(5.6, weight=-1),
        Step(7.2),
        Step(12),
        Step(9, weight=-1),
        Step(9.5, weight=-1),
    )

    # Two collections that share the step at 3.75.
    first_set = Steps().add([rise_b, rise_a, fall_a])
    second_set = Steps().add([rise_c, rise_a, fall_b])

    # Combine the collections, then append two more single steps.
    merged = second_set + first_set
    extended = merged + rise_c + fall_c


In [None]:
# Load the vessel queue data; 'enter' and 'leave' are parsed as day-first dates.
df = pd.read_csv(r"data/vessel_queue.csv", parse_dates=['enter', 'leave'], dayfirst=True)

def fill_missing(dt, fill):
    """Return *fill* when *dt* is null according to ``pd.isnull``, else *dt*."""
    return fill if pd.isnull(dt) else dt

# Pass every 'enter' and 'leave' value through pd.Timestamp, element by element.
df.enter = df.enter.apply(pd.Timestamp)
df.leave = df.leave.apply(pd.Timestamp)

# One Step per row: 'enter' is the start column, 'leave' the end column.
sst = Steps.stepify(df,'enter','leave')
# Earlier inline versions of the same construction, kept for reference.
#sst = df.apply(lambda x: Step(fill_missing(pd.Timestamp(x.enter),pd.Timestamp((df.leave.min()).date())),fill_missing(pd.Timestamp(x.leave),pd.Timestamp((df.enter.max()).date()))),axis=1)
#sst = df.loc[pd.isnull(df.enter)].apply(lambda x: Step(fill_missing(pd.Timestamp(x.enter),pd.Timestamp((df.leave.min()).date())),fill_missing(pd.Timestamp(x.leave),None)),axis=1)

In [None]:
# Check that df is exactly a pandas DataFrame (the cell displays the result).
type(df) == pd.DataFrame
#df.enter.dtypes ==np.dtype('datetime64[ns]')

In [None]:
# Print the earliest and latest value of each date column.
print(f'enter date range: {df.enter.min()} to {df.enter.max()}')
print(f'leave date range {df.leave.min()} to {df.leave.max()}')


# Midnight of the earliest 'leave' date.
print(f'{(pd.Timestamp((df.leave.min()).date()))}')
# A single Step built from row 12: start = enter, end = leave.
dst = Step(df.iloc[12].enter,df.iloc[12].leave)

In [None]:
# Rows whose 'enter' value is missing, ordered by 'leave'.
df.loc[pd.isnull(df.enter)].sort_values('leave')

In [None]:
#p = np.arange(2,13,0.01)
def mybase(x):
    """Smooth step from tanh with scale 0.05: 0.5 at x = 0, tending to 0 and 1."""
    squashed = np.tanh(x/0.05)
    return 0.5*(1+squashed)

def mybase2(x):
    """Smooth step from arctan with scale 5: 0.5 at x = 0, bounded by 0 and 1."""
    angle = np.arctan(x/5)
    return 0.5+(1/np.pi)*angle

# Wrap mybase2 as a one-input, one-output ufunc and use it as a basis.
mbase = np.frompyfunc(mybase2,1,1)
tbase = Basis(mbase)


# Evaluation grid: one point per minute from 2020-01-01 up to 2020-12-30.
p = np.arange(pd.Timestamp(2020,1,1), pd.Timestamp(2020,12,30), pd.Timedelta(minutes=1)).astype(pd.Timestamp)
#p = np.arange(pd.Timestamp(df.enter.max()), pd.Timestamp(df.leave.max()), pd.Timedelta(hours=1)).astype(pd.Timestamp)


#p = np.arange('2020-01-01', '20202-01-05', dtype='datetime64[h]')
# The same grid as POSIX timestamps.
p2 = [t.timestamp() for t in p]

fig,ax = plt.subplots(nrows=2,figsize=(20,16))

# Collect all row steps (and their closing steps) into one step function.
vsteps = Steps()
vsteps = vsteps.add(sst)
#vsteps.rebase(tbase)
#print(p)
#print(dst.step(p))
# Top panel: the summed step function evaluated on the grid.
ax[0].step(p,vsteps.step_function(p))
#ax.set_xlim(p[0], p[-1])

# The hour locator takes the hour or sequence of hours you want to
# tick, not the base multiple

#ax.xaxis.set_major_locator(DayLocator())
#ax.xaxis.set_minor_locator(HourLocator(range(0, 25, 6)))
#ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))

#ax.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
#fig.autofmt_xdate()

# Bottom panel: the same enter/leave data layered with the staircase
# library (presumably as a reference to compare against).
queue = sc.Stairs(use_dates=True).layer(df.enter, df.leave)
queue.plot(ax[1])
#ax.step(p,sst[3].step(p))

In [None]:
# Start of the timing that is reported at the end of this cell.
tic = time.perf_counter()
def mybase(x):
    """Smooth step from tanh with scale 2000: 0.5 at x = 0, tending to 0 and 1."""
    squashed = np.tanh(x/2000)
    return 0.5*(1+squashed)

def mybase2(x):
    """Smooth step from arctan with scale 500: 0.5 at x = 0, bounded by 0 and 1."""
    angle = np.arctan(x/500)
    return 0.5+(1/np.pi)*angle

def mybase3(x):
    """Logistic smooth step with rate 0.01: 0.5 at x = 0, bounded by 0 and 1."""
    decay = np.exp(-0.01*x)
    return 1.0/(1.0+decay)

# Wrap mybase as a one-input, one-output ufunc and use it as a basis.
mbase = np.frompyfunc(mybase,1,1)
tbase = Basis(mbase)


# Evaluation grid: one point per minute from 2020-01-01 11:00 up to 2020-01-05.
p = np.arange(pd.Timestamp(2020,1,1,11), pd.Timestamp(2020,1,5), pd.Timedelta(minutes=1)).astype(pd.Timestamp)

fig,ax = plt.subplots(nrows=3,figsize=(20,16))

# All row steps plus a constant offset of 2.
vsteps = Steps()
vsteps = vsteps.add(sst)
vsteps = vsteps + 2
#vsteps.rebase(tbase)
# Panel 0: the summed step function evaluated on the grid.
ax[0].step(p,vsteps.step_function(p))


# Running total of the step weights, in the order the steps are stored.
dfd = vsteps.direct_step_function()
dfd['cumsum'] = dfd.step.cumsum()
# Panel 1: rows leaving before 2020-01-05, layered with the staircase library.
df2 = df.loc[df.leave < pd.Timestamp('2020-01-05')]
queue = sc.Stairs(use_dates=True).layer(df2.enter, df2.leave)
queue.plot(ax[1])

# Panel 2: per timestamp, the largest running total, drawn as steps.
dfd.ts = dfd.ts.apply(pd.Timestamp)
dfdg = dfd.sort_values('ts').groupby('ts')['cumsum'].max()
dfdg.loc[dfdg.index < pd.Timestamp('2020-01-05')].plot(ax=ax[2],drawstyle="steps")

# Report the time elapsed since tic was taken.
toc = time.perf_counter()
print(f"Generated in  {toc - tic:0.4f} seconds")

In [None]:
# Inspect the start value of the step stored at index 9.
tst = vsteps.steps()
tst[9].start

In [None]:

# Run the sample construction; it returns nothing.
generate_stuff()


In [None]:
from scipy.stats import norm

def mybase(x):
    """Smooth step from tanh with scale 0.05: 0.5 at x = 0, tending to 0 and 1."""
    squashed = np.tanh(x/0.05)
    return 0.5*(1+squashed)

def mybase2(x):
    """Smooth step from arctan with scale 0.05: 0.5 at x = 0, bounded by 0 and 1."""
    angle = np.arctan(x/0.05)
    return 0.5+(1/np.pi)*angle

# Wrap mybase2 as a one-input, one-output ufunc and use it as a basis.
mbase = np.frompyfunc(mybase2,1,1)
tbase = Basis(mbase)

# Sample steps on a numeric axis; y, y2 and y3 have an end and so form pulses.
x = Step(3.75)
y = Step(5.6,6)
y2 = Step(8.2,8.8)
y3 = Step(9.2,9.8)

z = Step(7.2)
q = Step(12)

# Downward steps.
p = Step(9,weight=-1)
p1 = Step(9.5,weight=-1)

ss1 = Steps()
ss1 = ss1.add([z,x,y])

ss2 = Steps()
ss2 = ss2.add([q,x,p])

# Combine collections, then append a single pulse.
ss3 = ss2 + ss1
ss4 = ss3+y2

ss5 = Steps().add([y2,y3])

# Step arithmetic acts on the start values only: 9.2 + 9.2 - 8.2.
yy = y3+y3-y2
# Only ss5 is switched to the smooth arctan basis.
ss5.rebase(tbase)
    
# p is rebound here from a Step to the numeric evaluation grid.
p = np.arange(2,13,0.01)

fig,ax = plt.subplots(nrows=6,figsize=(14,8))

# One panel per collection, plus the single step yy in the last panel.
ax[0].step(p,ss1.step_function(p))
ax[1].step(p,ss2.step_function(p))
ax[2].step(p,ss3.step_function(p))
ax[3].step(p,ss4.step_function(p))
ax[4].step(p,ss5.step_function(p))
ax[5].step(p,yy.step(p))


In [None]:
# Overlay ss1, ss2 and ss3 on a single axis.
fig, ax = plt.subplots(figsize=(16,8))

plt.step(p,ss1.step_function(p),label = "s1")
plt.step(p,ss2.step_function(p),label = "s2", linestyle="--", linewidth=3)
plt.step(p,ss3.step_function(p),label = "s3", linestyle="-", linewidth=2)
ax.legend()
plt.show()


In [None]:
# Sort an array of steps in place.
# NOTE(review): `st` rebinds the scipy.stats alias imported at the top of the
# file, and `p` was rebound to a numeric grid in an earlier cell, so this array
# may not hold three Step objects depending on cell execution order - confirm.
st = np.array([q,x,p])
st.sort()
fig, ax = plt.subplots(figsize=(16,8))

# Overlay ss1, ss2 and ss3 on a single axis.
plt.step(p,ss1.step_function(p),label = "1")
plt.step(p,ss2.step_function(p),label = "s", linestyle="--", linewidth=3)
plt.step(p,ss3.step_function(p),label = "3", linestyle="-", linewidth=2)
ax.legend()
plt.show()

In [None]:
import numpy as np
k = [1,2,3]

# A list holding three references to the same list k (not three copies).
kk = [k]*3
kk

In [None]:
# Three timestamp-based steps on 2020-01-01, deliberately out of order.
x = Step(pd.Timestamp(2020,1,1,15,12))
y = Step(pd.Timestamp(2020,1,1,11,1))
z = Step(pd.Timestamp(2020,1,1,9,25))

# Array holding the three Step objects.
na = np.array([x,y,z])
na

In [None]:
# Keep the steps located after 2020-01-01 10:00; `>` is applied element-wise
# and so goes through Step.__gt__.
fna = na[np.where(na > pd.Timestamp(2020,1,1,10))]
fna

In [None]:
# Plot a Heaviside step whose position is pushed out to plus infinity.
fig, ax = plt.subplots(figsize=(16,8))

p = np.arange(-2,3,0.1)
# Every offset p - inf is -inf, so the plotted curve is 0 everywhere.
# np.inf is the supported spelling; the np.Inf alias was removed in NumPy 2.0.
plt.step(p,np.heaviside(p-np.inf,1))
