In [1]:
# Importing libraries
  
import numpy as np
import pandas as pd
from tslearn.utils import to_pyts_dataset
from tslearn.generators import random_walks
from tslearn.barycenters import softdtw_barycenter, euclidean_barycenter

In [11]:
class TimeSeries:
    def __init__(self, data: list):
        self.data = np.array(data)
    
    def __add__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data + other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data + other)
        elif isinstance(other, float):
            return TimeSeries(self.data + other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data + other)
        
    def __radd__(self, other):
        return self + other
        
    def __sub__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data - other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data - other)
        elif isinstance(other, float):
            return TimeSeries(self.data - other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data - other)
    
    def __rsub__(self, other):
        return -self + other
    
    def __mul__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data * other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data * other)
        elif isinstance(other, float):
            return TimeSeries(self.data * other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data * other)
    
    def __rmul__(self, other):
        return self * other
        
    def __truediv__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data / other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data / other)
        elif isinstance(other, float):
            return TimeSeries(self.data / other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data / other)
    
    def __floordiv__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data // other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data // other)
        elif isinstance(other, float):
            return TimeSeries(self.data // other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data // other)
        
    def __pow__(self, other):
        if isinstance(other, TimeSeries):
            return TimeSeries(self.data ** other.data)
        elif isinstance(other, int):
            return TimeSeries(self.data ** other)
        elif isinstance(other, float):
            return TimeSeries(self.data ** other)
        elif isinstance(other, np.ndarray):
            return TimeSeries(self.data ** other)
    
    def __neg__(self):
        return TimeSeries(-self.data)
    
    def __repr__(self):
        return str(self.data)
    
    def __len__(self):
        return len(self.data)
    
    def append(self, other):
        self.data = np.append(self.data, other)
        return self
    
    def extend(self, other):
        self.data = np.append(self.data, other.data)
        return self
    
    def size(self):
        return len(self.data)
    
    def get_data(self):
        return self.data

In [2]:
# calculateMahalanobis function to calculate
# the Mahalanobis distance
def calculateMahalanobis(y=None, data=None, cov=None):
  
    y_mu = y - np.mean(data)
    if not cov:
        cov = np.cov(data.values.T)
    inv_covmat = np.linalg.inv(cov)
    left = np.dot(y_mu, inv_covmat)
    mahal = np.dot(left, y_mu.T)
    return mahal.diagonal()

In [3]:
# data
# data = { 'Price': [100000, 800000, 650000, 700000, 
#                    860000, 730000, 400000, 870000,
#                    780000, 400000],
#          'Distance': [16000, 60000, 300000, 10000, 
#                       252000, 350000, 260000, 510000, 
#                       2000, 5000],
#          'Emission': [300, 400, 1230, 300, 400, 104,
#                       632, 221, 142, 267],
#          'Performance': [60, 88, 90, 87, 83, 81, 72, 
#                          91, 90, 93],
#          'Mileage': [76, 89, 89, 57, 79, 84, 78, 99, 
#                      97, 99]
#            }
data = random_walks(n_ts=10, sz=5, d=2)
# data = to_pyts_dataset(data)

In [4]:
data = to_pyts_dataset(data)

In [5]:
data

array([[[ 0.89924132,  1.994074  ,  3.70670861,  3.77934299,
          4.68051678],
        [ 0.63136167,  0.42404455,  0.35053128, -0.85049521,
         -2.11707843]],

       [[-0.91977193, -0.60523712,  0.12669143, -0.3888253 ,
         -0.30128132],
        [-0.2002337 , -0.45376124,  0.44585905,  1.33578257,
         -0.08594881]],

       [[ 0.7248871 ,  0.0948117 ,  0.7108839 ,  1.35920393,
          0.72930517],
        [ 1.10436435, -0.39528485,  0.04040551,  0.42102396,
          2.22465443]],

       [[ 0.78985966,  0.87227635, -0.43082329, -0.44682516,
          0.63595676],
        [-0.87226968, -1.5280429 , -2.71588214, -3.75125961,
         -3.581445  ]],

       [[-0.53345279, -1.34014267, -1.30905531, -0.36118079,
         -1.62068587],
        [-1.33063291, -1.63006093, -2.12918997, -2.89937914,
         -3.06136541]],

       [[ 0.21148865, -0.6313883 , -1.29220963, -2.39147498,
         -3.81051265],
        [-1.17648128, -3.01463631, -4.91202308, -5.97355382,
     

In [6]:
y = data[0]

In [7]:
y

array([[ 0.89924132,  1.994074  ,  3.70670861,  3.77934299,  4.68051678],
       [ 0.63136167,  0.42404455,  0.35053128, -0.85049521, -2.11707843]])

In [8]:
data = data[1:]

In [9]:
y_mu = y - np.mean(data)

In [10]:
cov = np.cov(data.T)

ValueError: m has more than 2 dimensions

In [20]:
np.mean(to_pyts_dataset(data), axis=0)

array([[ 0.23269052,  0.15226351,  0.35418025,  0.05216147,  0.06028867],
       [-0.23496056, -0.32649221, -0.43882192, -0.64713867, -0.62011391]])

In [17]:
np.mean(data, axis=0)

array([[ 0.23269052, -0.23496056],
       [ 0.15226351, -0.32649221],
       [ 0.35418025, -0.43882192],
       [ 0.05216147, -0.64713867],
       [ 0.06028867, -0.62011391]])

In [18]:
softdtw_barycenter(data)

array([[ 0.23040567, -0.23550974],
       [ 0.22046953, -0.19495791],
       [ 0.14550385, -0.16611313],
       [ 0.18379636,  0.38666469],
       [ 0.17890966, -1.36882761]])

In [19]:
euclidean_barycenter(data)

array([[ 0.23269052, -0.23496056],
       [ 0.15226351, -0.32649221],
       [ 0.35418025, -0.43882192],
       [ 0.05216147, -0.64713867],
       [ 0.06028867, -0.62011391]])

In [None]:
# Creating a new column in the dataframe that holds
# the Mahalanobis distance for each row
df['calculateMahalanobis'] = calculateMahalanobis(y=df, data=df[[
  'Price', 'Distance', 'Emission','Performance', 'Mileage']])
  
# Display the dataframe
print(df)