In [22]:
# Importing libraries
  
import numpy as np
import pandas as pd
from tslearn.utils import to_pyts_dataset
from tslearn.generators import random_walks
from tslearn.barycenters import softdtw_barycenter, euclidean_barycenter

In [5]:
class TimeSeries:
    """Thin wrapper around a NumPy array with element-wise arithmetic.

    Binary operators accept another ``TimeSeries``, a Python ``int`` or
    ``float``, a NumPy numeric scalar, or a ``numpy.ndarray``; the result is
    always a new ``TimeSeries``.  For any other operand type the operator
    returns ``NotImplemented`` so that Python raises ``TypeError`` rather than
    the expression silently evaluating to ``None``.
    """

    def __init__(self, data: list):
        # np.array copies its input by default, so the wrapper owns its values.
        self.data = np.array(data)

    @staticmethod
    def _operand(other):
        """Return the raw value to combine with ``self.data``.

        Gives ``NotImplemented`` when ``other`` is not a supported operand.
        """
        if isinstance(other, TimeSeries):
            return other.data
        # np.number covers NumPy scalars such as np.int64, which are not
        # subclasses of the built-in int.
        if isinstance(other, (int, float, np.number, np.ndarray)):
            return other
        return NotImplemented

    def _binary(self, ufunc, other, reflected=False):
        """Apply ``ufunc`` element-wise to ``self.data`` and ``other``.

        With ``reflected=True`` the operands are swapped, i.e. the result is
        ``ufunc(other, self.data)``.
        """
        operand = self._operand(other)
        if operand is NotImplemented:
            return NotImplemented
        if reflected:
            return TimeSeries(ufunc(operand, self.data))
        return TimeSeries(ufunc(self.data, operand))

    def __add__(self, other):
        return self._binary(np.add, other)

    def __radd__(self, other):
        return self._binary(np.add, other, reflected=True)

    def __sub__(self, other):
        return self._binary(np.subtract, other)

    def __rsub__(self, other):
        return self._binary(np.subtract, other, reflected=True)

    def __mul__(self, other):
        return self._binary(np.multiply, other)

    def __rmul__(self, other):
        return self._binary(np.multiply, other, reflected=True)

    def __truediv__(self, other):
        return self._binary(np.true_divide, other)

    def __rtruediv__(self, other):
        return self._binary(np.true_divide, other, reflected=True)

    def __floordiv__(self, other):
        return self._binary(np.floor_divide, other)

    def __rfloordiv__(self, other):
        return self._binary(np.floor_divide, other, reflected=True)

    def __pow__(self, other):
        return self._binary(np.power, other)

    def __rpow__(self, other):
        return self._binary(np.power, other, reflected=True)

    def __neg__(self):
        return TimeSeries(-self.data)

    def __repr__(self):
        return str(self.data)

    def __len__(self):
        return len(self.data)

    def append(self, other):
        """Append ``other`` (scalar or array-like) in place and return ``self``."""
        self.data = np.append(self.data, other)
        return self

    def extend(self, other):
        """Append the values of ``other`` in place and return ``self``.

        ``other`` may be a ``TimeSeries`` or any array-like of values.
        """
        values = other.data if isinstance(other, TimeSeries) else other
        self.data = np.append(self.data, values)
        return self

    def size(self):
        """Return the length of the first axis of the wrapped array."""
        return len(self.data)

    def get_data(self):
        """Return the wrapped NumPy array itself (not a copy)."""
        return self.data

In [6]:
# calculateMahalanobis function to calculate
# the Mahalanobis distance
def calculateMahalanobis(y=None, data=None, cov=None):
    """Squared Mahalanobis distance of each row of ``y`` from the centre of ``data``.

    Parameters
    ----------
    y : array-like, shape (n, p) or (p,)
        Observations to score.  A single 1-D observation is treated as one row.
    data : array-like, shape (m, p)
        Reference sample: rows are observations, columns are variables.  Its
        column means are the centre and, unless ``cov`` is given, its sample
        covariance defines the metric.
    cov : array-like, shape (p, p), optional
        Covariance matrix to use instead of the one estimated from ``data``.

    Returns
    -------
    numpy.ndarray, shape (n,)
        ``(y_i - mu)^T cov^{-1} (y_i - mu)`` for every row ``y_i``.  As in the
        original implementation, no square root is taken.

    Raises
    ------
    TypeError
        If ``y`` or ``data`` is omitted.
    numpy.linalg.LinAlgError
        If the covariance matrix cannot be inverted.
    """
    if y is None or data is None:
        raise TypeError("calculateMahalanobis() requires both 'y' and 'data'")

    # Work on plain float arrays so DataFrames, lists and ndarrays all behave alike.
    data_arr = np.asarray(data, dtype=float)
    y_arr = np.atleast_2d(np.asarray(y, dtype=float))

    # Centre on the per-column mean; np.mean without an axis would collapse
    # everything to one scalar.
    y_mu = y_arr - data_arr.mean(axis=0)

    # `cov is None` rather than `not cov`: the truth value of an array is ambiguous.
    if cov is None:
        cov = np.cov(data_arr, rowvar=False)
    # atleast_2d keeps the single-variable case (0-d covariance) invertible.
    inv_covmat = np.linalg.inv(np.atleast_2d(np.asarray(cov, dtype=float)))

    # Row-wise quadratic form; avoids building the full n x n product only to
    # read its diagonal.
    return np.einsum('ij,jk,ik->i', y_mu, inv_covmat, y_mu)

In [97]:
# data
# data = { 'Price': [100000, 800000, 650000, 700000, 
#                    860000, 730000, 400000, 870000,
#                    780000, 400000],
#          'Distance': [16000, 60000, 300000, 10000, 
#                       252000, 350000, 260000, 510000, 
#                       2000, 5000],
#          'Emission': [300, 400, 1230, 300, 400, 104,
#                       632, 221, 142, 267],
#          'Performance': [60, 88, 90, 87, 83, 81, 72, 
#                          91, 90, 93],
#          'Mileage': [76, 89, 89, 57, 79, 84, 78, 99, 
#                      97, 99]
#            }
# Random integer test tensor of shape (4, 2, 3) with entries drawn from {0, ..., 4}.
# NOTE(review): no seed is set, so re-running will not reproduce the output shown below.
x = np.random.randint(5, size=(4, 2, 3))
x

array([[[3, 0, 0],
        [4, 3, 4]],

       [[3, 3, 3],
        [2, 2, 1]],

       [[3, 1, 3],
        [2, 0, 2]],

       [[4, 2, 1],
        [4, 0, 4]]])

In [103]:
# Number of entries along axis 0 of x.
N = x.shape[0]
# Centre x on its mean over axis 0 (keepdims so the mean broadcasts back onto x).
m1 = x - x.sum(0,keepdims=1)/N
# The einsum sums over the shared index j (axis 1) and keeps i, l and k, so the
# result has shape (N, N, K): only the first two axes form a square block.
# NOTE(review): the centring and the N - 1 divisor both refer to axis 0 while the
# contraction runs over axis 1; if a covariance across the N samples was intended
# the subscripts would be 'ijk,ilk->jlk' -- confirm.
cov = np.einsum('ijk,ljk->ilk',m1,m1) /(N - 1)
cov

array([[[ 0.35416667,  1.77083333,  1.54166667],
        [-0.3125    , -0.3125    , -1.45833333],
        [-0.3125    , -0.47916667, -1.04166667],
        [ 0.27083333, -0.97916667,  0.95833333]],

       [[-0.3125    , -0.3125    , -1.45833333],
        [ 0.35416667,  0.9375    ,  1.54166667],
        [ 0.35416667, -0.5625    ,  0.95833333],
        [-0.39583333, -0.0625    , -1.04166667]],

       [[-0.3125    , -0.47916667, -1.04166667],
        [ 0.35416667, -0.5625    ,  0.95833333],
        [ 0.35416667,  0.60416667,  0.70833333],
        [-0.39583333,  0.4375    , -0.625     ]],

       [[ 0.27083333, -0.97916667,  0.95833333],
        [-0.39583333, -0.0625    , -1.04166667],
        [-0.39583333,  0.4375    , -0.625     ],
        [ 0.52083333,  0.60416667,  0.70833333]]])

In [108]:
# np.linalg.inv inverts over the last two axes of a stacked input, which must be square.
# NOTE(review): if cov is the (4, 4, 3) array shown above, its last two axes are (4, 3),
# which would explain the LinAlgError below; moving the length-3 axis to the front first
# (e.g. np.moveaxis(cov, -1, 0)) looks like the intended call -- confirm.
np.linalg.inv(cov, )

LinAlgError: Last 2 dimensions of the array must be square

In [110]:
# Reference 3x3 identity matrix (scratch check).
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [109]:
# np.linalg.tensorinv needs prod(shape[:ind]) == prod(shape[ind:]).
# NOTE(review): for a (4, 4, 3) cov and ind=2 that is 16 vs 3, consistent with the
# error recorded below -- confirm cov's shape at the time this ran.
inv_cov = np.linalg.tensorinv(cov, ind=2)

LinAlgError: Last 2 dimensions of the array must be square

In [83]:
# Small 2-D integer sample of shape (4, 2) with entries from {0, ..., 4}; unseeded.
# NOTE(review): this rebinds `data`, which other cells index as a 3-D array.
data = np.random.randint(5, size=(4, 2))
data

array([[1, 0],
       [1, 4],
       [4, 0],
       [0, 0]])

In [84]:
# np.cov defaults to rowvar=True: each row is a variable and each column an
# observation, hence the 4x4 result for the (4, 2) input.  Pass rowvar=False
# when rows are the observations.
np.cov(data)

array([[ 0.5, -1.5,  2. ,  0. ],
       [-1.5,  4.5, -6. ,  0. ],
       [ 2. , -6. ,  8. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ]])

In [57]:
# Swap the first two axes of a 3-D array, leaving the last axis in place.
# NOTE(review): the output below has shape (2, 3, 4), so `data` was a (3, 2, 4)
# array when this ran -- not the (4, 2) array defined in the cell above.
np.transpose(data, (1, 0, 2))

array([[[1, 2, 3, 4],
        [3, 4, 5, 6],
        [5, 6, 7, 8]],

       [[2, 3, 4, 5],
        [4, 5, 6, 7],
        [6, 7, 8, 9]]])

In [43]:
# Alias only: x and data now refer to the same object (no copy is made).
x = data

In [44]:
# Size of the first axis of x (10 in the recorded run).
N = x.shape[0]
N

10

In [45]:
# First entry along axis 0 of the 3-D array x; shape (2, 5) in the recorded output.
m = x[0, :, :]
m

array([[-0.86557046, -0.61443859, -0.50824325,  0.29412327, -1.17867698],
       [ 0.39816941,  0.74053315,  0.33091491,  0.02902028,  1.16368194]])

In [51]:
# Element-wise mean across axis 0 of x.
np.mean(x, axis=0)

array([[ 0.13562241,  0.73423937,  0.8029737 ,  1.1198561 ,  0.88859473],
       [ 0.12659499,  0.23373692,  0.1436549 , -0.11167652, -0.25775   ]])

In [52]:
# Same mean written as sum / N; keepdims=True retains the reduced axis with
# length 1, so the result is (1, 2, 5) rather than (2, 5).
x.sum(axis=0, keepdims=True) / N

array([[[ 0.13562241,  0.73423937,  0.8029737 ,  1.1198561 ,
          0.88859473],
        [ 0.12659499,  0.23373692,  0.1436549 , -0.11167652,
         -0.25775   ]]])

In [21]:
def to_timeseries(data):
    """Wrap every ``data[i][j]`` in a ``TimeSeries``.

    Returns a list of lists with the same outer and inner lengths as ``data``;
    element ``[i][j]`` is ``TimeSeries(data[i][j])``.
    """
    return [
        [TimeSeries(data[row][col]) for col in range(len(data[row]))]
        for row in range(len(data))
    ]

def to_numpy(data):
    """Collect the ``.data`` attribute of every ``data[i][j]`` into one array.

    The nested structure of ``data`` is preserved as the leading two axes of
    the returned ``numpy.ndarray``.
    """
    unwrapped = [
        [data[row][col].data for col in range(len(data[row]))]
        for row in range(len(data))
    ]
    return np.array(unwrapped)

In [32]:
# Wrap each data[i][j] in a TimeSeries, giving a nested list of wrappers.
new_data = to_timeseries(data)

In [33]:
# Pack the nested list into a NumPy array.
# NOTE(review): the mean printed in the next cell reports dtype=object, which
# suggests the elements stay TimeSeries instances rather than numbers -- confirm.
np_new_data = np.array(new_data)

In [39]:
# Mean over axis 0 of the wrapped array; the recorded result has dtype=object.
# NOTE(review): presumably evaluated through TimeSeries' + and / operators -- confirm.
np.mean(np_new_data, axis=0)

array([[0.13562241 0.73423937 0.8029737  1.1198561  0.88859473],
       [ 0.12659499  0.23373692  0.1436549  -0.11167652 -0.25775   ]],
      dtype=object)

In [42]:
# np.cov accepts at most 2-D input; `data` has more than two dimensions here,
# hence the ValueError recorded below.
np.cov(data, rowvar=False)

ValueError: m has more than 2 dimensions

In [34]:
# Take the first entry along axis 0 of data as the query sample.
y = data[0]

In [35]:
# Inspect the query sample: shape (2, 5) in the recorded output.
y

array([[ 0.98000723,  1.12008055,  2.44036502,  4.82350001,  3.68844372],
       [-1.07306635, -0.45691828, -0.30805465,  1.80594941,  2.26949636]])

In [36]:
# Drop the first entry so the query sample is no longer part of the reference set.
# NOTE(review): rebinding `data` makes this cell non-idempotent -- every re-run
# drops one more entry.
data = data[1:]

In [37]:
# np.mean without an axis reduces over every element, so one scalar is subtracted from y.
# NOTE(review): a per-position mean would need axis=0 -- confirm the intent.
y_mu = y - np.mean(data)

In [40]:
# .T reverses the axes but does not reduce their number, so np.cov still receives
# more than two dimensions (ValueError recorded below).
cov = np.cov(data.T)

ValueError: m has more than 2 dimensions

In [43]:
# Toy 3x2 array for checking how np.cov orients its input.
x = np.array([[0, 2], [1, 1], [2, 0]])
x

array([[0, 2],
       [1, 1],
       [2, 0]])

In [44]:
# Rows are treated as variables (rowvar=True default), so the 3x2 input yields a 3x3 matrix.
np.cov(x)

array([[ 2.,  0., -2.],
       [ 0.,  0.,  0.],
       [-2.,  0.,  2.]])

In [20]:
# Mean over axis 0 after converting with tslearn's to_pyts_dataset.
# Judging by the recorded outputs, the result is the transpose of
# np.mean(data, axis=0): (2, 5) here versus (5, 2) there.
np.mean(to_pyts_dataset(data), axis=0)

array([[ 0.23269052,  0.15226351,  0.35418025,  0.05216147,  0.06028867],
       [-0.23496056, -0.32649221, -0.43882192, -0.64713867, -0.62011391]])

In [17]:
# Point-wise arithmetic mean across axis 0; shape (5, 2) in the recorded output.
np.mean(data, axis=0)

array([[ 0.23269052, -0.23496056],
       [ 0.15226351, -0.32649221],
       [ 0.35418025, -0.43882192],
       [ 0.05216147, -0.64713867],
       [ 0.06028867, -0.62011391]])

In [18]:
# tslearn's softdtw_barycenter on the dataset, called with its default arguments.
softdtw_barycenter(data)

array([[ 0.23040567, -0.23550974],
       [ 0.22046953, -0.19495791],
       [ 0.14550385, -0.16611313],
       [ 0.18379636,  0.38666469],
       [ 0.17890966, -1.36882761]])

In [19]:
# tslearn's euclidean_barycenter; the recorded output matches np.mean(data, axis=0) above.
euclidean_barycenter(data)

array([[ 0.23269052, -0.23496056],
       [ 0.15226351, -0.32649221],
       [ 0.35418025, -0.43882192],
       [ 0.05216147, -0.64713867],
       [ 0.06028867, -0.62011391]])

In [None]:
# Creating a new column in the dataframe that holds
# the value returned by calculateMahalanobis for each row
# NOTE(review): `df` is not defined in any visible cell (the dict it was presumably
# built from is commented out earlier) -- confirm it exists before running.
# NOTE(review): y is the whole frame while data is the five-column selection;
# any extra column in df would not line up with the column means -- confirm.
df['calculateMahalanobis'] = calculateMahalanobis(y=df, data=df[[
  'Price', 'Distance', 'Emission','Performance', 'Mileage']])
  
# Display the dataframe
print(df)