# MAT-model: Model Classes for Multiple Aspect Trajectory Data Mining \[MAT-Tools Framework\]

Sample code, written as a Python notebook, showing how to use `mat-model` as a Python library.

This package offers tools to support the user in the task of modeling multiple aspect trajectories. It is part of a unified framework for mining multiple aspect trajectories and, more generally, multidimensional sequence data.

Created on Apr, 2024
Copyright (C) 2023, License GPL Version 3 or superior (see LICENSE file)

In [36]:
#!pip install mat-model
!pip install --upgrade mat-model

Collecting mat-model
  Downloading mat_model-0.1b5-py3-none-any.whl.metadata (5.0 kB)
Downloading mat_model-0.1b5-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mat-model
  Attempting uninstall: mat-model
    Found existing installation: mat-model 0.1b4
    Uninstalling mat-model-0.1b4:
      Successfully uninstalled mat-model-0.1b4
Successfully installed mat-model-0.1b5


In [1]:
# Only `load_ds` is needed from matdata, so it is imported by name rather than
# with a wildcard (keeps the notebook namespace clean and the origin of names clear).
from matdata.dataset import load_ds

# Load a sample of the Foursquare NYC dataset as a dataframe.
# NOTE(review): sample_size=0.25 is presumably the fraction of the data to keep - confirm.
ds = 'mat.FoursquareNYC'
df = load_ds(ds, sample_size=0.25)
df

Loading dataset file: https://github.com/mat-analysis/datasets/tree/main/mat/FoursquareNYC/


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1055k  100 1055k    0     0  3873k      0 --:--:-- --:--:-- --:--:-- 3938k


Stratification (class-balanced):   0%|          | 0/193 [00:00<?, ?it/s]

Sorting data:   0%|          | 0/193 [00:00<?, ?it/s]

Unnamed: 0,space,time,day,poi,type,root_type,rating,weather,tid,label
0,40.8340978041072 -73.9452672225881,788,Monday,Galaxy Gourmet Deli,Deli / Bodega,Food,8.2,Clouds,127,6
1,40.5671960000000 -73.8825760000000,1175,Monday,MTA Bus - Beach 169 St & Rockaway Point Bl (Q2...,Bus Stop,Travel & Transport,-1.0,Clouds,127,6
2,40.6899127194574 -73.9815044403076,1381,Monday,MTA Subway - DeKalb Ave (B/Q/R),Metro Station,Travel & Transport,-1.0,Clouds,127,6
3,40.7085883614824 -73.9910316467285,1404,Monday,MTA Subway - Manhattan Bridge (B/D/N/Q),Train,Travel & Transport,-1.0,Clouds,127,6
4,40.8331652006224 -73.9418603427692,845,Tuesday,The Grinnell,Home (private),Residence,-1.0,Clear,127,6
...,...,...,...,...,...,...,...,...,...,...
17,40.7047332789043 -73.9877378940582,939,Thursday,Miami Ad School Brooklyn,General College & University,College & University,-1.0,Clear,29559,1070
18,40.6978026652822 -73.9941451630314,483,Friday,Eastern Athletic Club,Gym,Outdoors & Recreation,6.9,Clear,29559,1070
19,40.6946728967503 -73.9940820360805,794,Friday,Starbucks,Coffee Shop,Food,7.0,Clear,29559,1070
20,40.7023694709909 -73.9875124790989,1261,Friday,Superfine,American Restaurant,Food,7.6,Clear,29559,1070


#### Trajectory Objects

Alternatively, you can convert the dataframe into Trajectory objects:

In [2]:
from matmodel.util.parsers import df2trajectory

# Convert the dataframe into trajectory objects (T) and obtain the dataset
# descriptor (data_desc) that describes the attributes of those trajectories.
T, data_desc = df2trajectory(df)

Converting Trajectories:   0%|          | 0/694 [00:00<?, ?it/s]

In [4]:
# Take the trajectory at index 1 and print its textual representation.
traj = T[1]
traj.display()

𝘛𐄁135 	𝘱1⟨(40.690 -73.982), 2024-01-01 02:25:00, Monday, NYCT Transit Survey Unit, Office, Professional & Other Places, -1.0, Clouds⟩↴
	𝘱2⟨(40.709 -73.991), 2024-01-01 03:21:00, Monday, MTA Subway - Manhattan Bridge (B/D/N/Q), Train, Travel & Transport, -1.0, Clouds⟩↴
	𝘱3⟨(40.828 -73.926), 2024-01-01 23:02:00, Monday, MTA Subway - 161st St/Yankee Stadium (4/B/D), Metro Station, Travel & Transport, -1.0, Clouds⟩↴
	𝘱4⟨(40.709 -73.991), 2024-01-01 01:40:00, Tuesday, MTA Subway - Manhattan Bridge (B/D/N/Q), Train, Travel & Transport, -1.0, Clouds⟩↴
	𝘱5⟨(40.690 -73.982), 2024-01-01 02:25:00, Tuesday, NYCT Transit Survey Unit, Office, Professional & Other Places, -1.0, Rain⟩↴
	𝘱6⟨(40.759 -73.988), 2024-01-01 04:07:00, Tuesday, MTA Bus - 8 Av & W 46 St (M20/M104), Bus Stop, Travel & Transport, -1.0, Rain⟩↴
	𝘱7⟨(40.653 -74.002), 2024-01-01 05:07:00, Wednesday, MTA Regional Bus Depot - Jackie Gleason, Bus Station, Travel & Transport, -1.0, Clouds⟩↴
	𝘱8⟨(40.638 -73.979), 2024-01-01 05:53:00, Wed

In [5]:
# Attribute descriptors of the dataset, displayed as "order. name (type)".
data_desc.attributes

[1. space (space2d),
 2. time (datetime),
 3. day (nominal),
 4. poi (nominal),
 5. type (nominal),
 6. root_type (nominal),
 7. rating (numeric),
 8. weather (nominal)]

In [8]:
# Each trajectory keeps an internal reference to the dataset descriptor,
# so the same attribute list can be reached from the trajectory itself.
traj.data_desc.attributes

[1. space (space2d),
 2. time (datetime),
 3. day (nominal),
 4. poi (nominal),
 5. type (nominal),
 6. root_type (nominal),
 7. rating (numeric),
 8. weather (nominal)]

In [9]:
# Special descriptors of a trajectory: the trajectory identifier (idDesc)
# and the trajectory label (labelDesc).
print(data_desc.idDesc)
print(data_desc.labelDesc)

9. tid (numeric)
10. label (nominal)


In [10]:
# First point of the trajectory, shown with all of its aspects.
traj.points[0]

𝘱1⟨(40.690 -73.982), 2024-01-01 02:25:00, Monday, NYCT Transit Survey Unit, Office, Professional & Other Places, -1.0, Clouds⟩

In [11]:
# Aspect values of the first point, accessed by position
# (index 2 is `day`, index 3 is `poi`).
traj.points[0].aspects[2], traj.points[0].aspects[3]

(Monday, NYCT Transit Survey Unit)

In [12]:
# Attribute name taken from the descriptor, paired with the aspect value
# stored at the same position in the point.
data_desc.attributes[3].text, traj.points[0].aspects[3]

('poi', NYCT Transit Survey Unit)

In [13]:
from matmodel.base import Space2D

# Grab the spatial aspect (index 0) and the `day` aspect (index 2) of the
# first point, then compare their concrete classes against Space2D.
a, b = (traj.points[0].aspects[position] for position in (0, 2))

isinstance(a, Space2D), isinstance(b, Space2D), type(a), type(b)

(True, False, matmodel.base.Aspect.Space2D, matmodel.base.Aspect.Categoric)

In [14]:
# Raw Python values held by each aspect, together with their types.
a.value, type(a.value), b.value, type(b.value)

((40.6900872257332, -73.9817776229191), tuple, 'Monday', str)

In [15]:
# Descriptors of the first (space) and eighth (weather) attributes.
a1, a8 = data_desc.attributes[0], data_desc.attributes[7]

# Order, name and data type of the first attribute, separated by ' -- '.
print(' -- '.join(str(field) for field in (a1.order, a1.text, a1.dtype)))

# Comparator object attached to each of the two descriptors.
print('Comparator 1:', a1.comparator)
print('Comparator 8:', a8.comparator)

1 -- space -- space2d
Comparator 1: <matmodel.comparator.distance.EuclideanDistance object at 0x1445784f0>
Comparator 8: <matmodel.comparator.distance.EqualsDistance object at 0x1445789d0>


In [16]:
# List every attribute descriptor next to the comparator assigned to it.
for attr in data_desc.attributes:
    print(attr, attr.comparator)

1. space (space2d) <matmodel.comparator.distance.EuclideanDistance object at 0x1445784f0>
2. time (datetime) <matmodel.comparator.distance.DatetimeDistance object at 0x1445785b0>
3. day (nominal) <matmodel.comparator.distance.EqualsDistance object at 0x144578610>
4. poi (nominal) <matmodel.comparator.distance.EqualsDistance object at 0x1445786d0>
5. type (nominal) <matmodel.comparator.distance.EqualsDistance object at 0x144578790>
6. root_type (nominal) <matmodel.comparator.distance.EqualsDistance object at 0x144578850>
7. rating (numeric) <matmodel.comparator.distance.AbsoluteDistance object at 0x1445788b0>
8. weather (nominal) <matmodel.comparator.distance.EqualsDistance object at 0x1445789d0>


In [17]:
# Class of the `weather` aspect (index 7) of the first point.
type(traj.points[0].aspects[7])

matmodel.base.Aspect.Categoric

In [18]:
# Spatial distance between the first two points, computed with the comparator
# of the space attribute.
a1.comparator.distance(traj.points[0].aspects[0], traj.points[1].aspects[0])

0.02068644437005193

In [19]:
# Distance between p1 and p2 on the weather attribute (the values are equal).
a8.comparator.distance(traj.points[0].aspects[7], traj.points[1].aspects[7])

0

In [20]:
# Distance between p1 and p6 on the weather attribute, i.e. attribute 8
# (the values are different).
a8.comparator.distance(traj.points[0].aspects[7], traj.points[5].aspects[7])

1

In [21]:
# Example distance values:
d1 = 2
d2 = 10

# `enhance` is a function Andres used to increase the difference proportionally
# as the distance gets larger; it goes up to the comparator's max_value (if one is set).
a1.comparator.enhance(d1), a1.comparator.enhance(d2)

(4, 100)

In [22]:
# Example distance values:
d1 = 25
d2 = 75

# To normalize distance values to the 0-1 range, assign the largest possible
# distance value to the comparator first:
a1.comparator.max_value = 100
a1.comparator.normalize(d1), a1.comparator.normalize(d2)

(0.25, 0.75)

In [23]:
# Show the built-in documentation of the comparator's distance method.
help(a1.comparator.distance)

Help on method distance in module matmodel.comparator.distance:

distance(asp1, asp2) method of matmodel.comparator.distance.EuclideanDistance instance
    Calculates the Euclidean distance (works for points of 2D, 3D, and more).
    
    Arguments:
    asp1 (Space2D, Space3D) - value 1 to compare
    asp2 (Space2D, Space3D) - value 2 to compare
    
    Return:
    distance - distance value.



In [24]:
# Other comparators can be created, or the one on a descriptor can be swapped.
from matmodel.comparator import LcsDistance, EditlcsDistance

# NOTE(review): a1 is the descriptor of the `space` attribute, while the values
# compared here are `day` aspects (index 2); a1.comparator stays replaced afterwards.
day_ref = traj.points[0].aspects[2]
for comparator_cls in (LcsDistance, EditlcsDistance):
    a1.comparator = comparator_cls()
    # Compare the day of the first point with the day of points 3 and 6.
    for other_idx in (2, 5):
        day_other = traj.points[other_idx].aspects[2]
        print(day_ref, day_other, a1.comparator.distance(day_ref, day_other))

Monday Monday 0
Monday Tuesday 4
Monday Monday 0
Monday Tuesday 7


---

In [27]:
from matmodel.base import DateTime, Interval

# Three time values, given as strings.
# NOTE(review): the strings look like minutes of the day (60 is displayed as 01:00) - confirm.
v1 = DateTime('60')
v2 = DateTime('150')
v3 = DateTime('1430')

i1 = Interval('70', '120') # All values are outside this interval
i2 = Interval('90', '180') # Inside: v2; outside: v1 and v3
i3 = Interval('1380', '1430') # v3 == end

v1, v2, v3, i1, i2, i3

(2024-01-01 01:00:00,
 2024-01-01 02:30:00,
 2024-01-01 23:50:00,
 [2024-01-01 01:10:00 𛲔𛲔 2024-01-01 02:00:00],
 [2024-01-01 01:30:00 𛲔𛲔 2024-01-01 03:00:00],
 [2024-01-01 23:00:00 𛲔𛲔 2024-01-01 23:50:00])

In [28]:
# Underlying Python type of a DateTime value, and each value read back with get('m').
type(v1.value), v1.get('m'), v2.get('m'), v3.get('m')

(datetime.datetime, 60, 150, 1430)

In [29]:
# Import only the comparators used below, by name, instead of a wildcard import.
from matmodel.comparator import DatetimeDistance, InintervalDistance, TimeDistance

tD = TimeDistance() # Always compares relative to the time of day (only the time part is considered)
dD = DatetimeDistance(units='m') # Always compares from the larger value to the smaller one
iD = InintervalDistance(units='m') # Compares dates and intervals

# Maximum distance value configured on each comparator.
tD.max_value, iD.max_value, dD.max_value

(1439, None, None)

In [30]:
# Distances are printed in both argument orders to show that they are symmetric.
print(tD.distance(v1, v2), tD.distance(v2, v1))
print(tD.distance(v1, v3), tD.distance(v3, v1)) # The difference is always the smallest possible one

print(dD.distance(v1, v2), dD.distance(v2, v1))
print(dD.distance(v1, v3), dD.distance(v3, v1)) # The difference goes from the larger value to the smaller one

90 90
70 70
90.0 90.0
1370.0 1370.0


In [31]:
# Using the interval comparator with two plain DateTime values.
print(iD.distance(v1, v2), iD.distance(v2, v1))
print(iD.match(v1, v2), iD.match(v2, v1)) # Check whether the values are equal

v4 = DateTime('60') # Equal to v1
print(iD.match(v1, v4), iD.match(v4, v1))

print(iD.match(v1, v2, 60*3), iD.match(v2, v1, 60*3)) # Check whether they match within a threshold of 3h

90.0 90.0
False False
True True
True True


In [32]:
i1.start, i1.end

(datetime.datetime(2024, 1, 1, 1, 10), datetime.datetime(2024, 1, 1, 2, 0))

In [33]:
# With an interval involved, distance behaves as a match flag: 0 or 1.
# Every value is compared with every interval, in both argument orders;
# v2 lies inside i2 and v3 lies inside i3.
for group_idx, interval in enumerate((i1, i2, i3)):
    if group_idx > 0:
        print()  # blank line between the groups of each interval
    for value in (v1, v2, v3):
        print(iD.distance(value, interval), iD.distance(interval, value))

1 1
1 1
1 1

1 1
0 0
1 1

1 1
1 1
0 0


In [34]:
# match returns a boolean.
# Every value is checked against every interval, in both argument orders.
for group_idx, interval in enumerate((i1, i2, i3)):
    if group_idx > 0:
        print()  # blank line between the groups of each interval
    for value in (v1, v2, v3):
        print(iD.match(value, interval), iD.match(interval, value))

False False
False False
False False

False False
True True
False False

False False
False False
True True


---

In [37]:
# Manually set a weight on each attribute descriptor
# (weights can also be configured in the descriptor JSON).
for attr in data_desc.attributes:
    attr.weight = 1.0

In [40]:
# Show each attribute descriptor together with its current weight.
for attr in data_desc.attributes:
    print(attr, '\t>> W:', attr.weight)

1. space (space2d) 	>> W: 1.0
2. time (time) 	>> W: 1.0
3. day (nominal) 	>> W: 1.0
4. poi (nominal) 	>> W: 1.0
5. type (nominal) 	>> W: 1.0
6. root_type (nominal) 	>> W: 1.0
7. rating (numeric) 	>> W: 1.0
8. weather (nominal) 	>> W: 1.0


---

In [5]:
# How the dataset attributes are instantiated - including the distance function
# (comparator) of each one - can be configured by passing a descriptor JSON file.
# NOTE(review): the relative path assumes the datasets folder sits one level above
# this notebook's working directory - confirm.
from matmodel.util.parsers import df2trajectory
T, data_desc = df2trajectory(df, data_desc='../datasets/mat/FoursquareNYC/FoursquareNYC.json')

Converting Trajectories:   0%|          | 0/694 [00:00<?, ?it/s]

In [14]:
# Show the comparator assigned to each attribute descriptor.
for attr in data_desc.attributes:
    print(attr.comparator, '>>', attr)

<matmodel.comparator.distance.EuclideanDistance object at 0x14d67fd30> >> 1. space (space2d)
<matmodel.comparator.distance.TimeDistance object at 0x14d67fd00> >> 2. time (time)
<matmodel.comparator.distance.EqualsDistance object at 0x14d67fca0> >> 3. day (nominal)
<matmodel.comparator.distance.EqualsDistance object at 0x14d67fc40> >> 4. poi (nominal)
<matmodel.comparator.distance.EqualsDistance object at 0x14d67fe20> >> 5. type (nominal)
<matmodel.comparator.distance.EqualsDistance object at 0x14d67fd90> >> 6. root_type (nominal)
<matmodel.comparator.distance.AbsoluteDistance object at 0x14d67fd60> >> 7. rating (numeric)
<matmodel.comparator.distance.EqualsDistance object at 0x14d67f9a0> >> 8. weather (nominal)


In [7]:
# Dependency groups can also be configured in the JSON descriptor file:
# any name can be given to a subset of related attributes.
data_desc.dependencies

{'poi': [1. space (space2d),
  4. poi (nominal),
  5. type (nominal),
  6. root_type (nominal),
  7. rating (numeric)]}

In [13]:
# Show the dependency group of each attribute descriptor (None when it has no group).
for attr in data_desc.attributes:
    print(attr.dependency_group, '>>', attr)

poi >> 1. space (space2d)
None >> 2. time (time)
None >> 3. day (nominal)
poi >> 4. poi (nominal)
poi >> 5. type (nominal)
poi >> 6. root_type (nominal)
poi >> 7. rating (numeric)
None >> 8. weather (nominal)


In [3]:
# The FeatureDescriptor class can build a descriptor instance from a JSON-like object (a dict):
from matmodel.descriptor import FeatureDescriptor

# Description of a single attribute: position, type, name, dependency group,
# weight and the configuration of its comparator.
desc = {
    "order": 7,
    "type": "numeric",
    "text": "rating",
    "dependency": "poi",
    "weight": 0.5,
    "comparator": {
        "distance": "diffnotneg",
        "maxValue": 5.0,
        # Extra, free-form keys passed along with the comparator configuration.
        "param1": 'something x',
        "param2": 'something y',
    }
}

ft = FeatureDescriptor.instantiate(desc)
ft

7. rating (numeric)

In [5]:
# Inspect the instantiated descriptor: dependency group, weight, comparator
# and the extra parameters read back from the comparator.
ft, ft.dependency_group, ft.weight, ft.comparator, ft.comparator.param1, ft.comparator.param2

(7. rating (numeric),
 'poi',
 0.5,
 <matmodel.comparator.distance.AbsoluteDistance at 0x1075d7c70>,
 'something x',
 'something y')

# --

In [None]:
# Attribute names of interest (not used further in this cell).
sel_attributes = ['poi', 'day', 'category', 'weather']

# Attribute descriptions as plain dicts; each of the two entries appears twice.
attributes = [
    {'order': 1, 'type': 'space2d', 'text': 'lat_lon', 'comparator': {'distance': 'euclidean'}},
    {'order': 2, 'type': 'nominal', 'text': 'day', 'comparator': {'distance': 'equals'}},
    {'order': 1, 'type': 'space2d', 'text': 'lat_lon', 'comparator': {'distance': 'euclidean'}},
    {'order': 2, 'type': 'nominal', 'text': 'day', 'comparator': {'distance': 'equals'}},
]

# Index the descriptions by their 'text' name; for repeated names the last entry wins.
{item['text']: item for item in attributes}

In [None]:
from matmodel.method import MethodWrapper

# Ask MethodWrapper for the methods it provides.
MethodWrapper.providedMethods()

In [None]:
from matmodel.method.MethodWrapper import Param

# Display the TYPE_TEXT attribute of Param.
Param.TYPE_TEXT

\# By Tarlis Portela (2023)