In [None]:
!pip install PM4Py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting PM4Py
  Downloading pm4py-2.7.3-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting deprecation
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting stringdist
  Downloading StringDist-1.0.9.tar.gz (7.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting intervaltree
  Downloading intervaltree-3.1.0.tar.gz (32 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: intervaltree, stringdist
  Building wheel for intervaltree (setup.py) ... [?25l[?25hdone
  Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26114 sha256=af8ca24f1e925373cf364736c6b9df5fc06294e0140e22bab68657eef1a31b97
  Stored in directory: /root/.cache/pip/wheels/ab/fa/1b/75d9a713279796785711bd0

In [None]:
import os
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
import pm4py
from PIL import Image

In [None]:
class Converter(ABC):
  """Abstract base for converters that work on an event log.

  The constructor loads the log into ``self.event_log`` (a DataFrame reduced to
  the activity, case-id and timestamp columns) and remembers the three column
  names. Concrete subclasses implement ``convert``.
  """

  def __init__(self, log_file_path, ch_sep, caseID_col_name, act_col_name, ts_col_name):
    """Load the event log and normalise the three columns of interest.

    Args:
      log_file_path: Path of the log. A path ending in ``.csv`` (case-insensitive)
        is read with ``pd.read_csv``; any other path is passed to ``pm4py.read_xes``.
      ch_sep: Column separator, used only when the log is a CSV file.
      caseID_col_name: Name of the column holding the case identifier.
      act_col_name: Name of the column holding the activity label.
      ts_col_name: Name of the column holding the event timestamp.

    Raises:
      KeyError: If any of the three columns is absent from the loaded log.
    """
    # Instance fields: keep the column names so subclasses can refer to them.
    self.caseID_col_name = caseID_col_name
    self.act_col_name = act_col_name
    self.ts_col_name = ts_col_name

    # Pick the reader from the file extension so that ch_sep is actually honoured.
    if str(log_file_path).lower().endswith(".csv"):
      raw_log = pd.read_csv(log_file_path, sep=ch_sep)
    else:
      raw_log = pm4py.read_xes(log_file_path)

    # Fail early with a readable message instead of an opaque pandas lookup error.
    missing = [name for name in (act_col_name, caseID_col_name, ts_col_name)
               if name not in raw_log.columns]
    if missing:
      raise KeyError("columns {0} not found in event log; available columns: {1}"
                     .format(missing, list(raw_log.columns)))

    # Case ids and activities are compared as strings; timestamps as datetimes.
    raw_log[caseID_col_name] = raw_log[caseID_col_name].apply(str)
    raw_log[act_col_name] = raw_log[act_col_name].apply(str)
    raw_log[ts_col_name] = pd.to_datetime(raw_log[ts_col_name])

    # Keep only the columns the converters use, in a fixed order.
    self.event_log = raw_log[[act_col_name, caseID_col_name, ts_col_name]]

  @abstractmethod
  def convert(self):
    """Run the conversion; must be overridden by every concrete subclass."""


In [None]:
class ActivityConverter(Converter):
  """Converter that writes one PNG image per case of the event log.

  For every case a count matrix is built: row ``i`` holds, for each distinct
  activity of the log, how many times it occurs among the first ``i + 1`` events
  of that case. All matrices have as many rows as the longest case (shorter
  cases leave the remaining rows at zero). ``convert`` scales each matrix to the
  0..255 range, resizes it to 224x224 and saves it.
  """

  def __get_unique_ids__(self):
    """Return the distinct case identifiers as a list, in order of first appearance."""
    return list(self.event_log[self.caseID_col_name].unique())

  def __get_case_logs__(self, ids_list):
    """Return one DataFrame per id in ``ids_list``, each sorted by timestamp.

    An id that does not occur in the log yields an empty DataFrame.
    """
    # Split the log once instead of filtering the whole frame again for every id.
    rows_by_case = {
        case_id: rows
        for case_id, rows in self.event_log.groupby(self.caseID_col_name, sort=False)
    }
    no_rows = self.event_log.iloc[0:0]

    case_logs = []
    for case_id in ids_list:
      case_log = rows_by_case.get(case_id, no_rows)
      # sort_values returns a new frame, so the result has to be kept; the stable
      # sort leaves events that share a timestamp in their original log order.
      case_logs.append(case_log.sort_values(by=[self.ts_col_name], kind="stable"))
    return case_logs

  def __get_prefix_traces__(self):
    """Build every prefix trace of every case in ``self.case_logs``.

    Returns a list with one entry per case; each entry is the list of activity
    sequences ``events[0:1], events[0:2], ..., events[0:len]``. As a side effect
    ``self.max_len_prefix_trace`` is set to the length of the longest prefix.
    """
    cases_prefix_traces = []
    self.max_len_prefix_trace = 0

    for case_log in self.case_logs:
      case_activities = case_log[self.act_col_name].values

      # Track the longest prefix trace; it fixes the row count of every matrix.
      self.max_len_prefix_trace = max(self.max_len_prefix_trace, len(case_activities))

      cases_prefix_traces.append(
          [case_activities[0:end] for end in range(1, len(case_activities) + 1)])
    return cases_prefix_traces

  def __get_activity_matrices__(self, act_num, act_dict):
    """Count activity occurrences per prefix trace.

    Args:
      act_num: Number of distinct activities (column count of each matrix).
      act_dict: Mapping from activity label to its column index.

    Returns:
      A list with one ``uint8`` ``np.matrix`` per case in
      ``self.cases_prefix_traces``, of shape ``(self.max_len_prefix_trace, act_num)``.
      Counts above 255 are clipped to 255.
    """
    activity_matrices = []
    for prefix_traces in self.cases_prefix_traces:
      # Count in a wide integer type first; uint8 would wrap around above 255.
      counts = np.zeros((self.max_len_prefix_trace, act_num), dtype=np.int64)
      for row, prefix_trace in enumerate(prefix_traces):
        for act in prefix_trace:
          counts[row, act_dict[act]] += 1
      # Convert once per case (not once per counted event) and saturate at 255.
      activity_matrices.append(np.asmatrix(np.clip(counts, 0, 255).astype("uint8")))
    return activity_matrices

  def convert(self, path_to_dir):
    """Render every case as ``image_<n>.png`` (n starting at 1) in ``path_to_dir``.

    The directory is created if it does not exist. The intermediate results are
    kept on the instance as ``case_logs``, ``cases_prefix_traces``,
    ``max_len_prefix_trace`` and ``activity_matrices``.
    """
    ids_list = self.__get_unique_ids__()
    self.case_logs = self.__get_case_logs__(ids_list)
    self.cases_prefix_traces = self.__get_prefix_traces__()

    # Collect the distinct activities and map each one to a column index.
    activities = list(self.event_log[self.act_col_name].unique())
    act_dict = {activity: column for column, activity in enumerate(activities)}

    self.activity_matrices = self.__get_activity_matrices__(len(activities), act_dict)

    if path_to_dir:
      os.makedirs(path_to_dir, exist_ok=True)

    for index, np_matrix in enumerate(self.activity_matrices, start=1):
      # Normalise in floating point and keep the array 2-D: squeezing would turn a
      # single-row matrix into a 1-D array and change the orientation of the image.
      pixels = np.asarray(np_matrix, dtype=np.float64)
      if pixels.size and pixels.max() > 0:
        pixels = pixels * (255.0 / pixels.max())
      # Round (rather than truncate) so the largest count maps exactly to 255.
      pixels = np.rint(pixels).astype(np.uint8)

      img = Image.fromarray(pixels)
      img = img.resize((224, 224), Image.NEAREST)
      img.save(os.path.join(path_to_dir, "image_" + str(index) + ".png"))
     

In [None]:
# Load the event log to convert.
# Alternative log: '/content/drive/MyDrive/CourseProject/Test_3/Response_Never_Log.xes'
act_conv = ActivityConverter(
    log_file_path='/content/drive/MyDrive/CourseProject/Test_3/Existence_Activity_universal.xes',
    ch_sep=',',
    caseID_col_name="concept:instance",
    act_col_name="concept:name",
    ts_col_name="time:timestamp",
)

# Write the per-case images into the output directory.
# Alternative output directory: '/content/drive/MyDrive/CourseProject/Test_3/Images_never'
act_conv.convert(
    path_to_dir='/content/drive/MyDrive/CourseProject/Test_3/Images_Existence_Activity_universal'
)

parsing log, completed traces ::   0%|          | 0/100 [00:00<?, ?it/s]