# In [11]:
import cv2
import math
import numpy as np
import os
import pm4py
import pandas as pd
from PIL import Image
import numpy as np
import glob

# In [19]:
class PatternsConverter():
    """Turn the cases of an XES event log into 256x256 grayscale PNG images.

    For every case the converter builds all prefix traces (the first 1, 2,
    ..., n events), counts how often each activity occurs in every prefix
    and stores the counts as one matrix row per prefix.  ``convert`` scales
    each matrix to the 0..255 range and writes it as an image.

    ``set_activities`` must be called before ``convert`` so that the shared
    activity -> column mapping (``act_dict``) exists.
    """

    # Shared activity -> column index mapping; filled once by set_activities().
    act_dict = None

    # When this marker occurs in the log path, convert() ignores the log
    # content and uses a fixed set of identical synthetic traces instead.
    _SYNTHETIC_LOG_MARKER = "pattern_between_after_before"
    _SYNTHETIC_TRACE = ("Start", "Ist", "A", "A", "A", "Icmp", "End")
    _SYNTHETIC_TRACE_COUNT = 1000

    # Width and height of the written images.
    _IMAGE_SIZE = (256, 256)

    @classmethod
    def set_activities(cls, activities):
        """Define the class-wide activity -> column index mapping.

        Activities are sorted and numbered from 0.  The mapping is built only
        on the first call; later calls leave the existing mapping untouched.

        Args:
            activities: iterable of activity names.
        """
        if cls.act_dict is None:
            # Map every activity to its position in the sorted order.
            cls.act_dict = {
                activity: index
                for index, activity in enumerate(sorted(activities))
            }

    def __init__(self, log_file_path, caseID_col_name, act_col_name, ts_col_name):
        """Read the XES log and keep only the columns the converter needs.

        Args:
            log_file_path: path of the XES file passed to ``pm4py.read_xes``.
            caseID_col_name: name of the case identifier column.
            act_col_name: name of the activity column.
            ts_col_name: name of the timestamp column.
        """
        self.log_file_path = log_file_path
        # Per-instance column names.
        self.caseID_col_name = caseID_col_name
        self.act_col_name = act_col_name
        self.ts_col_name = ts_col_name

        self.event_log = pm4py.read_xes(log_file_path)
        # Case ids and activities are handled as strings everywhere below.
        self.event_log[caseID_col_name] = self.event_log[caseID_col_name].apply(str)
        self.event_log[act_col_name] = self.event_log[act_col_name].apply(str)
        self.traces_num = 0
        self.event_log[ts_col_name] = pd.to_datetime(
            self.event_log[ts_col_name], utc=True, dayfirst=True
        )

        self.event_log = self.event_log[[act_col_name, caseID_col_name, ts_col_name]]

    def _uses_synthetic_log(self):
        """Return True when the log path selects the built-in synthetic traces."""
        return self._SYNTHETIC_LOG_MARKER in self.log_file_path

    def __get_unique_ids(self):
        """Return the distinct case ids and record their number in ``traces_num``."""
        ids = list(self.event_log[self.caseID_col_name].unique())
        self.traces_num = len(ids)
        return ids

    def __get_case_logs(self, ids_list):
        """Return one timestamp-ordered sub-log per case with more than two events.

        Args:
            ids_list: case ids to extract, in the order the result should follow.

        Returns:
            List of DataFrames, one per retained case.
        """
        case_column = self.event_log[self.caseID_col_name]
        case_logs = []
        for case_id in ids_list:
            case_log = self.event_log[case_column == case_id]
            # sort_values returns a new frame, so the result has to be kept;
            # a stable sort leaves events with equal timestamps in log order.
            case_log = case_log.sort_values(by=[self.ts_col_name], kind="stable")
            if len(case_log) > 2:
                case_logs.append(case_log)
        return case_logs

    def __get_prefix_traces(self):
        """Build all prefix traces of every case in ``self.case_logs``.

        Also stores the length of the longest prefix trace in
        ``self.max_len_prefix_trace``.

        Returns:
            List with one entry per case; each entry is the list of that
            case's prefixes of length 1..n.
        """
        cases_prefix_traces = []
        self.max_len_prefix_trace = 0
        synthetic = self._uses_synthetic_log()

        for case_log in self.case_logs:
            # Synthetic cases are plain lists of activity names; real cases
            # are DataFrames from which the activity column is taken.
            events = case_log if synthetic else case_log[self.act_col_name].values
            prefix_traces_act = [events[0:i] for i in range(1, len(events) + 1)]

            # The longest prefix of a case is the case itself.
            self.max_len_prefix_trace = max(self.max_len_prefix_trace, len(events))
            cases_prefix_traces.append(prefix_traces_act)
        return cases_prefix_traces

    def __get_activity_matrices(self):
        """Count activity occurrences per prefix trace for every case.

        Every matrix has ``max_len_prefix_trace`` rows and one column per
        known activity; row ``i`` holds the counts of the prefix of length
        ``i + 1``.  Rows beyond the case length stay zero.

        Returns:
            List of ``uint8`` matrices, one per case.

        Raises:
            KeyError: if a trace contains an activity missing from ``act_dict``.
        """
        act_dict = PatternsConverter.act_dict
        columns = len(act_dict)
        activity_matrices = []
        for prefix_traces in self.cases_prefix_traces:
            matrix = [[0] * columns for _ in range(self.max_len_prefix_trace)]
            for row, prefix_trace in zip(matrix, prefix_traces):
                for act in prefix_trace:
                    row[act_dict[act]] += 1
            # Saturate at 255 so large counts do not wrap around in uint8.
            counts = np.clip(np.asarray(matrix), 0, 255).astype("uint8")
            activity_matrices.append(np.asmatrix(counts))
        return activity_matrices

    @staticmethod
    def _to_grayscale(np_matrix):
        """Scale a count matrix so that its maximum becomes 255.

        Args:
            np_matrix: matrix of non-negative activity counts.

        Returns:
            ``uint8`` ndarray of the same shape; fractional values are
            truncated.  An all-zero matrix stays all zero.
        """
        pixels = np.asarray(np_matrix, dtype=np.float64)
        peak = pixels.max() if pixels.size else 0
        if peak > 0:
            pixels = pixels * (255.0 / peak)
        return pixels.astype(np.uint8)

    def convert(self, path_to_dir):
        """Write one ``image_<n>.png`` per case into ``path_to_dir``.

        The intermediate results are kept on the instance as ``case_logs``,
        ``cases_prefix_traces`` and ``activity_matrices``.

        Args:
            path_to_dir: output directory; it is created when missing.
        """
        ids_list = self.__get_unique_ids()

        if self._uses_synthetic_log():
            self.case_logs = [
                list(self._SYNTHETIC_TRACE)
                for _ in range(self._SYNTHETIC_TRACE_COUNT)
            ]
        else:
            self.case_logs = self.__get_case_logs(ids_list)

        self.cases_prefix_traces = self.__get_prefix_traces()
        self.activity_matrices = self.__get_activity_matrices()

        # cv2.imwrite does not create directories, so make sure it exists.
        if path_to_dir:
            os.makedirs(path_to_dir, exist_ok=True)

        for index, np_matrix in enumerate(self.activity_matrices, start=1):
            # 8-bit pixels are required for the PNG; the same channel is
            # repeated three times to obtain a gray BGR image.
            activity_chanel = self._to_grayscale(np_matrix)
            result = cv2.merge([activity_chanel, activity_chanel, activity_chanel])

            # Nearest-neighbour resizing keeps the cell values unblended.
            result = cv2.resize(
                result, self._IMAGE_SIZE, interpolation=cv2.INTER_NEAREST
            )

            path = os.path.join(path_to_dir, "image_" + str(index) + ".png")
            cv2.imwrite(path, result)
