In [8]:
import pandas as pd 
import numpy as np
import glob
import os 
from datetime import datetime
import matplotlib.pyplot as plt

In [9]:
def read_event_log(event_data_fp):
    """Load an event log and split it into one time-ordered frame per machine.

    Parameters
    ----------
    event_data_fp : str, path object or file-like
        CSV source handed to ``pd.read_csv``. It must provide the columns
        ``machine_id`` and ``timestamp``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per distinct ``machine_id`` value, in order of first
        appearance in the file. Each frame is sorted by ascending
        ``timestamp`` and carries a fresh 0..n-1 index.

    Notes
    -----
    Every machine id is printed as it is processed.
    """
    df_events = pd.read_csv(event_data_fp)
    events = []
    for machine in df_events["machine_id"].unique():
        print(machine)
        machine_events = (
            df_events[df_events["machine_id"] == machine]
            # mergesort is stable: events sharing a timestamp keep the order
            # they had in the file instead of an arbitrary one.
            .sort_values(by="timestamp", ascending=True, kind="mergesort")
            # drop=True discards the old index outright. The previous
            # reset_index() + drop("index") combination removed a genuine
            # data column called "index" whenever the CSV contained one.
            .reset_index(drop=True)
        )
        events.append(machine_events)
    return events

In [10]:
def read_processes(event_data_fp):
    """Turn a raw event log into a table of consecutive-event transitions.

    The log is split by ``Company`` and then by ``machine_id``. Each machine's
    events are ordered by ``timestamp``, stripped of the bookkeeping columns
    ``Unnamed: 0`` and ``id``, converted to plain list rows and handed to
    ``extract_process``.

    Parameters
    ----------
    event_data_fp : str, path object or file-like
        CSV source handed to ``pd.read_csv``. It must provide the columns
        ``Company``, ``machine_id``, ``timestamp``, ``Unnamed: 0`` and ``id``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-machine frames produced by
        ``extract_process`` (original per-machine indices are kept). An empty
        frame is returned when the log contains no rows.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    df_events = pd.read_csv(event_data_fp)
    processes = []
    for company in df_events["Company"].unique():
        company_events = df_events[df_events["Company"] == company]
        for machine in company_events["machine_id"].unique():
            machine_events = (
                company_events[company_events["machine_id"] == machine]
                # Stable sort: events sharing a timestamp keep their file
                # order, so the derived transitions are deterministic.
                .sort_values(by="timestamp", ascending=True, kind="mergesort")
                # drop=True avoids materialising the old index as a column.
                .reset_index(drop=True)
                .drop(columns=["Unnamed: 0", "id"])
            )
            # extract_process reads row fields by position (0: timestamp,
            # 1: event, 2: machine id, 3: company).
            # NOTE(review): this relies on the remaining CSV columns being in
            # exactly that order -- confirm against events.csv.
            processes.append(extract_process(machine_events.values.tolist()))
    if not processes:
        # pd.concat raises ValueError on an empty list (log without rows).
        return pd.DataFrame()
    return pd.concat(processes)

In [11]:
def extract_process(machine_events):
    """Pair every event row with its successor and keep the valid transitions.

    Parameters
    ----------
    machine_events : sequence of rows
        Rows are read by position: ``row[0]`` is the timestamp, ``row[1]`` the
        event, ``row[2]`` the machine id and ``row[3]`` the company.

    Returns
    -------
    pandas.DataFrame
        One row per adjacent pair accepted by ``process_check``, with the
        columns ``t1``, ``t2``, ``event1``, ``event2``, ``machine_id``,
        ``company``, ``duration`` (``t2 - t1``) and ``class``
        (``"<event1>_<event2>"``). Machine id and company are taken from the
        first row of the pair. The frame has no columns when nothing is kept.
    """
    transitions = []
    for idx in range(len(machine_events) - 1):
        first, second = machine_events[idx], machine_events[idx + 1]
        start, end = first[0], second[0]
        source_event, target_event = first[1], second[1]
        transition = {
            "t1": start,
            "t2": end,
            "event1": source_event,
            "event2": target_event,
            "machine_id": first[2],
            "company": first[3],
            "duration": end - start,
            "class": str(source_event) + "_" + str(target_event),
        }
        # Skip pairs that the validity check rejects.
        if not process_check(transition):
            continue
        transitions.append(transition)
    return pd.DataFrame(transitions)

In [12]:
def process_check(process):
    """Decide whether a transition between two events is plausible.

    A transition is accepted only when its duration is non-negative and both
    timestamps fall on the same UTC calendar day.

    Parameters
    ----------
    process : mapping
        Must provide ``"duration"`` plus ``"t1"`` and ``"t2"``; the latter two
        are interpreted as POSIX timestamps in seconds.

    Returns
    -------
    bool
        ``True`` when the transition passes both checks, ``False`` otherwise.
    """
    # Imported locally: the file only imports the ``datetime`` class.
    from datetime import timezone

    # Reject negative durations up front; no timestamp conversion is needed.
    if process["duration"] < 0:
        return False

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware call below yields the same UTC calendar date.
    start_day = datetime.fromtimestamp(process["t1"], tz=timezone.utc).date()
    end_day = datetime.fromtimestamp(process["t2"], tz=timezone.utc).date()
    return start_day == end_day

In [13]:
if __name__ == "__main__":
    # Driver: derive the transition table from the raw event log and save it
    # as CSV.
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running this cell on another computer.
    event_data_fp = r"/Users/Asus/Desktop/process-mining-2/events.csv"
    fp_out = r"/Users/Asus/Desktop/process-mining-2/processes.csv"
    # Despite its name, `events` holds the frame returned by read_processes.
    events = read_processes(event_data_fp)
    # to_csv is called without index=False, so the frame's index is written
    # as an extra first column of the output file.
    events.to_csv(fp_out)


In [1]:
import pdb
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt

# Summarise processes.csv: mean duration and number of rows for every
# (company, process class, machine) combination that actually occurs.
# 'accurance' (sic) is kept verbatim because it is the header written to the
# output CSV.
result_columns = ['company', 'process_class', 'machine_id', 'duration', 'accurance']
output_path = r"/Users/Asus/Desktop/process-mining-2/generalresult.csv"

# NOTE(review): this input path is relative (resolved against the current
# working directory) while output_path is absolute -- confirm this is intended.
df = pd.read_csv('Desktop/process-mining-2/processes.csv')

class_list = df['class'].unique().tolist()
company_list = df['company'].unique().tolist()
machine_id_list = df['machine_id'].unique().tolist()

# Rows are collected in a plain list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every
# call.
summary_rows = []
for company in company_list:
    # Filter once per company / class rather than once per innermost
    # iteration; the selected rows are the same.
    df_company = df[df['company'] == company]
    for classs in class_list:
        df_class = df_company[df_company['class'] == classs]
        if len(df_class) == 0:
            continue
        for machine_id in machine_id_list:
            df_chosen = df_class[df_class['machine_id'] == machine_id]
            if len(df_chosen) > 0:
                mean_duration = df_chosen['duration'].mean()
                occurrences = len(df_chosen['duration'])
                print('duration for ' + company + ' class ' + classs + ' id ' + str(machine_id) + ':', mean_duration, occurrences)
                summary_rows.append([company, classs, machine_id, mean_duration, occurrences])

final_df = pd.DataFrame(summary_rows, columns=result_columns)
final_df.to_csv(output_path, sep=',', index=False)

duration for schlote class 2_5 id 1: 0.004167228302185474 2910
duration for schlote class 2_5 id 2: 0.004233823892145515 1333
duration for schlote class 5_9 id 1: 249.94706049999274 1416
duration for schlote class 5_9 id 2: 152.8643558069376 650
duration for schlote class 9_8 id 1: 0.0038314913936338796 1865
duration for schlote class 9_8 id 2: 0.0047649167905187825 942
duration for schlote class 8_0 id 1: 24.448595019356237 1820
duration for schlote class 8_0 id 2: 9.989034761058251 911
duration for schlote class 0_2 id 1: 292.07308502483835 2862
duration for schlote class 0_2 id 2: 372.52715979880304 1323
duration for schlote class 5_0 id 1: 340.98193227649955 1463
duration for schlote class 5_0 id 2: 253.81217625369766 657
duration for schlote class 0_1 id 1: 4.396227795569623 8540
duration for schlote class 0_1 id 2: 5.460033584467321 3578
duration for schlote class 1_0 id 1: 36.85913105253308 8186
duration for schlote class 1_0 id 2: 51.84392582789309 3345
duration for schlote cla