In [4]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import hmine, association_rules
from sklearn.preprocessing import KBinsDiscretizer
import numpy as np
from sklearn.preprocessing import StandardScaler

path = "../../dane/8CPU_20RAM/3600s/2repl/merged.csv"

### Wczytanie danych

In [7]:
data = pd.read_csv(path)
data = data.drop('replicaId', axis=1)
data = data.drop(["applicationTime_methods", "databaseTime_methods", "databaseTime_trading", 
                  "applicationTime_trading", "timestamp"], axis=1)

### Normalizacja danych

In [12]:
endpoint_map = {
    'do-register': 0,
    'add-sell-offer': 1,
    'add-buy-offer': 2,
    'get-stock-data': 3,
    'add-company': 4,
    'get-stock-users-and-companies': 5
}
data['endpointUrl_methods'] = data['endpointUrl_methods'].replace(endpoint_map)

data['cpuUsage_stock'] = round(data['cpuUsage_stock'], 2)
data['memoryUsage_stock'] = round(data['memoryUsage_stock'], 2)
data['cpuUsage_traffic'] = round(data['cpuUsage_traffic'], 2)
data['memoryUsage_traffic'] = round(data['memoryUsage_traffic'], 2)

### Dyskretyzacja wartości ciągłych

In [13]:
transformer = KBinsDiscretizer(encode='onehot-dense', strategy='kmeans' , n_bins=5)
df_discret = pd.DataFrame(transformer.fit_transform(data).astype(np.int8))

### Utworzenie nowych nazw cech

In [14]:
df_discret.columns = [
    f"{data.columns[idx]}: {round(edge, 4)} - {round(next_edge, 4)}"
    for idx, edges in enumerate(transformer.bin_edges_)
    for edge, next_edge in zip(edges, edges[1:])
]

# Wyrzucenie zdyskretyzowanych cech o zerowej wariancji:
df_discret = df_discret.loc[:, (df_discret.var() != 0)]


### Apriori

In [15]:

test = association_rules(hmine(df_discret, min_support=0.01, use_colnames=True, max_len=2), metric='lift').sort_values(by='lift', ascending=False)

test



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1076,(memoryUsage_traffic: 0.7484 - 0.83),(cpuUsage_traffic: 0.2723 - 0.4295),0.074359,0.019713,0.019713,0.265103,13.448238,0.018247,1.333910,1.000000
1077,(cpuUsage_traffic: 0.2723 - 0.4295),(memoryUsage_traffic: 0.7484 - 0.83),0.019713,0.074359,0.019713,1.000000,13.448238,0.018247,inf,0.944255
757,(cpuUsage_traffic: 0.2723 - 0.4295),(cpuUsage_stock: 0.13 - 0.2184),0.019713,0.058978,0.013003,0.659607,11.183920,0.011840,2.764513,0.928897
756,(cpuUsage_stock: 0.13 - 0.2184),(cpuUsage_traffic: 0.2723 - 0.4295),0.058978,0.019713,0.013003,0.220466,11.183920,0.011840,1.257531,0.967656
234,(endpointUrl_methods: 0.5 - 1.5),(cpuUsage_stock: 0.13 - 0.2184),0.028188,0.058978,0.017252,0.612048,10.377540,0.015590,2.425613,0.929849
...,...,...,...,...,...,...,...,...,...,...
377,(queueSizeForward_methods: 0.0 - 1648.6128),(memoryUsage_traffic: 0.6473 - 0.7484),0.309083,0.274505,0.068712,0.222308,0.809851,-0.016133,0.932882,-0.253638
111,(memoryUsage_stock: 0.6193 - 0.818),(apiTime_methods: 60002.0458 - 90914.1624),0.148895,0.182632,0.021888,0.147002,0.804909,-0.005305,0.958230,-0.221656
110,(apiTime_methods: 60002.0458 - 90914.1624),(memoryUsage_stock: 0.6193 - 0.818),0.182632,0.148895,0.021888,0.119847,0.804909,-0.005305,0.966996,-0.228712
153,(apiTime_methods: 90914.1624 - 118217.5003),(queueSizeForward_methods: 8899.636 - 12770.0),0.286730,0.108096,0.024922,0.086919,0.804095,-0.006072,0.976807,-0.254607
