In [6]:
import pandas as pd
data_rfm = pd.read_parquet('df.parquet')
data_norm = data_rfm[["log_R", "log_F", "log_M"]]

print(data_norm)

         log_R     log_F     log_M
0     3.737670  5.209486  8.368925
1     4.356709  3.332205  7.271175
2     3.044522  4.290459  7.285198
3     5.746203  2.833213  5.688330
4     4.317488  4.510860  7.143941
...        ...       ...       ...
4365  5.087596  2.397895  5.201806
4366  1.791759  2.079442  4.404522
4367  4.553877  2.639057  5.179534
4368  2.484907  6.629363  7.647729
4369  0.693147  4.262680  7.516586

[4329 rows x 3 columns]


In [2]:
from sklearn.cluster import KMeans, DBSCAN, OPTICS, AgglomerativeClustering, Birch
from minisom import MiniSom
import numpy as np
import timeit

def cluster_kmeans():
    n_clusters = 3
    kmeans = KMeans(n_clusters = n_clusters)
    kmeans.fit(data_norm)
    kmeans.predict(data_norm)

def cluster_dbscan():
    eps = 0.3
    min_samples = 11
    model = DBSCAN(eps=eps, min_samples=min_samples)
    model.fit_predict(data_norm)

def cluster_optics():
    eps = 0.3
    min_samples = 15

    model = OPTICS(eps=eps, min_samples=min_samples).fit(data_norm)
    model.fit_predict(data_norm)

def cluster_agglomerative_hierarchical():
    n_clusters = 4
    model = AgglomerativeClustering(n_clusters=n_clusters)
    model.fit_predict(data_norm)

def cluster_birch():
    n_clusters = 4
    model = Birch(n_clusters=n_clusters)
    model.fit_predict(data_norm)

def cluster_som():
    n_rows, n_cols = 1, 3
    n_iterations = 100000

    som = MiniSom(n_rows, n_cols, data_norm.shape[1], sigma=0.7, learning_rate=0.5)
    som.train_batch(data_norm.values, n_iterations)



In [3]:
import timeit

times = []

# Wykonanie algorytmu n razy
n = 100
for _ in range(n):
    t = timeit.timeit("cluster_optics()", globals=globals(), number=1)
    times.append(t)

average_time = np.mean(times)
standard_deviation = np.std(times)

print(f"Średni czas wykonania: {average_time} sekundy")
print(f"Odchylenie standardowe: {standard_deviation} sekundy")

Średni czas wykonania: 5.3357610560000035 sekundy
Odchylenie standardowe: 0.3038762569617348 sekundy


In [5]:
import timeit

times = []

# Wykonanie algorytmu n razy
n = 100
for _ in range(n):
    t = timeit.timeit("cluster_som()", globals=globals(), number=1)
    times.append(t)

average_time = np.mean(times)
standard_deviation = np.std(times)

print(f"Średni czas wykonania: {average_time} sekundy")
print(f"Odchylenie standardowe: {standard_deviation} sekundy")

Średni czas wykonania: 4.020368716999999 sekundy
Odchylenie standardowe: 0.18528454975949046 sekundy
