In [39]:
import pandas as pd
import numpy as np

from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering, DBSCAN
from sklearn.metrics import adjusted_rand_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [40]:
df = pd.read_pickle('crowd_train_all_data_embedded.pkl')


def get_dataframe(df, min_count, max_count):
    df_temp = df.copy()

    freq = df_temp.source_id.value_counts()
    freq = freq[min_count <= freq]
    df_temp = df_temp[df.source_id.isin(freq.index)]

    df_temp = (
        df_temp.groupby('source_id')
        .apply(lambda x: x.nlargest(max_count, 'duration'))
        .reset_index(drop=True)
        .drop(
            columns=[
                'duration',
                'hash_id',
                'annotator_emo',
                'golden_emo',
                'annotator_id',
                'speaker_text',
                'speaker_emo',
            ]
        )
    )

    df_temp.dropna(inplace=True)

    print(df_temp.source_id.nunique())

    return df_temp


df_t = get_dataframe(df, 350, 350)

8


In [50]:
X = np.vstack(df_t.audio_feature.to_numpy()).reshape(df_t.shape[0], -1)
le = LabelEncoder()
y = le.fit_transform(df_t.source_id)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train.shape, X_test.shape, len(y_train[y_train == 4]), len(y_test[y_test == 4])

((2240, 498), (560, 498), 280, 70)

In [51]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [52]:
X_train.shape, y_train.shape

((2240, 498), (2240,))

In [53]:
df_train = pd.DataFrame(np.concatenate([X_train_scaled, y_train.reshape(-1, 1)], axis=1))
clusters_centres = df_train.drop_duplicates(subset=[498]).drop(columns=[498]).to_numpy()

# Модели с заранее известным количеством кластеров

## kmeans

In [91]:
params = {
    "n_clusters": len(set(y)),
    "random_state": 42,
    # "max_iter": 10_000,
    # "algorithm": "elkan",
    "init": 'k-means++',
    # "tol": 0.01,
}
sk_kmeans = KMeans(**params)
sk_kmeans.fit(X_train_scaled)

  super()._check_params_vs_input(X, default_n_init=10)


In [92]:
preds_train = sk_kmeans.predict(X_train_scaled)
preds_test = sk_kmeans.predict(X_test_scaled)
kmeans_f1_w_train = f1_score(y_train, preds_train, average='weighted')
kmeans_f1_w_test = f1_score(y_test, preds_test, average='weighted')
kmeans_f1_w_train, kmeans_f1_w_test

(0.001777319114780415, 0.0016339869281045754)

In [94]:
import mlflow
from sklearn.preprocessing import StandardScaler

from sklearn import metrics

mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.set_experiment(f"clusterization, 102 classes, 5 per class")

run_name = "kmeans normalized centres k-means++"

with mlflow.start_run(run_name=run_name) as run:
    params = {
    "n_clusters": len(set(y)),
    "random_state": 42,
    # "max_iter": 10_000,
    # "algorithm": "elkan",
    "init": 'k-means++',
    # "tol": 0.01,
    }
    sk_kmeans = KMeans(**params)
    sk_kmeans.fit(X_train_scaled)

    predicts = sk_kmeans.predict(X_test_scaled)

    mlflow.log_metric("train f1_weighted", metrics.f1_score(y_train, sk_kmeans.predict(X_train_scaled), average="weighted"))
    mlflow.log_metric("f1_weighted", metrics.f1_score(y_test, predicts, average="weighted"))
    mlflow.log_metric("f1_micro", metrics.f1_score(y_test, predicts, average="micro"))
    mlflow.log_metric("accuracy", metrics.accuracy_score(y_test, predicts))
    
    mlflow.log_params(params)

  super()._check_params_vs_input(X, default_n_init=10)


In [70]:
params = {
    "n_clusters": len(set(y)),
    "random_state": 42,
    # "max_iter": 10_000,
    # "algorithm": "elkan",
    "init": clusters_centres,
    # "tol": 0.01,
}
sk_kmeans = KMeans(**params)
sk_kmeans.fit(X_train_scaled)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


In [71]:
preds_train = sk_kmeans.predict(X_train_scaled)
preds_test = sk_kmeans.predict(X_test_scaled)
kmeans_f1_w_train = f1_score(y_train, preds_train, average='weighted')
kmeans_f1_w_test = f1_score(y_test, preds_test, average='weighted')
kmeans_f1_w_train, kmeans_f1_w_test

(0.19033971923419596, 0.19371523308166683)

## AgglomerativeClustering

In [67]:
sk_ac = AgglomerativeClustering(n_clusters=len(set(y)), linkage='ward')

sk_ac_preds = sk_ac.fit_predict(X)

sk_ac_f1_w = f1_score(y, sk_ac_preds, average='weighted')
sk_ac_f1_w

0.1823282267597413

In [95]:
import mlflow
from sklearn.preprocessing import StandardScaler

from sklearn import metrics

mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.set_experiment(f"clusterization, 102 classes, 5 per class")

run_name = "Agglomerative Clustering ward"

with mlflow.start_run(run_name=run_name) as run:
    params = {
    "n_clusters": len(set(y)),
    "linkage": 'ward'
    }
    sk_ac = AgglomerativeClustering(**params)

    predicts = sk_ac.fit_predict(X)

    mlflow.log_metric("f1_weighted", metrics.f1_score(y, predicts, average="weighted"))
    mlflow.log_metric("f1_micro", metrics.f1_score(y, predicts, average="micro"))
    mlflow.log_metric("accuracy", metrics.accuracy_score(y, predicts))
    
    mlflow.log_params(params)

## SpectralClustering

In [81]:
sk_ac = SpectralClustering(n_clusters=len(set(y)), gamma=10, random_state=0)

sk_ac_preds = sk_ac.fit_predict(X)

sk_ac_f1_w = f1_score(y, sk_ac_preds, average='weighted')
sk_ac_f1_w



0.11766561309783533

In [96]:
import mlflow
from sklearn.preprocessing import StandardScaler

from sklearn import metrics

mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.set_experiment(f"clusterization, 102 classes, 5 per class")

run_name = "Spectral Clustering"

with mlflow.start_run(run_name=run_name) as run:
    params = {
    "n_clusters": len(set(y)),
    'random_state': 0,
    }
    sk_ac = SpectralClustering(**params)

    predicts = sk_ac.fit_predict(X)

    mlflow.log_metric("f1_weighted", metrics.f1_score(y, predicts, average="weighted"))
    mlflow.log_metric("f1_micro", metrics.f1_score(y, predicts, average="micro"))
    mlflow.log_metric("accuracy", metrics.accuracy_score(y, predicts))
    
    mlflow.log_params(params)



# Модели без заранее заданного количества классов

## ESoinn

In [45]:
from essoin_model.esoinn import ESoinn

s = ESoinn(iteration_threshold=300, plt_in_fit=False)
s.fit(X)

 10%|▉         | 276/2800 [00:00<00:04, 518.61it/s] 

[False, True, True, True, False, True, False, True, True, True, False, False, False, True, False, False, True, False, True, False, False, False, True, True, False, False, False, False, False, False, False, True, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, True, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, True, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False,

 15%|█▍        | 410/2800 [00:01<00:06, 344.72it/s]

Number of classes： 10


 20%|██        | 560/2800 [00:01<00:06, 359.09it/s]

[False, True, False, True, False, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, False, True, False, False, True, True, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fal

 24%|██▍       | 675/2800 [00:02<00:09, 233.10it/s]

Number of classes： 11


 31%|███▏      | 880/2800 [00:02<00:06, 287.04it/s]

[True, False, False, True, True, False, True, True, False, True, False, True, True, True, True, True, False, False, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fal

 34%|███▍      | 946/2800 [00:03<00:12, 145.22it/s]

Number of classes： 9


 43%|████▎     | 1198/2800 [00:04<00:06, 246.04it/s]

[False, False, True, False, True, False, True, False, False, True, False, False, True, True, False, True, True, False, True, True, True, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False,

 46%|████▌     | 1290/2800 [00:05<00:08, 188.32it/s]

Number of classes： 12


 52%|█████▏    | 1465/2800 [00:05<00:04, 306.11it/s]

[True, False, False, True, False, True, False, True, True, True, False, False, True, True, False, True, True, False, True, False, True, False, False, False, False, False, False, False, True, False, False, False, True, True, False, True, True, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fa

 56%|█████▌    | 1565/2800 [00:06<00:07, 161.20it/s]

Number of classes： 11


 64%|██████▍   | 1785/2800 [00:07<00:04, 251.66it/s]

[True, False, False, True, False, True, True, False, True, False, False, False, False, True, False, True, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, Fal

 67%|██████▋   | 1868/2800 [00:08<00:06, 139.84it/s]

Number of classes： 10


 74%|███████▍  | 2086/2800 [00:09<00:03, 207.50it/s]

[False, False, False, False, False, True, False, True, False, False, True, False, True, False, False, False, False, False, False, True, False, False, False, False, False, True, True, False, True, False, False, False, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, F

 77%|███████▋  | 2164/2800 [00:10<00:05, 122.61it/s]

Number of classes： 10


 86%|████████▌ | 2397/2800 [00:11<00:02, 198.07it/s]

[False, False, False, False, False, False, False, False, False, True, False, False, True, False, True, True, True, False, False, False, True, False, False, False, False, True, True, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False

 88%|████████▊ | 2468/2800 [00:12<00:02, 112.83it/s]

Number of classes： 11


 96%|█████████▌| 2680/2800 [00:12<00:00, 215.79it/s]

[False, False, False, False, False, True, False, False, False, True, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False

100%|██████████| 2800/2800 [00:14<00:00, 197.14it/s]

Number of classes： 12
Number of classes： 103





<Figure size 432x288 with 0 Axes>

In [46]:
essoin_preds = s.predict(X)
essoin_score = adjusted_rand_score(y, essoin_preds)
essoin_score

Number of classes： 103


  0%|          | 0/2800 [00:00<?, ?it/s]

100%|██████████| 2800/2800 [00:00<00:00, 2823.33it/s]


0.2775579507089917

In [48]:
import mlflow
from sklearn.preprocessing import StandardScaler
from essoin_model.esoinn import ESoinn

from sklearn import metrics

mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.set_experiment(f"clusterization without the number of classes, 8 classes")

run_name = "ESoinn"

with mlflow.start_run(run_name=run_name) as run:
    params = {
        "iteration_threshold": 300,
        "plt_in_fit": False,
    }
    s = ESoinn(**params)
    s.fit(X)

    essoin_preds = s.predict(X)
    mlflow.log_metric("adjusted_rand_score", adjusted_rand_score(y, essoin_preds))

    mlflow.log_params(params)


  9%|▉         | 262/2800 [00:00<00:04, 579.39it/s]

[True, True, False, True, True, True, False, False, False, False, True, True, False, True, False, False, False, False, False, False, True, True, False, True, True, False, False, True, False, False, True, False, False, False, False, False, True, False, False, True, True, False, True, False, False, False, False, False, False, False, True, True, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fal

 13%|█▎        | 377/2800 [00:01<00:07, 313.42it/s]

Number of classes： 5


 20%|██        | 574/2800 [00:01<00:06, 350.91it/s]

[False, False, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, False, True, False, True, False, True, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, True, False, True, False, False, False, False, False, True, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, True, False, True, True, False, False, True, False, False, False, False, True, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fals

 24%|██▎       | 660/2800 [00:02<00:12, 172.93it/s]

Number of classes： 4


 31%|███▏      | 877/2800 [00:03<00:07, 244.17it/s]

[False, True, True, False, True, False, True, False, True, False, False, True, False, False, True, True, False, False, True, True, True, False, False, False, False, True, True, False, True, True, True, False, True, True, False, True, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, True, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False,

 34%|███▍      | 954/2800 [00:04<00:16, 113.87it/s]

Number of classes： 6


 43%|████▎     | 1193/2800 [00:05<00:08, 194.41it/s]

[False, True, True, False, True, False, True, False, False, True, False, False, True, False, True, False, False, True, False, True, False, False, False, True, False, False, True, False, False, False, True, True, True, True, False, False, False, False, False, False, False, True, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, 

 45%|████▌     | 1260/2800 [00:06<00:17, 90.54it/s] 

Number of classes： 5


 53%|█████▎    | 1480/2800 [00:07<00:06, 194.14it/s]

[False, False, False, True, True, True, True, False, False, True, True, True, True, False, True, False, False, False, False, False, True, False, False, False, False, True, False, False, False, False, False, True, False, False, True, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fals

 55%|█████▌    | 1547/2800 [00:08<00:15, 83.36it/s] 

Number of classes： 5


 64%|██████▎   | 1783/2800 [00:09<00:05, 189.37it/s]

[False, False, False, True, True, False, True, False, False, True, False, True, False, False, True, False, False, True, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, True, True, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False,

 66%|██████▌   | 1846/2800 [00:11<00:12, 74.83it/s] 

Number of classes： 5


 75%|███████▍  | 2088/2800 [00:12<00:03, 184.00it/s]

[False, True, False, True, True, False, True, True, True, True, False, False, True, False, True, False, False, False, False, True, False, False, False, True, False, True, False, False, False, False, False, False, True, False, True, False, False, False, False, False, False, True, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False,

 77%|███████▋  | 2148/2800 [00:14<00:09, 70.91it/s] 

Number of classes： 6


 85%|████████▌ | 2385/2800 [00:15<00:02, 175.91it/s]

[False, True, False, False, True, True, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, True, True, True, True, False, False, False, True, False, True, False, False, False, False, True, False, True, True, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False,

 88%|████████▊ | 2478/2800 [00:16<00:03, 89.34it/s] 

Number of classes： 6


 96%|█████████▋| 2695/2800 [00:17<00:00, 175.81it/s]

[False, True, True, False, True, False, True, True, False, True, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, True, False, True, False, False, True, False, False, True, True, False, True, False, False, False, True, True, False, True, False, True, False, False, False, False, False, True, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, Fa

 98%|█████████▊| 2752/2800 [00:19<00:00, 62.14it/s] 

Number of classes： 4


100%|██████████| 2800/2800 [00:19<00:00, 141.69it/s]

Number of classes： 89





<Figure size 432x288 with 0 Axes>

Number of classes： 89


100%|██████████| 2800/2800 [00:00<00:00, 3160.76it/s]


## DBScan

In [54]:
sk_dbscan = DBSCAN(eps=0.3, min_samples=3)
sk_dbscan_pred_res = sk_dbscan.fit_predict(X)
sk_dbscan_ari = adjusted_rand_score(y, sk_dbscan_pred_res)
print(f'Adjusted Rand Score for sk DBSCAN: {sk_dbscan_ari}', '', sep='\n')
print('prediction', sk_dbscan_pred_res, sep='\n')

Adjusted Rand Score for sk DBSCAN: 0.0

prediction
[-1 -1 -1 ... -1 -1 -1]


In [55]:
import mlflow
from sklearn.preprocessing import StandardScaler
from essoin_model.esoinn import ESoinn

from sklearn import metrics

mlflow.set_tracking_uri("http://127.0.0.1:5000")
experiment = mlflow.set_experiment(f"clusterization without the number of classes, 8 classes")

run_name = "DBSCAN"

with mlflow.start_run(run_name=run_name) as run:
    params = {
        "eps": 0.3, 
        "min_samples": 3,
    }
    s = DBSCAN(**params)
    preds = s.fit_predict(X)
    mlflow.log_metric("adjusted_rand_score", adjusted_rand_score(y, preds))
    
    mlflow.log_params(params)