In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.io as pio
from statistics import mean

from clustering import (
    StandardKMeans,
    StandardGaussianMixture,
    SpearmanGaussianMixture,
    MADSpearmanGaussianMixture,
    KendallGaussianMixture,
    MADKendallGaussianMixture,
    OrtizGaussianMixture,
    MADOrtizGaussianMixture,
    ApproxOrtizGaussianMixture,
    MADApproxOrtizGaussianMixture
)
from utils import plot_gaussian_mixtures

pio.templates.default = 'ggplot2'

## Iris dataset

In [2]:
from sklearn import datasets

iris = datasets.load_iris()
iris_df = pd.DataFrame(iris.data)
iris_df['class'] = iris.target
iris_df.columns = ('sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class')
iris_df.dropna(how='all', inplace=True)

In [3]:
iris_df

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [4]:
iris_x = np.array(iris_df.iloc[:, [0, 1, 2, 3]])

In [5]:
iris_y = np.array(iris_df.iloc[:, -1])

### Planteando el experimento:

* número de clusters: 3
* Dimensiones: 4
* observaciones: 150

In [6]:
n_clusters = 3

In [7]:
iris_x_scaled = StandardScaler().fit_transform(iris_x)


### StandardGaussianMixture

In [8]:
results = []
for _ in range(100):
    gmm = StandardGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.9366666666666666


### K-Means

In [9]:
results = []
for _ in range(100):
    model = StandardKMeans(n_clusters=n_clusters)
    model.fit(iris_x_scaled)
    results.append(model.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.8309333333333334


### SpearmanGaussianMixture

In [28]:
results = []
for i in range(100):
    gmm = SpearmanGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.7870666666666666


### MADSpearmanGaussianMixture

In [29]:
results = []
for i in range(100):
    gmm = MADSpearmanGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.7566666666666667


### KendallGaussianMixture

In [33]:
results = []
for i in range(100):
    gmm = KendallGaussianMixture(n_components=n_clusters, reg_covar=1)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)

    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

0.8254


  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),


### MADKendallGaussianMixture

In [35]:
results = []
for i in range(100):
    gmm = MADKendallGaussianMixture(n_components=n_clusters, reg_covar=1)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

0.8189333333333333


  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

### OrtizGaussianMixture

In [None]:
results = []
for i in range(10):
    gmm = OrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

In [16]:
gmm = OrtizGaussianMixture(n_components=n_clusters)
gmm.fit(iris_x_scaled)
gmm.accuracy_score(iris_x_scaled, iris_y)

KeyboardInterrupt: 

### MADOrtizGaussianMixture

In [None]:
results = []
for i in range(100):
    gmm = MADOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

In [None]:
gmm = MADOrtizGaussianMixture(n_components=n_clusters)
gmm.fit(iris_x_scaled)
gmm.accuracy_score(iris_x_scaled, iris_y)

### ApproxOrtizGaussianMixture

In [None]:
results = []
for i in range(10):
    gmm = ApproxOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

In [17]:
gmm = ApproxOrtizGaussianMixture(n_components=n_clusters)
gmm.fit(iris_x_scaled)
gmm.accuracy_score(iris_x_scaled, iris_y)



0.7933333333333333

### MADApproxOrtizGaussianMixture

In [None]:
results = []
for i in range(100):
    gmm = MADApproxOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

### Con sólo 2 dimensiones

In [45]:
iris_df

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class,cluster
0,5.1,3.5,1.4,0.2,0,0
1,4.9,3.0,1.4,0.2,0,0
2,4.7,3.2,1.3,0.2,0,0
3,4.6,3.1,1.5,0.2,0,0
4,5.0,3.6,1.4,0.2,0,0
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,1
146,6.3,2.5,5.0,1.9,2,1
147,6.5,3.0,5.2,2.0,2,1
148,6.2,3.4,5.4,2.3,2,1


In [46]:
iris_df_2d = iris_df[['sepal_len', 'petal_len']]
iris_x = np.array(iris_df_2d)
iris_y = np.array(iris_df['class'])

In [47]:
iris_x

array([[5.1, 1.4],
       [4.9, 1.4],
       [4.7, 1.3],
       [4.6, 1.5],
       [5. , 1.4],
       [5.4, 1.7],
       [4.6, 1.4],
       [5. , 1.5],
       [4.4, 1.4],
       [4.9, 1.5],
       [5.4, 1.5],
       [4.8, 1.6],
       [4.8, 1.4],
       [4.3, 1.1],
       [5.8, 1.2],
       [5.7, 1.5],
       [5.4, 1.3],
       [5.1, 1.4],
       [5.7, 1.7],
       [5.1, 1.5],
       [5.4, 1.7],
       [5.1, 1.5],
       [4.6, 1. ],
       [5.1, 1.7],
       [4.8, 1.9],
       [5. , 1.6],
       [5. , 1.6],
       [5.2, 1.5],
       [5.2, 1.4],
       [4.7, 1.6],
       [4.8, 1.6],
       [5.4, 1.5],
       [5.2, 1.5],
       [5.5, 1.4],
       [4.9, 1.5],
       [5. , 1.2],
       [5.5, 1.3],
       [4.9, 1.4],
       [4.4, 1.3],
       [5.1, 1.5],
       [5. , 1.3],
       [4.5, 1.3],
       [4.4, 1.3],
       [5. , 1.6],
       [5.1, 1.9],
       [4.8, 1.4],
       [5.1, 1.6],
       [4.6, 1.4],
       [5.3, 1.5],
       [5. , 1.4],
       [7. , 4.7],
       [6.4, 4.5],
       [6.9,

In [48]:
iris_y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [49]:
n_clusters = 3
iris_x_scaled = StandardScaler().fit_transform(iris_x)

In [None]:
fig = px.scatter(x=iris_x_scaled[:, 0], y=iris_x_scaled[:, 1], color=iris_y)
fig.show()

In [51]:
results = []
for i in range(100):
    gmm = StandardGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.7373333333333333


In [52]:
plot_gaussian_mixtures(gmm, iris_x_scaled)

In [53]:
results = []
for i in range(10):
    gmm = SpearmanGaussianMixture(n_components=n_clusters)
    gmm.fit(iris_x_scaled)
    iris_df['cluster'] = gmm.predict(iris_x_scaled)
    results.append(gmm.accuracy_score(iris_x_scaled, iris_y))
print(mean(results))

0.7066666666666667


In [54]:
plot_gaussian_mixtures(gmm, iris_x_scaled)

## QCM SENSOR ALCOHOL DATASET

In [18]:
qcm3 = pd.read_csv(
    '../data/QCM3.csv',
    sep=';')
qcm6 = pd.read_csv(
    '../data/QCM6.csv',
    sep=';')
qcm7 = pd.read_csv(
    '../data/QCM7.csv',
    sep=';')
qcm10 = pd.read_csv(
    '../data/QCM10.csv',
    sep=';')
qcm12 = pd.read_csv(
    '../data/QCM12.csv',
    sep=';')

In [19]:
qcm = pd.concat([qcm3, qcm6, qcm7, qcm10, qcm12])
print("Shape of dataset: ", qcm.shape)

Shape of dataset:  (125, 15)


In [20]:
qcm.head()

Unnamed: 0,0.799_0.201,0.799_0.201.1,0.700_0.300,0.700_0.300.1,0.600_0.400,0.600_0.400.1,0.501_0.499,0.501_0.499.1,0.400_0.600,0.400_0.600.1,1-Octanol,1-Propanol,2-Butanol,2-propanol,1-isobutanol
0,-10.06,-10.62,-14.43,-18.31,-24.64,-30.56,-38.62,-45.59,-54.89,-62.28,1,0,0,0,0
1,-9.69,-10.86,-16.73,-21.75,-28.47,-35.83,-43.65,-52.43,-61.92,-71.27,1,0,0,0,0
2,-12.07,-14.28,-21.54,-27.92,-35.19,-43.94,-52.04,-62.49,-71.97,-83.1,1,0,0,0,0
3,-14.21,-17.41,-25.91,-33.36,-41.29,-51.27,-59.94,-71.55,-81.51,-93.83,1,0,0,0,0
4,-16.57,-20.35,-29.97,-37.84,-47.03,-57.29,-67.13,-78.96,-90.01,-102.65,1,0,0,0,0


In [21]:
qcm.loc[qcm["1-Octanol"] == 1, 'alcohol'] = 0
qcm.loc[qcm["1-Propanol"] == 1, 'alcohol'] = 1
qcm.loc[qcm["2-Butanol"] == 1, 'alcohol'] = 2
qcm.loc[qcm["2-propanol"] == 1, 'alcohol'] = 3
qcm.loc[qcm["1-isobutanol"] == 1, 'alcohol'] = 4


In [22]:
qcm.drop(['1-Octanol', "1-Propanol", "2-Butanol", "2-propanol", "1-isobutanol"], axis=1, inplace=True)

In [23]:
qcm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 125 entries, 0 to 24
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   0.799_0.201    125 non-null    float64
 1   0.799_0.201.1  125 non-null    float64
 2   0.700_0.300    125 non-null    float64
 3   0.700_0.300.1  125 non-null    float64
 4   0.600_0.400    125 non-null    float64
 5   0.600_0.400.1  125 non-null    float64
 6   0.501_0.499    125 non-null    float64
 7   0.501_0.499.1  125 non-null    float64
 8   0.400_0.600    125 non-null    float64
 9   0.400_0.600.1  125 non-null    float64
 10  alcohol        125 non-null    float64
dtypes: float64(11)
memory usage: 11.7 KB


In [24]:
qcm_x = np.array(qcm.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
qcm_y = np.array(qcm.iloc[:, [10]])


In [25]:
n_clusters = 5

In [26]:
qcm_x_scaled = StandardScaler().fit_transform(qcm_x)

### K-Means

In [27]:
results = []
for i in range(100):
    gmm = StandardKMeans(n_clusters=n_clusters)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.50336


### StandardGaussianMixture

In [51]:
results = []
for i in range(100):
    gmm = StandardGaussianMixture(n_components=n_clusters)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.4284


### SpearmanGaussianMixture

In [54]:
results = []
for i in range(100):
    gmm = SpearmanGaussianMixture(n_components=n_clusters, max_iter=100, reg_covar=1e-3)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    gmm.accuracy_score(qcm_x_scaled, qcm_y)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.43488


### MADSpearmanGaussianMixture

In [55]:
results = []
for i in range(100):
    gmm = MADSpearmanGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=1e-3)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    gmm.accuracy_score(qcm_x_scaled, qcm_y)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.47719999999999996


### KendallGaussianMixture

In [72]:
results = []
for i in range(100):
    gmm = KendallGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=10)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    gmm.accuracy_score(qcm_x_scaled, qcm_y)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in doub

0.28608



invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars



### MADKendallGaussianMixture

In [78]:
results = []
for i in range(100):
    try:
        gmm = MADKendallGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=2)
        gmm.fit(qcm_x_scaled)
        qcm['cluster'] = gmm.predict(qcm_x_scaled)
        gmm.accuracy_score(qcm_x_scaled, qcm_y)
        results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
    except ValueError:
        pass
print(mean(results))


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in doub

0.4028



invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars



### OrtizGaussianMixture

In [29]:
gmm = OrtizGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=10)
gmm.fit(qcm_x_scaled)

KeyboardInterrupt: 

In [84]:
results = []
for i in range(10):
    try:
        gmm = OrtizGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=10)
        gmm.fit(qcm_x_scaled)
        gmm.accuracy_score(qcm_x_scaled, qcm_y)
        results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
    except ValueError:
        pass
print(mean(results))

StatisticsError: mean requires at least one data point

### MADOrtizGaussianMixture

In [46]:
results = []
for i in range(100):
    gmm = MADOrtizGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=1e-3)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    gmm.accuracy_score(qcm_x_scaled, qcm_y)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.51856


### ApproxOrtizGaussianMixture

In [47]:

results = []
for i in range(100):
    gmm = ApproxOrtizGaussianMixture(n_components=n_clusters, max_iter=1000, reg_covar=1e-3)
    gmm.fit(qcm_x_scaled)
    qcm['cluster'] = gmm.predict(qcm_x_scaled)
    gmm.accuracy_score(qcm_x_scaled, qcm_y)
    results.append(gmm.accuracy_score(qcm_x_scaled, qcm_y))
print(mean(results))

0.5190400000000001


In [32]:
gmm = ApproxOrtizGaussianMixture(n_components=n_clusters, reg_covar=1)
gmm.fit(qcm_x_scaled)
gmm.accuracy_score(qcm_x_scaled, iris_y)

KeyboardInterrupt: 

## Seeds DataSet

In [39]:
seeds = pd.read_csv('../data/seeds.csv')

In [40]:
columns = [
    'area','perimeter','compactness','lengthOfKernel','widthOfKernel','asymmetryCoefficient','lengthOfKernelGroove','seedType'
]

In [41]:
seeds.columns = columns

In [42]:
seeds

Unnamed: 0,area,perimeter,compactness,lengthOfKernel,widthOfKernel,asymmetryCoefficient,lengthOfKernelGroove,seedType
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,0
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,0
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,0
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,0
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,0
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,2
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,2
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,2
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,2


In [43]:
seeds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 210 entries, 0 to 209
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   area                  210 non-null    float64
 1   perimeter             210 non-null    float64
 2   compactness           210 non-null    float64
 3   lengthOfKernel        210 non-null    float64
 4   widthOfKernel         210 non-null    float64
 5   asymmetryCoefficient  210 non-null    float64
 6   lengthOfKernelGroove  210 non-null    float64
 7   seedType              210 non-null    int64  
dtypes: float64(7), int64(1)
memory usage: 13.2 KB


In [44]:
def seed(seed_type):
    return seed_type - 1

In [45]:
seeds['label'] = seeds['seedType'].apply(lambda x: seed(x))


In [46]:
seed_x = np.array(seeds.iloc[:, [0, 1, 2, 3, 4, 5, 6]])
seed_y = np.array(seeds.iloc[:, [-1]])

In [47]:
seed_x_scaled = StandardScaler().fit_transform(seed_x)

In [48]:
n_clusters = 3

### K-Means

In [58]:
results = []
for i in range(100):
    gmm = StandardKMeans(n_clusters=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.919047619047619


### StandardGaussianMixture

In [57]:
results = []
for i in range(100):
    gmm = StandardGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.9070476190476191


### SpearmanGaussianMixture

In [59]:
results = []
for i in range(100):
    gmm = SpearmanGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.9153333333333333


### MADSpearmanGaussianMixture

In [60]:
results = []
for i in range(100):
    gmm = MADSpearmanGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))


0.8987142857142857


### KendallGaussianMixture

In [63]:
results = []
for i in range(100):
    gmm = KendallGaussianMixture(n_components=n_clusters, reg_covar=1)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

0.9238095238095239


  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),


### MADKendallGaussianMixture

In [65]:
results = []
for i in range(100):
    gmm = MADKendallGaussianMixture(n_components=n_clusters, reg_covar=1)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

0.9223333333333333


  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weigher, additive),
  return WeightedTauResult(_weightedrankedtau(x, y, rank, weighe

### OrtizGaussianMixture

In [62]:
results = []
for i in range(100):
    gmm = OrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.9154285714285714


### MADOrtizGaussianMixture

In [63]:
results = []
for i in range(100):
    gmm = MADOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.8995714285714286


### ApproxOrtizGaussianMixture

In [66]:
results = []
for i in range(2):
    gmm = ApproxOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

KeyboardInterrupt: 

### MADApproxOrtizGaussianMixture

In [65]:
results = []
for i in range(100):
    gmm = MADApproxOrtizGaussianMixture(n_components=n_clusters)
    gmm.fit(seed_x_scaled)
    seeds['cluster'] = gmm.predict(seed_x_scaled)
    gmm.accuracy_score(seed_x_scaled, seed_y)
    results.append(gmm.accuracy_score(seed_x_scaled, seed_y))
print(mean(results))

0.8983333333333333


## ElectricalGrid

In [67]:
electrical = pd.read_csv('../data/Data_for_UCI_named.csv')

In [68]:
electrical

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable


In [69]:
electrical.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tau1    10000 non-null  float64
 1   tau2    10000 non-null  float64
 2   tau3    10000 non-null  float64
 3   tau4    10000 non-null  float64
 4   p1      10000 non-null  float64
 5   p2      10000 non-null  float64
 6   p3      10000 non-null  float64
 7   p4      10000 non-null  float64
 8   g1      10000 non-null  float64
 9   g2      10000 non-null  float64
 10  g3      10000 non-null  float64
 11  g4      10000 non-null  float64
 12  stab    10000 non-null  float64
 13  stabf   10000 non-null  object 
dtypes: float64(13), object(1)
memory usage: 1.1+ MB


In [70]:
def electrical_label(stabf):
    if stabf == 'stable':
        return 1
    else:
        return 0

In [71]:
electrical['label'] = electrical['stabf'].apply(lambda x: electrical_label(x))
#electrical = electrical[0:400]

In [72]:
electrical

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf,label
0,2.959060,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable,0
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.781760,-0.005957,stable,1
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.277210,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable,0
3,0.716415,7.669600,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable,0
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.797110,0.455450,0.656947,0.820923,0.049860,unstable,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2.930406,9.487627,2.376523,6.187797,3.343416,-0.658054,-1.449106,-1.236256,0.601709,0.779642,0.813512,0.608385,0.023892,unstable,0
9996,3.392299,1.274827,2.954947,6.894759,4.349512,-1.663661,-0.952437,-1.733414,0.502079,0.567242,0.285880,0.366120,-0.025803,stable,1
9997,2.364034,2.842030,8.776391,1.008906,4.299976,-1.380719,-0.943884,-1.975373,0.487838,0.986505,0.149286,0.145984,-0.031810,stable,1
9998,9.631511,3.994398,2.757071,7.821347,2.514755,-0.966330,-0.649915,-0.898510,0.365246,0.587558,0.889118,0.818391,0.037789,unstable,0


In [73]:
elect_x = np.array(electrical.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]])
elect_y = np.array(electrical.iloc[:, [-1]])

In [74]:
elect_x_scaled = StandardScaler().fit_transform(elect_x)

In [75]:
n_clusters = 2

### K-Means

In [77]:
results = []
for i in range(20):
    gmm = StandardKMeans(n_clusters=n_clusters)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.638


### StandardGaussianMixture

In [78]:
results = []
for i in range(20):
    gmm = StandardGaussianMixture(n_components=n_clusters)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.79059


### SpearmanGaussianMixture

In [76]:
results = []
for i in range(20):
    gmm = SpearmanGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.65924


### MADSpearmanGaussianMixture

In [77]:
results = []
for i in range(20):
    gmm = MADSpearmanGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.638


### KendallGaussianMixture

In [78]:
results = []
for i in range(20):
    gmm = KendallGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.662765


### MADKendallGaussianMixture

In [79]:
results = []
for i in range(20):
    gmm = MADKendallGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.638


### OrtizGaussianMixture,

In [80]:
results = []
for i in range(20):
    gmm = OrtizGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.66854


### MADOrtizGaussianMixture

In [81]:
results = []
for i in range(20):
    gmm = MADOrtizGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))


0.638


### ApproxOrtizGaussianMixture

In [82]:
results = []
for i in range(20):
    gmm = ApproxOrtizGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    gmm.accuracy_score(elect_x_scaled, elect_y)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.66386


### MADApproxOrtizGaussianMixture

In [83]:
results = []
for i in range(100):
    gmm = MADApproxOrtizGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(elect_x_scaled)
    electrical['cluster'] = gmm.predict(elect_x_scaled)
    gmm.accuracy_score(elect_x_scaled, elect_y)
    results.append(gmm.accuracy_score(elect_x_scaled, elect_y))
print(mean(results))

0.638


### Usando 2 dimensiones

## Avila

In [84]:
avila = pd.read_csv(
    '/Users/kevinstivmarincastano/Library/CloudStorage/OneDrive-UniversidadEAFIT/3. Semestre 3/1. Estadística no paramétrica/4. proyecto/robust-k-means/datasets/avila/avila/avila-tr.txt',
    header=None)

In [85]:
avila[10].unique()

array(['A', 'F', 'H', 'E', 'I', 'Y', 'D', 'X', 'G', 'W', 'C', 'B'],
      dtype=object)

In [86]:
avila[10].replace(['A', 'F', 'H', 'E', 'I', 'Y', 'D', 'X', 'G', 'W', 'C', 'B'],
                  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], inplace=True)

In [87]:
avila

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.266074,-0.165620,0.320980,0.483299,0.172340,0.273364,0.371178,0.929823,0.251173,0.159345,0
1,0.130292,0.870736,-3.210528,0.062493,0.261718,1.436060,1.465940,0.636203,0.282354,0.515587,0
2,-0.116585,0.069915,0.068476,-0.783147,0.261718,0.439463,-0.081827,-0.888236,-0.123005,0.582939,0
3,0.031541,0.297600,-3.210528,-0.583590,-0.721442,-0.307984,0.710932,1.051693,0.594169,-0.533994,0
4,0.229043,0.807926,-0.052442,0.082634,0.261718,0.148790,0.635431,0.051062,0.032902,-0.086652,1
...,...,...,...,...,...,...,...,...,...,...,...
10425,0.080916,0.588093,0.015130,0.002250,0.261718,-0.557133,0.371178,0.932346,0.282354,-0.580141,1
10426,0.253730,-0.338346,0.352988,-1.154243,0.172340,-0.557133,0.257927,0.348428,0.032902,-0.527134,1
10427,0.229043,-0.000745,0.171611,-0.002793,0.261718,0.688613,0.295677,-1.088486,-0.590727,0.580142,0
10428,-0.301743,0.352558,0.288973,1.638181,0.261718,0.688613,0.069175,0.502761,0.625350,0.718969,3


In [88]:
avila.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10430 entries, 0 to 10429
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       10430 non-null  float64
 1   1       10430 non-null  float64
 2   2       10430 non-null  float64
 3   3       10430 non-null  float64
 4   4       10430 non-null  float64
 5   5       10430 non-null  float64
 6   6       10430 non-null  float64
 7   7       10430 non-null  float64
 8   8       10430 non-null  float64
 9   9       10430 non-null  float64
 10  10      10430 non-null  int64  
dtypes: float64(10), int64(1)
memory usage: 896.5 KB


In [89]:
avila_x = np.array(avila.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
avila_y = np.array(avila.iloc[:, [-1]])

In [90]:
avila_x_scaled = StandardScaler().fit_transform(avila_x)

In [91]:
n_clusters = 12

### StandardGaussianMixture

In [92]:
results = []
for i in range(10):
    gmm = StandardGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(avila_x_scaled)
    avila['cluster'] = gmm.predict(avila_x_scaled)
    results.append(gmm.accuracy_score(avila_x_scaled, avila_y))
print(mean(results))

0.4706903163950144


### SpearmanGaussianMixture

In [93]:
results = []
for i in range(10):
    gmm = SpearmanGaussianMixture(n_components=n_clusters, reg_covar=1)
    gmm.fit(avila_x_scaled)
    avila['cluster'] = gmm.predict(avila_x_scaled)
    results.append(gmm.accuracy_score(avila_x_scaled, avila_y))
print(mean(results))


invalid value encountered in divide



ValueError: array must not contain infs or NaNs

### MADSpearmanGaussianMixture

In [None]:
gmm = MADSpearmanGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
gmm.fit(avila_x_scaled)
avila['cluster'] = gmm.predict(avila_x_scaled)
gmm.accuracy_score(avila_x_scaled, avila_y)


invalid value encountered in divide



ValueError: array must not contain infs or NaNs

In [None]:
results = []
for i in range(10):
    gmm = ApproxOrtizGaussianMixture(n_components=n_clusters, reg_covar=1e-3)
    gmm.fit(avila_x_scaled)
    avila['cluster'] = gmm.predict(avila_x_scaled)
    results.append(gmm.accuracy_score(avila_x_scaled, avila_y))
print(mean(results))


invalid value encountered in divide



ValueError: array must not contain infs or NaNs