In [1]:
# Mount Google Drive at /content/gdrive so files stored there can be read by path.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [3]:
# Path of the CSV dataset, located under the Google Drive mount point.
file_path = "/content/gdrive/MyDrive/Colab Notebooks/Datasets/dataset_6.csv"

In [4]:
# Fail fast when the dataset path does not exist; otherwise confirm that the
# mounted drive is reachable.
if not os.path.exists(file_path):
    raise FileNotFoundError("No se encontró el archivo. Verifica la ruta del archivo en Google Drive.")
print("Google Drive conectado correctamente.")

Google Drive conectado correctamente.


In [5]:
# Read the whole CSV into memory; skiprows=0 means no leading row is skipped.
dataframe = pd.read_csv(file_path, skiprows=0)
# Confirmation message followed by a preview of the first rows.
print("Dataset cargado correctamente.", dataframe.head(), sep="\n")

Dataset cargado correctamente.
     0    1    2    3    4    5    6    7    8    9  ...  3062  3063  3064  \
0  108  245  109  108  245  109  108  245  109  108  ...   255   255   255   
1  255  255  255   28  114  186  255  255  255  255  ...   255   255   255   
2  189   74  225  189   74  225  189   74  225  189  ...   255   255   255   
3  130  184  210  130  184  210  130  184  210  130  ...   255   255   255   
4  255  255  255  255  255  255  255  255  255  255  ...   255   255   255   

   3065  3066  3067  3068  3069  3070  3071  
0   255   255   255   255   255   255   255  
1   255   255   255   255   255   255   255  
2   255   255   255   255   255   255   255  
3   255   255   255   255   255   255   255  
4   255   255   255   255   255   255   255  

[5 rows x 3072 columns]


In [6]:
# Show the table dimensions as (rows, columns).
print(f"Forma del dataframe: {dataframe.shape}")

Forma del dataframe: (50000, 3072)


In [7]:
# Feature matrix: every row and every column of the table (no column is held out).
X = dataframe.to_numpy()
# NOTE(review): this also selects every row and column, so `y` holds the same
# values as the raw `X` instead of a label vector. The table has 3072 columns,
# which is exactly 32*32*3, so there appears to be no label column - confirm
# where the labels are supposed to come from.
y = dataframe.to_numpy()

In [8]:
# Scale the pixel values from [0, 255] to [0, 1].
# The scaled matrix is rebuilt from `dataframe` instead of dividing `X` in place:
# `X = X / 255.0` shrinks the data again every time the cell is re-run, whereas
# this form always gives the same result. float32 keeps ample precision for
# 8-bit pixel values and needs half the memory of the default float64.
X = dataframe.to_numpy(dtype=np.float32) / 255.0
print(X.shape)

(50000, 3072)


In [11]:
# View every 3072-value row as a 32 x 32 x 3 block (presumably height x width x
# colour channels - confirm against the dataset description); -1 lets NumPy
# infer the number of images.
X = np.reshape(X, (-1, 32, 32, 3))

In [12]:
# Confirm the new layout of X after the reshape.
print(f"Forma de X después del reshape: {X.shape}")

Forma de X después del reshape: (50000, 32, 32, 3)


In [13]:
# Ordered (unshuffled) 60/40 split: the first 60% of the rows form the training
# set and the remaining rows form the test set.
train_size = int(0.6 * len(dataframe))
train_rows = slice(None, train_size)
test_rows = slice(train_size, None)
X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

In [14]:
# KMeans works on 2-D (samples, features) input, so each image is collapsed back
# into a single feature row.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

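De este punto en adelante la ejecución no terminó en ninguna ocasión: el proceso es demasiado pesado, pese a haber reducido `X_train_flat` al 60 % de los datos. El principal cuello de botella es el índice de silueta, que compara cada muestra con todas las demás, por lo que su coste crece de forma cuadrática con el número de muestras.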

In [None]:
# Sweep the number of clusters and record, for each k, the inertia (elbow
# method) and the silhouette score.
#
# The silhouette score needs the distance between every pair of samples, so its
# cost grows quadratically with the number of rows; evaluating it on the full
# training matrix for every k is what kept this cell from ever finishing. It is
# therefore estimated on a fixed-size random sample, with a fixed seed so the
# curve is reproducible.
SILHOUETTE_SAMPLE_SIZE = 2000

inertia = []
silhouette_scores = []
K = range(2, 20)
for k in K:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_train_flat)
    inertia.append(kmeans.inertia_)
    silhouette_scores.append(
        silhouette_score(
            X_train_flat,
            kmeans.labels_,
            # Never request more samples than there are rows.
            sample_size=min(SILHOUETTE_SAMPLE_SIZE, len(X_train_flat)),
            random_state=42,
        )
    )



In [None]:
# Both panels are created, filled and shown in a single cell. With the notebook
# inline backend a figure is rendered when its cell finishes, so spreading the
# subplots over several cells starts a new figure for the second panel and
# leaves a final stand-alone plt.show() with nothing to display.
fig, (ax_inertia, ax_silhouette) = plt.subplots(1, 2, figsize=(16, 8))

# Left panel: inertia for each k (elbow method).
ax_inertia.plot(K, inertia, 'bo-')
ax_inertia.set_xlabel('Número de clusters')
ax_inertia.set_ylabel('Inercia')
ax_inertia.set_title('Método del codo')

# Right panel: silhouette score for each k.
ax_silhouette.plot(K, silhouette_scores, 'bo-')
ax_silhouette.set_xlabel('Número de clusters')
ax_silhouette.set_ylabel('Índice de silueta')
ax_silhouette.set_title('Índice de silueta')

plt.show()

In [None]:
# Final model, fitted on the flattened training images with a fixed seed.
n_clusters = 10
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X_train_flat)
# Cluster index given to every training sample during fitting.
y_train_clusters = kmeans.labels_

In [None]:
# Assign every flattened test image to a cluster using the fitted model.
y_test_clusters = kmeans.predict(X_test_flat)

In [None]:
def accuracy(y_true, y_pred):
    """Return the fraction of positions at which two label arrays agree.

    Parameters
    ----------
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels; must have exactly the same shape as ``y_true``.

    Returns
    -------
    float
        Share of matching elements in [0, 1], or ``nan`` when the inputs are
        empty.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Convert first: `==` on plain Python lists compares the lists as a whole
    # and yields a single bool instead of an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Refuse mismatched shapes explicitly instead of relying on broadcasting,
    # which would either fail obscurely or silently report zero matches.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    # Nothing to compare: avoid dividing by zero.
    if y_true.size == 0:
        return float("nan")

    return float(np.mean(y_true == y_pred))

In [None]:
# Accuracy compares labels position by position, so it only makes sense when
# y_test holds exactly one label per test sample. Here y_test was sliced from
# the full pixel table (one row of pixel values per image), so its shape does
# not match the 1-D cluster assignments; the check below reports that clearly
# instead of printing a meaningless number or failing inside the comparison.
# NOTE(review): even with real labels, K-means cluster ids are arbitrary
# integers, so a cluster-to-class mapping is probably needed first - confirm.
if np.shape(y_test) == np.shape(y_test_clusters):
    test_acc = accuracy(y_test, y_test_clusters)
    print(f'Test accuracy: {test_acc}')
else:
    test_acc = None
    print(
        "Test accuracy not computed: y_test has shape "
        f"{np.shape(y_test)} but the cluster assignments have shape "
        f"{np.shape(y_test_clusters)}, so y_test does not hold one label per image."
    )

In [None]:
def classify_image(image):
    """Return the cluster that the fitted ``kmeans`` model assigns to one image.

    Parameters
    ----------
    image : array-like
        Pixel values of a single image on the raw 0-255 scale. The function
        divides by 255 itself, so data that is already scaled to [0, 1] must
        not be passed in.

    Returns
    -------
    numpy.ndarray
        One-element array with the predicted cluster index.
    """
    # Use the dtype of the fitted centroids rather than a hard-coded float32:
    # the prediction input then always has the same floating type as the data
    # the model was fitted on, whichever precision that was.
    model_dtype = kmeans.cluster_centers_.dtype
    image = np.asarray(image).astype(model_dtype) / 255.0
    # The model expects a 2-D (n_samples, n_features) input: one row here.
    image_flat = image.reshape((1, -1))
    cluster_label = kmeans.predict(image_flat)
    return cluster_label

In [None]:
# classify_image expects raw 0-255 pixel values and divides by 255 itself, while
# X_test was already scaled to [0, 1]. Restore the raw scale first so the image
# is not normalised twice before being compared with the centroids.
new_image = X_test[0] * 255.0  # Example image taken from the test set
cluster_label = classify_image(new_image)
print(f'La imagen pertenece al cluster: {cluster_label}')