# Proyecto 1: Inteligencia Artificial Aplicada a la Ingeniería Eléctrica

## Algoritmo 1: K-means

In [11]:
# Importamos las librerías
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the Annex A dataset and show its schema plus the first rows.
csv_file = 'AnexoA.csv'
datosA = pd.read_csv(csv_file)
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
datosA.info()
print(datosA.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Empresa   133 non-null    object
 1   Circuito  133 non-null    object
 2   Abonados  133 non-null    int64 
 3   DPIR      133 non-null    int64 
 4   FPI       133 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 5.3+ KB
None
  Empresa             Circuito  Abonados  DPIR   FPI
0    CNFL       BARVA-CIPRESAL      3756  3273  2597
1    CNFL      CAJA-INDUSTRIAS      2230  2688  1423
2    CNFL      ESCAZÚ-PIEDADES      7039  2386  2446
3    CNFL    CORONADO-CASCAJAL      9777  2296  2682
4    CNFL  BRASIL-CIUDAD COLON      8644  2112  1852


In [4]:
# Load the Annex B dataset and show its schema plus the first rows.
csv_file = 'AnexoB.csv'
datosB = pd.read_csv(csv_file)
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
datosB.info()
print(datosB.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 352 entries, 0 to 351
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Empresa   352 non-null    object
 1   Circuito  352 non-null    object
 2   Abonados  352 non-null    int64 
 3   DPIR      352 non-null    int64 
 4   FPI       352 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 13.9+ KB
None
           Empresa             Circuito  Abonados  DPIR  FPI
0  COOPEALFARORUIZ         LAGUNA NORTE      3365   493  600
1  COOPEALFARORUIZ       ZARCERO CENTRO      1796   465  429
2  COOPEALFARORUIZ                 TORO       197   450  400
3  COOPEALFARORUIZ           SECTOR SUR      2762   344  400
4             CNFL  SABANILLA-SAN PEDRO      5933   564  324


In [26]:
# Exploratory 3-D scatter of Annex A: one point per row, positioned by
# Abonados (x), DPIR (y) and FPI (z).
fig = (
    px.scatter_3d(datosA, x="Abonados", y="DPIR", z="FPI")
    .update_traces(
        # NOTE(review): no `text=` is passed to scatter_3d, so this font
        # setting presumably has no visible effect -- confirm before relying on it.
        textfont={"family": "Arial", "size": 18, "color": "red"},
        # Small markers; the colour is left at the plotly default.
        marker={"size": 5},
    )
)
fig.show()

In [21]:
# Exploratory 3-D scatter of Annex B: one point per row, positioned by
# Abonados (x), DPIR (y) and FPI (z).
fig = (
    px.scatter_3d(datosB, x="Abonados", y="DPIR", z="FPI")
    .update_traces(
        # NOTE(review): no `text=` is passed to scatter_3d, so this font
        # setting presumably has no visible effect -- confirm before relying on it.
        textfont={"family": "Arial", "size": 18, "color": "crimson"},
        # Small markers; the colour is left at the plotly default.
        marker={"size": 5},
    )
)
fig.show()

In [7]:
# Build the feature matrix used by every clustering cell below.
# Only the integer-typed columns of datosA are kept (Abonados, DPIR and FPI
# according to the info() summary); the text columns are dropped.
# StandardScaler (default settings) rescales each column to zero mean and unit
# variance so that no single feature dominates the Euclidean distances.
scaler = StandardScaler()
data = scaler.fit_transform(datosA.select_dtypes(include=[int]))

### Método del codo

In [31]:
# Elbow method: fit K-means for k = 1..14 on the scaled features and record
# the within-cluster sum of squares (inertia) for each k.
sse = {"Número de Clusters": [],
       "SSE / WCSS": []}

for k in range(1, 15):
    # A fixed seed makes the curve reproducible across runs and consistent
    # with the seeded K-means fit used later in the notebook.
    kmeans = KMeans(n_clusters=k, max_iter=100, random_state=42).fit(data)
    sse["Número de Clusters"].append(k)
    sse["SSE / WCSS"].append(kmeans.inertia_)

# Plot inertia against k; the "elbow" is where the decrease starts to flatten.
fig = px.line(sse, x="Número de Clusters", y="SSE / WCSS",
              title="Método del codo")

fig.show()

Observamos que la curva no se estabiliza, sino que sigue decreciendo en todo el rango evaluado (k = 1 a 14). Se elige k = 5 porque a partir de ese punto la reducción del SSE entre valores sucesivos de k es cada vez menor.

In [35]:
# Final K-means model with k = 5 (chosen from the elbow plot), seeded for
# reproducibility. fit_predict fits the model and returns the cluster index of
# every row in a single call.
kmeans = KMeans(n_clusters=5, random_state=42)
y_means = kmeans.fit_predict(data)

# Plot the original (unscaled) Annex A values coloured by cluster.
# The labels are cast to strings so plotly treats them as categories and draws
# one discrete colour per cluster instead of a continuous colour scale.
fig = px.scatter_3d(datosA, x="Abonados", y="DPIR", z="FPI",
                    color=y_means.astype(str))
fig.update_traces(
    textfont=dict(
        family="Arial",
        size=18,
        color="crimson"
    ),
    marker=dict(
        size=5
    )
)
fig.show()

### Coeficiente silueta

In [41]:
# Silhouette analysis: fit K-means for each candidate number of clusters and
# record the mean silhouette coefficient of the resulting partition.
sc = {"Número de Clusters": [],
      "Coeficiente silueta": []}

# The silhouette coefficient is only defined for at least 2 clusters, so the
# sweep starts at 2 (a start of 0 or 1 makes KMeans / silhouette_score fail).
for n_clusters in range(2, 9):
    # Use the loop variable itself as the cluster count; relying on some other
    # global here would score the same model on every iteration.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(data)
    sil_coeff = silhouette_score(data, kmeans.labels_, metric='euclidean')
    sc["Número de Clusters"].append(n_clusters)
    sc["Coeficiente silueta"].append(sil_coeff)

# Higher silhouette values indicate better separated clusters.
fig = px.line(sc, x="Número de Clusters", y="Coeficiente silueta",
              title="Método del Coeficiente silueta")

fig.show()

## Algoritmo 2: DBSCAN

In [44]:
# Density-based clustering of the scaled features.
# eps is the neighbourhood radius (in standardized units, since `data` was
# scaled) and min_samples the number of neighbours required for a core point.
# NOTE(review): 0.49 and 10 look hand-tuned -- document how they were chosen.
dbscan = DBSCAN(min_samples=10, eps=0.49)
dbscan.fit(data)

In [43]:
# Palette for the DBSCAN clusters; cluster i is drawn with colors[i % len(colors)].
colors = ['royalblue', 'maroon', 'forestgreen', 'mediumorchid', 'tan', 'deeppink', 'olive', 'goldenrod', 'lightcyan', 'navy']

# DBSCAN marks noise points with the label -1. Give them their own legend
# entry and colour instead of letting -1 wrap around to the last palette entry.
dbscan_labels = dbscan.labels_.tolist()
cluster_names = [
    "Ruido" if label == -1 else f"Cluster {label}" for label in dbscan_labels
]

# Explicit name -> colour mapping. Passing bare colour names through `color=`
# only creates categories called 'royalblue', 'maroon', ... and plotly then
# paints them with its own default palette, so the mapping must be given via
# color_discrete_map for the chosen colours to be applied.
color_map = {"Ruido": "gray"}
for label in sorted(set(dbscan_labels) - {-1}):
    color_map[f"Cluster {label}"] = colors[label % len(colors)]

fig = px.scatter_3d(datosA, x="Abonados", y="DPIR", z="FPI",
                    color=cluster_names,
                    color_discrete_map=color_map)
fig.update_traces(
    textfont=dict(
        family="Arial",
        size=18,
        color="crimson"
    ),
    marker=dict(
        size=5
    )
)
fig.show()