In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


#Leer el dataset
data = pd.read_csv("Indicadores_municipales_sabana_DA.csv",encoding="latin-1")

#Dataframe Principal
columna_feature = data['vul_ing']

#Eliminar Columnas con nombres de estados
data.drop(columns=["nom_ent" , "nom_mun"], inplace=True)

# Definir las columnas con categorías alfabéticas
columnas_a_convertir = ['gdo_rezsoc00', 'gdo_rezsoc05', 'gdo_rezsoc10']

# Convertir categorías a valores numéricos y calcular el promedio para cada columna
for columna in columnas_a_convertir:
    categorias_a_numeros = {
        "Muy bajo": 1,
        "Bajo": 2,
        "Medio": 3,
        "Alto": 4,
        "Muy alto": 5
    }

    # Convertir las categorías a valores numéricos en la columna actual
    data[columna] = data[columna].map(categorias_a_numeros)
valores_nulos = data.isnull()

# Cuenta los valores nulos en todo el DataFrame
valores_nulos = data.isnull().sum().sum()

# Cuenta las columnas que contienen al menos un valor nulo
columnas_con_nulos = len(data.columns[data.isnull().any()])

#Cuenta los valores null en la columna vul_ing
col_vul_ing_null= data['vul_ing'].isnull().sum()

# Imprime las columnas con al menos un valor nulo
print("Columnas con al menos un valor nulo: ", columnas_con_nulos)

# Imprime el contador de valores nulos en total
print("Valores nulos en total:", valores_nulos)

#Saca el promedio de la columna vul_Ing
promedio = data['vul_ing'].mean()

#Rellena los datos null de la columna vul_ing con el promedio de la columna
data['vul_ing'].fillna(promedio, inplace=True)

#Imprime la cantidad de calores null antes del relleno
print("Valores Null antes del relleno en columna vul_ing:", col_vul_ing_null)

#Imprime la cantidad actualizada de valores null después del relleno
print("Valor actualizado de valores nulos en la columa vul_ing:" , data['vul_ing'].isnull().sum())

#Rellena todos los valores null en el dataframe con el promedio de su respectiva columna
data.fillna(data.mean(),inplace=True)

#Cuenta los valores null actualizados
Null_update = data.isnull().sum().sum()

print("Valores nulos en total después del relleno:", Null_update)



Columnas con al menos un valor nulo:  56
Valores nulos en total: 305
Valores Null antes del relleno en columna vul_ing: 0
Valor actualizado de valores nulos en la columa vul_ing: 0
Valores nulos en total después del relleno: 0


In [18]:
data

Unnamed: 0,ent,mun,clave_mun,pobtot_ajustada,pobreza,pobreza_e,pobreza_m,vul_car,vul_ing,npnv,...,pobreza_cap_00,pobreza_cap_10,pobreza_patrim_90,pobreza_patrim_00,pobreza_patrim_10,gini_90,gini_00,gini_10,categoria_vulnerabilidad,categoria_vulnerabilidad_numerica
0,1,1,1001,794304,30.531104,2.264478,28.266627,27.983320,8.419106,33.066469,...,12.7,18.474600,43.4,33.7,41.900398,0.473,0.425,0.422628,Muy alta vulnerabilidad,0
1,1,2,1002,48592,67.111172,8.040704,59.070468,22.439389,5.557604,4.891835,...,29.0,30.980801,64.2,48.9,59.175800,0.379,0.533,0.343879,Muy alta vulnerabilidad,0
2,1,3,1003,53104,61.360527,7.241238,54.119289,29.428583,2.921336,6.289554,...,33.1,28.259199,63.9,57.9,56.504902,0.414,0.465,0.386781,Muy baja vulnerabilidad,1
3,1,4,1004,14101,52.800458,4.769001,48.031458,27.128568,7.709276,12.361698,...,21.0,22.386101,59.7,40.1,51.164501,0.392,0.541,0.344984,Muy alta vulnerabilidad,0
4,1,5,1005,101379,45.338512,6.084037,39.254475,26.262912,8.279864,20.118712,...,22.6,22.139999,60.6,42.2,45.703899,0.391,0.469,0.458083,Muy alta vulnerabilidad,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2451,32,54,32054,21016,74.848837,12.301183,62.547654,19.229856,3.177689,2.743618,...,54.8,41.368999,73.5,70.9,70.859596,0.403,0.589,0.342037,Muy baja vulnerabilidad,1
2452,32,55,32055,27385,65.450191,10.203506,55.246687,23.623556,5.007426,5.918827,...,25.9,20.563601,57.8,44.1,46.659199,0.422,0.463,0.362527,Muy alta vulnerabilidad,0
2453,32,56,32056,117528,29.541959,3.535624,26.006335,16.644262,8.828019,44.985759,...,20.7,12.115300,36.6,41.8,32.302700,0.528,0.498,0.436339,Muy alta vulnerabilidad,0
2454,32,57,32057,20456,78.374962,14.607016,63.767946,13.750759,4.440331,3.433948,...,36.4,30.037100,60.5,54.7,57.394501,0.380,0.483,0.365307,Muy baja vulnerabilidad,1


In [9]:
x = data.drop(columns=["vul_ing"])
y = data["vul_ing"]


X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

X_train.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)

In [10]:

# Implement the K-NN algorithm
def knn_predict(train_data, train_labels, new_data, K):
    distances_and_labels = []

    for data_point, label in zip(train_data, train_labels):
        distance = np.linalg.norm(new_data - data_point)  # Euclidean distance calculation
        distances_and_labels.append((distance, label))

    distances_and_labels.sort(key=lambda x: x[0])  # Sort by distance

    k_nearest_neighbors = distances_and_labels[:K]

    neighbor_labels = [neighbor[1] for neighbor in k_nearest_neighbors]

    predicted_label = max(set(neighbor_labels), key=neighbor_labels.count)  # Majority vote

    return predicted_label


def calculate_accuracy(predictions, true_labels):
    correct_predictions = np.sum(predictions == true_labels)
    total_predictions = len(true_labels)
    accuracy = (correct_predictions / total_predictions) * 100
    return accuracy

def step_function(y):
    return 1 if y >= 0 else 0

def update_weights(weights, bias, error, features, learning_rate):
    updated_weights = weights + learning_rate * error * features
    updated_bias = bias + learning_rate * error
    return updated_weights, updated_bias

def perceptron_train(X_train, y_train, learning_rate=0.1, epochs=100):
    num_samples, num_features = X_train.shape
    weights = np.zeros(num_features)
    bias = 0

    for _ in range(epochs):
        errors = 0
        for i in range(num_samples):
            y = np.dot(X_train[i], weights) + bias
            prediction = step_function(y)
            error = y_train[i] - prediction
            errors += abs(error)
            if error != 0:
                weights, bias = update_weights(weights, bias, error, X_train[i], learning_rate)
        mean_error = errors / num_samples
        print("Epoch: {}, Mean Error: {:.2f}".format(_, mean_error))

    return weights, bias


In [11]:
# Definir los rangos y las categorías correspondientes
categorias = {
    "Muy baja vulnerabilidad": (0, 4.9999),
    "Muy alta vulnerabilidad": (5, 100)
}

# Función para asignar categorías basadas en el porcentaje de población vulnerable
def asignar_categoria(vulnerabilidad):
    for categoria, (min_valor, max_valor) in categorias.items():
        if min_valor <= vulnerabilidad < max_valor:
            return categoria
    return "Muy alta vulnerabilidad"  # Manejar el caso si el porcentaje es mayor al 100

# Crear una nueva columna 'categoria_vulnerabilidad' en el dataframe
data['categoria_vulnerabilidad'] = data['vul_ing'].apply(asignar_categoria)


# Convertir las categorías a valores numéricos
data['categoria_vulnerabilidad_numerica'] = data['categoria_vulnerabilidad'].astype('category').cat.codes

# Dividir los datos en características (X) y variable objetivo (y)
x = data.drop(columns=["vul_ing", "categoria_vulnerabilidad", "categoria_vulnerabilidad_numerica"])
y = data["categoria_vulnerabilidad_numerica"]

# Normalizar los datos
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

# Implementar el modelo de perceptrón
def step_function(y):
    return 1 if y >= 0 else 0

def perceptron_train(X_train, y_train, learning_rate=0.1, epochs=100):
    num_samples, num_features = X_train.shape
    weights = np.zeros(num_features)
    bias = 0
    for _ in range(epochs):
        errors = 0
        for i in range(num_samples):
            y = np.dot(X_train[i], weights) + bias
            prediction = step_function(y)
            error = y_train.iloc[i] - prediction
            errors += abs(error)
            if error != 0:
                weights += learning_rate * error * X_train[i]
                bias += learning_rate * error
        mean_error = errors / num_samples
        print("Epoch: {}, Mean Error: {:.2f}".format(_, mean_error))
    return weights, bias

def perceptron_test(X_test, weights, bias):
    predictions = []
    for i in range(len(X_test)):
        y = np.dot(X_test[i], weights) + bias
        prediction = step_function(y)
        predictions.append(prediction)
    return np.array(predictions)

# Entrenar el modelo de perceptrón
trained_weights, trained_bias = perceptron_train(X_train, y_train)

# Evaluate the Perceptron model on the test set
perceptron_predictions = perceptron_test(X_test, trained_weights, trained_bias)
perceptron_accuracy = accuracy_score(y_test, perceptron_predictions)

# Print the accuracy
print("Accuracy of the Perceptron model on the test set: {:.2f}%".format(perceptron_accuracy * 100))

Epoch: 0, Mean Error: 0.12
Epoch: 1, Mean Error: 0.10
Epoch: 2, Mean Error: 0.07
Epoch: 3, Mean Error: 0.06
Epoch: 4, Mean Error: 0.06
Epoch: 5, Mean Error: 0.06
Epoch: 6, Mean Error: 0.06
Epoch: 7, Mean Error: 0.05
Epoch: 8, Mean Error: 0.05
Epoch: 9, Mean Error: 0.06
Epoch: 10, Mean Error: 0.05
Epoch: 11, Mean Error: 0.04
Epoch: 12, Mean Error: 0.04
Epoch: 13, Mean Error: 0.03
Epoch: 14, Mean Error: 0.03
Epoch: 15, Mean Error: 0.04
Epoch: 16, Mean Error: 0.04
Epoch: 17, Mean Error: 0.04
Epoch: 18, Mean Error: 0.04
Epoch: 19, Mean Error: 0.04
Epoch: 20, Mean Error: 0.03
Epoch: 21, Mean Error: 0.03
Epoch: 22, Mean Error: 0.03
Epoch: 23, Mean Error: 0.03
Epoch: 24, Mean Error: 0.03
Epoch: 25, Mean Error: 0.03
Epoch: 26, Mean Error: 0.04
Epoch: 27, Mean Error: 0.04
Epoch: 28, Mean Error: 0.03
Epoch: 29, Mean Error: 0.03
Epoch: 30, Mean Error: 0.03
Epoch: 31, Mean Error: 0.03
Epoch: 32, Mean Error: 0.03
Epoch: 33, Mean Error: 0.03
Epoch: 34, Mean Error: 0.03
Epoch: 35, Mean Error: 0.03
Ep

In [12]:
X_train = X_train.astype(float)
X_test = X_test.astype(float)

In [13]:
K = 5

# Evaluate the K-NN model on the test set
knn_predictions = [knn_predict(X_train, y_train, test_data, K) for test_data in X_test]
knn_accuracy = calculate_accuracy(np.array(knn_predictions), y_test)

# Print the accuracy
print("Accuracy of the K-NN model on the test set: {:.2f}%".format(knn_accuracy))

Accuracy of the K-NN model on the test set: 92.28%


**Using** **libraries**

KNN

In [15]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(X_train, y_train)
y_pred = knn_classifier.predict(X_test)

# Paso 5: Evaluar la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.9186991869918699


Perceptron

In [17]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

perceptron_classifier = Perceptron(max_iter=1000, random_state=42)
perceptron_classifier.fit(X_train, y_train)

# Realiza predicciones en el conjunto de prueba
y_pred = perceptron_classifier.predict(X_test)

# Evalúa la precisión del modelo
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.9573170731707317
