# Análisis de Chi-cuadrada ($\chi^2$)

## Importar bibliotecas

In [232]:
import  numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

## Cargar el CSV de datos de la enfermedad de Parkinson (características a partir de la columna 1)




In [233]:
# Read the CSV (path is relative to the notebook's working directory).
dataset = pd.read_csv(filepath_or_buffer='parkinsons_disease_data.csv')

# Feature matrix: skip column 0 and the last two columns, keep everything
# in between as a NumPy array.
# NOTE(review): presumably column 0 is an identifier and the trailing
# columns are the label plus one extra non-feature column - confirm against the CSV.
X = dataset.iloc[:, 1:-2].values
print(X)

[[85.  0.  3. ...  0.  0.  0.]
 [75.  0.  0. ...  0.  1.  0.]
 [70.  1.  0. ...  1.  0.  1.]
 ...
 [65.  0.  0. ...  1.  1.  0.]
 [61.  1.  0. ...  0.  1.  1.]
 [56.  0.  0. ...  1.  0.  1.]]


In [234]:
# Target vector: the second-to-last column of the dataset, as a NumPy array.
target_series = dataset.iloc[:, -2]
y = target_series.values
print(y)

[0 1 1 ... 1 1 0]


In [235]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Aplicar Chi-cuadrada para seleccionar las mejores características

In [236]:
# Rank features with the chi-squared statistic and keep the 5 best.
chi_selector = SelectKBest(score_func=chi2, k=5)

# Fit on the training split only, so the test rows play no part in
# deciding which features are kept.
chi_selector.fit(X_train, y_train)

# Apply the same column selection to both splits.
X_kBest_train = chi_selector.transform(X_train)
X_kBest_test = chi_selector.transform(X_test)

## Entrenar un clasificador de árbol de decisión usando la reducción de características

In [237]:
# NOTE(review): RandomForestClassifier is not used in the visible cells; the
# import is kept in case a later cell relies on it. Ideally it would live in
# the imports cell at the top of the notebook.
from sklearn.ensemble import RandomForestClassifier

# Train a decision tree on the chi2-reduced training features.
# random_state is fixed because the tree permutes candidate features at each
# split, so an unseeded tree (and its reported accuracy) can differ between
# runs. The seed matches the one used for the train/test split.
clasificador = DecisionTreeClassifier(random_state=42)
clasificador.fit(X_kBest_train, y_train)

## Predecir las etiquetas del conjunto de prueba

In [238]:
# Predict labels for the held-out rows, using the same reduced feature set
# the classifier was trained on.
y_pred = clasificador.predict(X=X_kBest_test)
print(y_pred)

[0 1 0 1 1 0 0 1 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1
 1 1 1 1 0 1 0 0 0 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0 1 1 1 1 0 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0
 1 1 0 1 1 0 1 1 1 1 1 0 0 1 1 1 0 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 1 1 1 1 1
 0 0 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 0
 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1
 0 1 0 0 1 1 0 0 1 0 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 0 1 0 0 1
 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1
 0 0 1 1 0 0 0 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1
 1 1 0 1 0 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 1
 1 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 0 1 0 1 1 0 1 0
 0 0 0 1 1 1 1 1 1 0 0 1 1 0]


## Calcular la exactitud (accuracy) del modelo

In [239]:
# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.80


## Mostrar las características seleccionadas de chi2

In [240]:
# Report the chi2 score of every input feature and the positions of the
# features the selector kept. Positions index the columns of X (i.e. the
# columns taken by dataset.iloc[:, 1:-2]), not the columns of the raw CSV.
chi2_scores = chi_selector.scores_
selected_idx = chi_selector.get_support(indices=True)
print(f"Puntuaciones: {chi2_scores}")
print(f"Características seleccionadas (índice): {selected_idx}")

Puntuaciones: [8.75837584e+00 1.27557728e+00 4.54305284e-01 2.57979378e-01
 4.84931722e+00 5.23821916e-02 8.88376820e+00 4.72901442e-01
 8.29145378e-01 5.24379313e-01 5.85992601e-01 4.39949387e-01
 3.09618046e-01 3.51435898e+00 6.18504575e+00 5.71408102e-01
 6.64617472e-01 4.15777103e+00 1.07440186e+01 1.30356510e+00
 5.33365535e+00 1.96344636e+01 8.39670299e+03 2.83097711e+02
 1.36268478e+02 8.21398108e+01 5.44809683e+01 4.19418842e+01
 2.66065068e+01 4.97666115e-01 3.34709633e-02 7.88115182e-01]
Características seleccionadas (índice): [22 23 24 25 26]
