In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import decomposition
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier

# Logistička regresija

## Redukcija dimenzionalnosti uz PCA

Učitavanje dataseta sa interneta i odabir kolona klasnog i neklasnih atributa

In [36]:
# Download the Parkinson's dataset from the UCI repository (requires network access).
raw_data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
)
# Feature columns: every column except "name" and the class column "status".
feature_names = [
    col
    for col in raw_data.columns
    if col != "name" and col != "status"
]
# Column that holds the class label.
class_name = 'status'

Selekcija neklasnih i klasnog atributa u posebne nizove

In [37]:
# Feature matrix: only the columns listed in feature_names.
feature_data = raw_data.loc[:, feature_names]
# Target vector: the single class column.
class_data = raw_data.loc[:, class_name]

Instanciranje PCA objekta i zadavanje dimenzije, zatim transformacija matrice neklasnih atributa

In [None]:
# Project the feature matrix onto 5 principal components.
# NOTE(review): the projection is fitted on the whole feature matrix, and the
# train/test split happens only afterwards, so test rows take part in fitting
# the PCA. Consider fitting on the training part only.
pca = decomposition.PCA(n_components=5)
feature_data_pca = pca.fit_transform(X=feature_data)

In [None]:
Podela dataseta u trening i test skup

In [None]:
# Hold out 20% of the PCA-transformed rows for testing; the split is seeded
# (random_state=8) and stratified on the class labels.
(train_feature_data,
 test_feature_data,
 train_class_data,
 test_class_data) = train_test_split(
    feature_data_pca,
    class_data,
    test_size=0.2,
    random_state=8,
    stratify=class_data,
)

In [None]:
Instanciranje objekta logističke regresije i obučavanje modela

In [None]:
# Logistic regression with the lbfgs solver and an iteration cap of 10000.
logreg = LogisticRegression(solver='lbfgs', max_iter=10000)
# Train on the training part only; the fitted estimator is the cell's output.
logreg.fit(X=train_feature_data, y=train_class_data)

In [None]:
Predviđanje na osnovu dobijenog modela, uz test skup

In [None]:
# Predict class labels for the held-out rows with the fitted model.
test_predicted_data = logreg.predict(X=test_feature_data)

In [None]:
Tačnost

In [39]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=test_class_data, y_pred=test_predicted_data)

0.9487179487179487

In [None]:
Konfuziona matrica

In [40]:
# Confusion matrix on the held-out rows (rows: true class, columns: predicted class).
confusion_matrix(y_true=test_class_data, y_pred=test_predicted_data)

array([[ 9,  1],
       [ 1, 28]], dtype=int64)

## Selekcija atributa uz RFE (Recursive Feature Elimination)

In [None]:
# Reload the Parkinson's dataset so this section starts from the raw table.
raw_data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
)
# Feature columns: every column except "name" and the class column "status".
feature_names = [
    col
    for col in raw_data.columns
    if col != "name" and col != "status"
]
class_name = 'status'

# Split the table into the feature matrix and the target vector.
feature_data = raw_data.loc[:, feature_names]
class_data = raw_data.loc[:, class_name]

# Fresh, unfitted logistic regression; it is handed to RFE and trained later on.
logreg = LogisticRegression(solver='lbfgs', max_iter=10000)

Instanciranje RFE objekta i zadavanje željenog broja atributa, zatim transformacija matrice neklasnih atributa na osnovu obučenog RFE objekta

In [None]:
# Recursive feature elimination around the logistic regression, keeping 5 features.
# n_features_to_select must be passed by keyword: the positional form was
# deprecated in scikit-learn 0.23 and raises TypeError from 1.0 onwards.
rfe = RFE(logreg, n_features_to_select=5)
# Fit the selector and keep only the selected columns.
# NOTE(review): the selector is fitted on all rows and their labels, and the
# train/test split happens only afterwards, so test rows influence which
# features are kept. Consider fitting on the training part only.
feature_data_rfe = rfe.fit_transform(feature_data, class_data)

In [None]:
# Hold out 20% of the RFE-reduced rows for testing; seeded and stratified on
# the class labels.
(train_feature_data,
 test_feature_data,
 train_class_data,
 test_class_data) = train_test_split(
    feature_data_rfe,
    class_data,
    test_size=0.2,
    random_state=8,
    stratify=class_data,
)

# Train the logistic regression on the training part only.
logreg.fit(X=train_feature_data, y=train_class_data)

# Predict labels for the held-out rows.
test_predicted_data = logreg.predict(X=test_feature_data)

In [None]:
# Accuracy of the RFE-based model on the held-out rows.
accuracy_score(y_true=test_class_data, y_pred=test_predicted_data)

In [43]:
# Confusion matrix of the RFE-based model (rows: true class, columns: predicted class).
confusion_matrix(y_true=test_class_data, y_pred=test_predicted_data)

array([[ 9,  1],
       [ 1, 28]], dtype=int64)

## Selekcija atributa uz LassoCV i SFM (Select From Model)

In [None]:
# Reload the Parkinson's dataset so this section starts from the raw table.
raw_data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
)
# Feature columns: every column except "name" and the class column "status".
feature_names = [
    col
    for col in raw_data.columns
    if col != "name" and col != "status"
]
class_name = 'status'

# Split the table into the feature matrix and the target vector.
feature_data = raw_data.loc[:, feature_names]
class_data = raw_data.loc[:, class_name]

# Fresh, unfitted logistic regression, trained after feature selection.
logreg = LogisticRegression(solver='lbfgs', max_iter=10000)

In [None]:
# Lasso with 3-fold cross-validation serves as the scoring model.
lasso = LassoCV(cv=3)
# Wrap it in SelectFromModel with the "mean" threshold setting.
sfm = SelectFromModel(estimator=lasso, threshold="mean")

In [None]:
# Fit the selector and keep only the selected columns.
# NOTE(review): the selector is fitted on all rows and their labels before the
# split below, so test rows influence which features are kept. Consider
# fitting on the training part only.
feature_data_sfm = sfm.fit_transform(X=feature_data, y=class_data)

# Hold out 20% of the reduced rows for testing; seeded and stratified on the
# class labels.
(train_feature_data,
 test_feature_data,
 train_class_data,
 test_class_data) = train_test_split(
    feature_data_sfm,
    class_data,
    test_size=0.2,
    random_state=8,
    stratify=class_data,
)

# Train the logistic regression on the training part only.
logreg.fit(X=train_feature_data, y=train_class_data)

# Predict labels for the held-out rows.
test_predicted_data = logreg.predict(X=test_feature_data)

In [None]:
# Accuracy of the SelectFromModel-based model on the held-out rows.
accuracy_score(y_true=test_class_data, y_pred=test_predicted_data)

In [42]:
# Confusion matrix of the SelectFromModel-based model (rows: true class, columns: predicted class).
confusion_matrix(y_true=test_class_data, y_pred=test_predicted_data)

array([[ 9,  1],
       [ 1, 28]], dtype=int64)

## Klasifikacija uz stablo odlučivanja

In [None]:
# Reload the Parkinson's dataset so this section starts from the raw table.
raw_data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
)
# Feature columns: every column except "name" and the class column "status".
feature_names = [
    col
    for col in raw_data.columns
    if col != "name" and col != "status"
]
class_name = 'status'

# Split the table into the feature matrix and the target vector.
feature_data = raw_data.loc[:, feature_names]
class_data = raw_data.loc[:, class_name]

# Hold out 20% of the rows (all original features, no reduction step) for
# testing; seeded and stratified on the class labels.
(train_feature_data,
 test_feature_data,
 train_class_data,
 test_class_data) = train_test_split(
    feature_data,
    class_data,
    test_size=0.2,
    random_state=8,
    stratify=class_data,
)

Instanciranje i obučavanje stabla odlučivanja

In [None]:
# Decision tree with default hyperparameters.
# random_state is fixed (same seed as the train/test split) because the tree
# builder permutes features randomly at each split; without a seed the fitted
# tree, and therefore the reported metrics, can differ between runs.
classifier = DecisionTreeClassifier(random_state=8)
# Train on the training part only; the fitted estimator is the cell's output.
classifier.fit(train_feature_data, train_class_data)

In [None]:
# Predict class labels for the held-out rows with the fitted tree.
test_predicted_data = classifier.predict(X=test_feature_data)

In [None]:
# Accuracy of the decision tree on the held-out rows.
accuracy_score(y_true=test_class_data, y_pred=test_predicted_data)

In [41]:
# Confusion matrix of the decision tree (rows: true class, columns: predicted class).
confusion_matrix(y_true=test_class_data, y_pred=test_predicted_data)

array([[ 9,  1],
       [ 1, 28]], dtype=int64)