# Classificação Binária

In [3]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as pyo
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot_mpl
init_notebook_mode(connected=True)
cf.go_offline()
import datetime
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Load the study dataset; the file is semicolon-delimited rather than comma-delimited.
df = pd.read_csv('./dataset_ic2023.csv', delimiter=';')
# Preview the first rows to sanity-check the parse.
df.head(5)

Unnamed: 0,Colesterol Total,Idade,Glicemia,Desfecho,Unnamed: 4
0,254,38,93,1,
1,222,79,89,-1,
2,236,64,102,-1,
3,241,50,123,-1,
4,167,34,82,-1,


In [5]:
# Dataset dimensions as a (rows, columns) tuple.
df.shape

(799, 5)

In [6]:
# Number of missing values in each column.
df.isnull().sum()

Colesterol Total      0
Idade                 0
Glicemia              0
Desfecho              0
Unnamed: 4          799
dtype: int64

In [7]:
# "Unnamed: 4" is entirely empty (799 missing values out of 799 rows), so it
# carries no information and is removed.
# The column is dropped by name rather than by position: a positional
# `df.columns[4]` raises IndexError when this cell is re-run (only four columns
# remain) and would silently drop a real column if the file layout changed.
# `errors="ignore"` makes the cell a no-op once the column is already gone.
df = df.drop(columns=["Unnamed: 4"], errors="ignore")
df.head()

Unnamed: 0,Colesterol Total,Idade,Glicemia,Desfecho
0,254,38,93,1
1,222,79,89,-1
2,236,64,102,-1
3,241,50,123,-1
4,167,34,82,-1


In [8]:
# Pairwise Pearson correlation between all columns (features and outcome)
correlation_matrix = df.corr(method='pearson')

In [9]:
# Heatmap trace of the correlation matrix; the colour scale is pinned to the
# full [-1, 1] correlation range so colours are comparable across runs.
heatmap_trace = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.index,
    colorscale='Viridis',
    zmin=-1,
    zmax=1,
    colorbar={'title': 'Correlation'},
)
fig = go.Figure(data=heatmap_trace)

# Write the rounded coefficient on top of every cell (row by row).
for row_pos, row_label in enumerate(correlation_matrix.index):
    for col_pos, col_label in enumerate(correlation_matrix.columns):
        cell_value = correlation_matrix.values[row_pos][col_pos]
        fig.add_annotation(
            x=col_label,
            y=row_label,
            text=str(cell_value.round(2)),
            showarrow=False,
        )

# Figure cosmetics: fixed square canvas, title, column labels along the top.
fig.update_layout(
    title="Correlation Matrix",
    width=600,
    height=600,
)
fig.update_xaxes(side="top")

# Render the figure
fig.show()

In [10]:
# Class balance of the target: one row per outcome value with its frequency.
desfecho_counts = df['Desfecho'].value_counts().reset_index()
desfecho_counts.columns = ['Desfecho', 'Count']

# Share of each outcome over all rows, rendered as a percentage string.
desfecho_counts['Percentage'] = (
    desfecho_counts['Count']
    .div(len(df))
    .map(lambda share: f'{share:.2%}')
)
desfecho_counts

Unnamed: 0,Desfecho,Count,Percentage
0,-1,535,66.96%
1,1,264,33.04%


In [11]:
# Target is the outcome column; every remaining column is a feature.
y = df['Desfecho']
X = df.drop(columns='Desfecho')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only,
# then the same transformation is applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# k-nearest-neighbours classifier; k is the number of neighbours that vote.
k = 3
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict the held-out rows and score the predictions.
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
classification_report_str = classification_report(y_test, y_pred)

print(
    f"Accuracy: {accuracy:.2f}",
    "Classification Report:",
    classification_report_str,
    sep="\n",
)

Accuracy: 0.76
Classification Report:
              precision    recall  f1-score   support

          -1       0.80      0.85      0.82       107
           1       0.65      0.57      0.61        53

    accuracy                           0.76       160
   macro avg       0.73      0.71      0.71       160
weighted avg       0.75      0.76      0.75       160



In [16]:
from sklearn.linear_model import BayesianRidge

In [17]:
# Fit a Bayesian ridge model on the -1/+1 outcome labels.
# NOTE: BayesianRidge is a linear *regression* model, not logistic regression.
# It is used as a classifier here only by thresholding its continuous
# prediction at 0 (see below). X_train / X_test are the standardized arrays
# produced by the train/test split cell.
bayesian_classifier = BayesianRidge()
bayesian_classifier.fit(X_train, y_train)

# Continuous regression scores for the held-out rows
# (this reuses, and overwrites, the name `y_pred`).
y_pred = bayesian_classifier.predict(X_test)
# Map scores to class labels in one vectorized step: > 0 -> 1, otherwise -1.
y_pred_binary = np.where(y_pred > 0, 1, -1)

# Evaluate the thresholded predictions against the true labels
accuracy = accuracy_score(y_test, y_pred_binary)
classification_report_str = classification_report(y_test, y_pred_binary)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report_str)

Accuracy: 0.78
Classification Report:
              precision    recall  f1-score   support

          -1       0.79      0.90      0.84       107
           1       0.72      0.53      0.61        53

    accuracy                           0.78       160
   macro avg       0.76      0.71      0.73       160
weighted avg       0.77      0.78      0.76       160

