In [1]:
import pandas as pd
import plotly.express as px
# Read the raw heart-disease table from the CSV that sits next to the notebook.
arquivo = pd.read_csv(filepath_or_buffer='heart.csv')

# Work on an independent copy so `arquivo` keeps the data exactly as loaded.
df = pd.DataFrame(data=arquivo, copy=True)

In [2]:
# Histogram of Age, coloured by HeartDisease, with a marginal box plot.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="Age", color="HeartDisease", marginal="box")
    .update_layout(
        title_text="Idade X Doença cardíaca",
        xaxis_title_text="Idade",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on the markers of every trace.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [3]:
# Histogram of Age, coloured by HeartDisease; the bar fill pattern encodes Sex.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="Age", color="HeartDisease", pattern_shape="Sex")
    .update_layout(
        title_text="Idade X Doença cardíaca (especificando sexo)",
        xaxis_title_text="Idade",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [4]:
# Count of rows per Sex value, coloured by HeartDisease.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="Sex", color="HeartDisease")
    .update_layout(
        title_text="Sexo X Doença cardíaca ",
        xaxis_title_text="Sexo",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [5]:
# Count of rows per ChestPainType value, coloured by HeartDisease.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="ChestPainType", color="HeartDisease")
    .update_layout(
        title_text="Tipo de Dor X Doença cardíaca",
        xaxis_title_text="Tipo de Dor",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [6]:
# Histogram of Cholesterol, coloured by HeartDisease.
# Only rows whose Cholesterol is non-zero and below 380 are plotted.
# NOTE(review): presumably 0 marks a missing measurement and 380 trims
# outliers -- confirm against the dataset description.
fig = (
    px.histogram(
        df.loc[df["Cholesterol"].ne(0) & df["Cholesterol"].lt(380)],
        x="Cholesterol",
        color="HeartDisease",
    )
    .update_layout(
        title_text="Colesterol X Doença cardíaca",
        xaxis_title_text="Colesterol",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [7]:
# Histogram of Cholesterol, coloured by HeartDisease; fill pattern encodes Sex.
# Only rows whose Cholesterol is non-zero and below 380 are plotted.
# NOTE(review): presumably 0 marks a missing measurement and 380 trims
# outliers -- confirm against the dataset description.
fig = (
    px.histogram(
        df.loc[df["Cholesterol"].ne(0) & df["Cholesterol"].lt(380)],
        x="Cholesterol",
        color="HeartDisease",
        pattern_shape="Sex",
    )
    .update_layout(
        title_text="Colesterol X Doença cardíaca (especificando sexo)",
        xaxis_title_text="Colesterol",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [8]:
# Count of rows per ST_Slope value, coloured by HeartDisease.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="ST_Slope", color="HeartDisease")
    .update_layout(
        title_text="Declive X Doença cardíaca",
        xaxis_title_text="Declive",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [9]:
# Count of rows per ST_Slope value, coloured by HeartDisease; fill pattern
# encodes Sex.
# The update_* calls return the figure itself, so they can be chained.
fig = (
    px.histogram(df, x="ST_Slope", color="HeartDisease", pattern_shape="Sex")
    .update_layout(
        title_text="Declive X Doença cardíaca (especificando sexo)",
        xaxis_title_text="Declive",
        yaxis_title_text="Quantidade",
        legend_title_text="Doença cardíaca",
    )
    # 1px black outline on every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
)
fig.show()

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.compose import make_column_transformer

from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import make_pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier

# --- Train/test split --------------------------------------------------------
# HeartDisease is the label; every other column is used as a feature.
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=300
)

# --- Preprocessing -----------------------------------------------------------
# Select the object-dtype columns from X (not df) so the label column can never
# end up in the list of columns handed to the transformer.
categorical = X.select_dtypes('object').columns

# handle_unknown='ignore' keeps predict() from raising when the test split
# contains a category value that was absent from the training split.
ohe = OneHotEncoder(handle_unknown='ignore')
# One-hot encode the categorical columns; all remaining columns pass through
# unchanged.
ct = make_column_transformer((ohe, categorical), remainder='passthrough')

# --- Candidate classifiers ---------------------------------------------------
lr = LogisticRegression(solver='liblinear')
lda = LinearDiscriminantAnalysis()
svm = SVC(gamma='scale')
knn = KNeighborsClassifier()
ada = AdaBoostClassifier(random_state=0)
gb = GradientBoostingClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
et = ExtraTreesClassifier(random_state=0)
# NOTE(review): SVC and KNeighborsClassifier are distance-based and the numeric
# columns reach them unscaled (remainder='passthrough'); that is a likely cause
# of their lower scores -- consider adding a scaler to the transformer.

# Keep each display label next to its estimator so the result table's index
# cannot drift out of sync with the evaluation order.
named_models = {
    'Logistic': lr,
    'LinearDiscriminant': lda,
    'SVM': svm,
    'KNeighbors': knn,
    'Ada': ada,
    'Gradient': gb,
    'Random': rf,
    'ExtraTree': et,
}
model_names = list(named_models)
models = list(named_models.values())

# --- Fit and score -----------------------------------------------------------
# The list is reset here so re-running the cell does not accumulate old scores.
accuracy = []
for model in models:
    pipe = make_pipeline(ct, model)
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # Score once, rounded to 4 decimals, and reuse it for the table and the log.
    score = round(accuracy_score(y_test, y_pred), 4)
    accuracy.append(score)
    print (f'model : {model} and  accuracy score is : {score}')

# Test-set accuracy per model, indexed by display label.
result_df1 = pd.DataFrame({'Accuracy': accuracy}, index=model_names)
result_df1

model : LogisticRegression(solver='liblinear') and  accuracy score is : 0.8732
model : LinearDiscriminantAnalysis() and  accuracy score is : 0.8623
model : SVC() and  accuracy score is : 0.721
model : KNeighborsClassifier() and  accuracy score is : 0.6957
model : AdaBoostClassifier(random_state=0) and  accuracy score is : 0.8514
model : GradientBoostingClassifier(random_state=0) and  accuracy score is : 0.8768
model : RandomForestClassifier(random_state=0) and  accuracy score is : 0.8587
model : ExtraTreesClassifier(random_state=0) and  accuracy score is : 0.8514


Unnamed: 0,Accuracy
Logistic,0.8732
LinearDiscriminant,0.8623
SVM,0.721
KNeighbors,0.6957
Ada,0.8514
Gradient,0.8768
Random,0.8587
ExtraTree,0.8514
