In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_curve, auc
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [None]:
# Load the raw data file. header=None tells pandas the file has no header row,
# so the columns get integer labels; later cells use column 10 as the class label.
df = pd.read_csv(filepath_or_buffer='data/magic04.data', header=None)
# Preview the first rows to sanity-check the parse.
df.head(n=5)

In [None]:
# Encode the class label in column 10 as integers: 'g' -> 0, 'h' -> 1.
label_codes = {'g': 0, 'h': 1}
# Values that are not keys of the mapping are passed through unchanged. This keeps
# the cell idempotent: a plain `.map(dict)` would turn the already-encoded 0/1
# values into NaN if the cell were executed a second time on the same `df`.
df[10] = df[10].map(lambda label: label_codes.get(label, label))

In [None]:
# Pairwise scatter/distribution grid of every column, coloured by the class label (column 10).
sns.pairplot(data=df, hue=10)

In [None]:
# Feature matrix: every column except the class label (column 10).
X = df.drop(10, axis="columns")
X

In [None]:
# Rescale the features with a MinMaxScaler fitted on X itself.
# NOTE(review): the scaler is fitted on all rows before the train/test split that
# happens in a later cell, so test-set statistics influence the scaling. Consider
# fitting the scaler on the training rows only — TODO confirm and restructure.
X = MinMaxScaler().fit(X).transform(X)
X

In [None]:
# Target vector: the class-label column (column 10) of the frame.
y = df.loc[:, 10]
y

In [None]:
# Class balance: number of rows per label value, most frequent first.
y.value_counts(sort=True, ascending=False)

In [None]:
# Logistic-regression classifier with a fixed seed and an iteration cap of 10000.
lr = LogisticRegression(
    random_state=0,
    max_iter=10000,
)

In [None]:
# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both partitions; random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Fit the classifier on the training partition only.
lr.fit(X=X_train, y=y_train)

In [None]:
# Hard class predictions for the held-out rows, and the fraction predicted correctly.
y_pred = lr.predict(X_test)
print("Accuracy : {}".format(accuracy_score(y_true=y_test, y_pred=y_pred)))

In [None]:
# Text summary of per-class metrics on the test partition.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
# Confusion matrix of true vs. predicted labels on the test partition, drawn as a heatmap.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()

In [None]:
# Score used for the ROC curve: column 1 of predict_proba, i.e. the probability
# of the second class (label 1, which an earlier cell assigns to 'h').
y_pred_proba = lr.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line: the curve of a classifier with no discriminative power.
plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Title corrected: the data loaded in this notebook is magic04.data (g/h labels),
# not a breast-cancer dataset; the old title was a copy-paste leftover.
plt.title('ROC Curve for MAGIC Gamma Telescope Classification')
plt.legend()
plt.show()