<a href="https://colab.research.google.com/github/marco10507/ml-portfolio/blob/main/logistic_regression_1_class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt


# Generate a synthetic binary-classification data set: 1000 samples, 10 features.
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# Standardize the features before PCA so that no single feature dominates the
# principal components merely because of its scale.
x_scaled = StandardScaler().fit_transform(X)

# Reduce to three principal components; they are used for visualization here.
pca = PCA(n_components=3)
x_pca = pca.fit_transform(x_scaled)

df = pd.DataFrame(data=x_pca, columns=["PC1", "PC2", "PC3"])
df["Target"] = y

# Plotly Express maps a numeric color column to a continuous gradient with a
# colorbar. "Target" is a class label, so plot from a copy whose labels are
# strings to get one discrete color per class. `df` keeps its integer labels.
df_plot = df.assign(Target=df["Target"].astype(str))
class_order = {"Target": ["0", "1"]}  # fixed legend/color order for both figures

fig_2d = px.scatter(
    df_plot,
    x="PC1",
    y="PC2",
    color="Target",
    category_orders=class_order,
    title="First two principal components by class",
)

fig_2d.show()

fig_3d = px.scatter_3d(
    df_plot,
    x="PC1",
    y="PC2",
    z="PC3",
    color="Target",
    category_orders=class_order,
    title="First three principal components by class",
)

fig_3d.show()


In [None]:
# logistic regression: compare polynomial feature degrees with k-fold cross-validation

# Split the RAW features and fit the scaler on the training rows only, so that
# no statistics of the held-out test set leak into preprocessing. The sample
# count, test_size and random_state are unchanged, so the same rows end up in
# the train and test sets as when splitting the pre-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
degrees = 4  # highest polynomial degree evaluated (degrees 1..degrees inclusive)

# One entry per degree, in increasing degree order.
avg_training_accuracies = []
avg_validation_accuracies = []

for degree in range(1, degrees + 1):
  print(f"\nDegree:{degree}\n")

  training_accuracies = []
  validation_accuracies = []

  # Expand the standardized training features once per degree; the folds below
  # index into this expanded matrix.
  # NOTE(review): the scaler above was fit on the whole training set, so each
  # validation fold still contributes to the scaling statistics. Wrap scaling
  # and expansion in a per-fold pipeline if strictly leak-free CV is required.
  poly_features = PolynomialFeatures(degree=degree)
  x_poly = poly_features.fit_transform(X_train)

  for train_index, val_index in kf.split(x_poly, y_train):
    x_cv_train, x_cv_val = x_poly[train_index], x_poly[val_index]
    y_cv_train, y_cv_val = y_train[train_index], y_train[val_index]

    model = LogisticRegression(max_iter=1000)

    model.fit(x_cv_train, y_cv_train)

    y_train_pred = model.predict(x_cv_train)
    y_val_pred = model.predict(x_cv_val)

    training_accuracies.append(accuracy_score(y_cv_train, y_train_pred))
    validation_accuracies.append(accuracy_score(y_cv_val, y_val_pred))

  # Average over the folds actually evaluated for this degree.
  avg_training_accuracy = sum(training_accuracies) / len(training_accuracies)
  avg_validation_accuracy = sum(validation_accuracies) / len(validation_accuracies)

  # Report the per-degree result so the cell output shows more than the header.
  print(f"Average Training Accuracy: {avg_training_accuracy:.4f}")
  print(f"Average Validation Accuracy: {avg_validation_accuracy:.4f}\n")

  avg_training_accuracies.append(avg_training_accuracy)
  avg_validation_accuracies.append(avg_validation_accuracy)


Degree:1


Degree:2


Degree:3


Degree:4

