In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import ConfusionMatrixDisplay

In [4]:
# Load the Iris dataset (you can replace it with any other dataset).
# The frame holds the feature columns followed by a float 'target' column.
iris = datasets.load_iris()
data = pd.DataFrame(iris['data'], columns=list(iris['feature_names']))
data['target'] = iris['target'].astype(float)


In [5]:
# Keep just two predictors for simplicity (edit the list to use others).
feature_cols = ['sepal length (cm)', 'sepal width (cm)']
X = data.loc[:, feature_cols]
# Binary target: 1 where the original class label equals 2, otherwise 0.
y = data['target'].eq(2).astype(int)


In [6]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Preprocessing: standardize the features (optional, but often helps logistic regression).
# The scaler learns its statistics from the training split only and is then
# applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
# Build a logistic-regression classifier with default settings and fit it
# on the standardized training features.
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

In [9]:
# Predict class labels for the standardized held-out test features.
y_pred = model.predict(X=X_test_scaled)

In [10]:
# Evaluate the model on the test split: overall accuracy plus per-class metrics.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(f'Accuracy: {accuracy}')
print('\nClassification Report:')
print(report)

Accuracy: 0.9

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.83      0.91      0.87        11

    accuracy                           0.90        30
   macro avg       0.89      0.90      0.89        30
weighted avg       0.90      0.90      0.90        30



In [11]:
# Cross-validation
# Standardization lives inside the pipeline so that each fold fits its own
# scaler on that fold's training portion only. Cross-validating directly on
# X_train_scaled would leak held-out-fold statistics into preprocessing,
# because that array was scaled with a scaler fit on the entire training set.
# cross_val_score clones the estimator for every fold, so the already-fitted
# `model` only supplies its hyperparameters here and is not modified.
cv_pipeline = make_pipeline(StandardScaler(), model)
cross_val_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='accuracy')
print(f'Cross-validation Scores: {cross_val_scores}')
print(f'Mean Cross-validation Score: {np.mean(cross_val_scores)}')


Cross-validation Scores: [0.79166667 0.75       0.83333333 0.79166667 0.70833333]
Mean Cross-validation Score: 0.775
