# Logistic Regression on Synthetic Data

This notebook demonstrates a simple data analysis pipeline using Python.

We generate a synthetic binary classification dataset, explore it with pandas, train a logistic regression model using scikit-learn, evaluate the model, and plot the confusion matrix using matplotlib.



In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Build a reproducible synthetic binary-classification dataset:
# 200 samples, 5 features (3 informative, 0 redundant), one cluster per class.
X, y = make_classification(
    n_samples=200,
    n_features=5,
    n_informative=3,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# Wrap the feature matrix in a labelled DataFrame and attach the class
# labels as a 'Target' column so features and labels can be explored together.
columns = [f'Feature{idx}' for idx in range(1, 6)]
data_df = pd.DataFrame(X, columns=columns).assign(Target=y)

# Preview the first rows (rendered as this cell's output)
data_df.head()


In [None]:
# Explore, train, evaluate and visualise.
# Reads `data_df` and `columns` from the data-generation cell above.

# Basic summary statistics.
# Jupyter only auto-renders the LAST expression of a cell, so intermediate
# results must be shown explicitly; display() is provided by the IPython
# kernel and keeps the rich HTML table rendering.
data_summary = data_df.describe()
display(data_summary)

# Split the data into train and test sets (70/30, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    data_df[columns], data_df['Target'], test_size=0.3, random_state=42
)

# Initialize and train the logistic regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

# Make predictions on the test set
y_pred = log_reg.predict(X_test)

# Calculate accuracy; printed explicitly because a bare mid-cell expression
# would be silently discarded.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.3f}')

# Generate a classification report (per-class precision / recall / F1)
class_report = classification_report(y_test, y_pred)
print(class_report)

# Generate confusion matrix.
# The label order is pinned so the matrix is always 2x2 and row/column i
# corresponds to 'Class i' in the tick labels below, even if a class were
# absent from y_test or y_pred.
class_labels = [0, 1]
class_names = ['Class 0', 'Class 1']
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot confusion matrix: rows are true labels, columns are predicted labels
plt.figure()
plt.title('Confusion Matrix')
plt.imshow(conf_matrix, interpolation='nearest')
plt.colorbar()
plt.xticks(range(len(class_names)), class_names)
plt.yticks(range(len(class_names)), class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Annotate each cell with its count; imshow puts column j on x and row i on y
for i in range(len(class_labels)):
    for j in range(len(class_labels)):
        plt.text(j, i, conf_matrix[i, j], horizontalalignment='center', verticalalignment='center')
plt.tight_layout()
plt.show()
