<a href="https://colab.research.google.com/github/suhan-s255/1BM23CS344-SUHAN-S-6THSEM-ML-LAB/blob/main/1BM23CS344_Lab_3_Logistic_Regression_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

LogisticRegression_Binary

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc

# Load the HR dataset. The path is relative, so the CSV has to be present in
# the working directory of the notebook session.
df = pd.read_csv("HR_comma_sep.csv")

# Quick structural and statistical overview of the raw data.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emits a stray "None" line.
df.info()
print(df.describe())

# Distribution of the target column and per-class means of numeric columns.
print(df['left'].value_counts())
print(df.groupby('left').mean(numeric_only=True))

def _plot_retention(counts, title, xlabel, figsize=None, tick_rotation=None):
    """Show a bar chart of a crosstab whose columns are the `left` values.

    counts        -- crosstab DataFrame to plot (one bar group per index value)
    title         -- chart title
    xlabel        -- label for the x axis
    figsize       -- optional (width, height) forwarded to DataFrame.plot
    tick_rotation -- optional rotation in degrees for the x tick labels
    """
    axes = counts.plot(kind='bar', figsize=figsize)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel("Number of Employees")
    if tick_rotation is not None:
        plt.xticks(rotation=tick_rotation)
    axes.figure.tight_layout()
    plt.show()


# Employee counts per salary level, split by the `left` column.
salary_retention = pd.crosstab(df.salary, df.left)
_plot_retention(salary_retention,
                title="Employee Retention by Salary",
                xlabel="Salary Level")

# Employee counts per department, split by the `left` column. A wider figure
# and slanted tick labels are used for this chart.
dept_retention = pd.crosstab(df.Department, df.left)
_plot_retention(dept_retention,
                title="Employee Retention by Department",
                xlabel="Department",
                figsize=(10, 6),
                tick_rotation=45)

# Subset of columns used for modelling: three numeric/binary predictors, two
# categorical predictors and the target `left`.
# NOTE(review): 'average_montly_hours' presumably mirrors the spelling of the
# CSV header — confirm against the file before "correcting" it.
df_model = df[['satisfaction_level',
               'average_montly_hours',
               'promotion_last_5years',
               'salary',
               'Department',
               'left']]

# One-hot encode the categorical columns. drop_first=True omits one dummy per
# category so the encoded columns are not perfectly collinear.
df_model = pd.get_dummies(df_model, columns=['salary', 'Department'], drop_first=True)

# Feature matrix and target vector.
X = df_model.drop('left', axis=1)
y = df_model['left']

# Hold out 30% of the rows for evaluation. stratify=y keeps the proportion of
# each `left` value the same in the train and test parts, so the reported
# metrics are not skewed by an unlucky split; this matches the split used in
# the multiclass section of this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)

# max_iter is raised above the library default to give the solver room to
# converge on these unscaled features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Hard class predictions, and the predicted probability of the second class
# in model.classes_ (column index 1), which the ROC curve is built from.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Report the headline metrics for the held-out split, one per line group.
for heading, value in (
    ("Accuracy:", accuracy_score(y_test, y_pred)),
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
    ("Classification Report:\n", classification_report(y_test, y_pred)),
):
    print(heading, value)

# ROC curve points from the predicted probabilities, and the area under them.
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

# Plot the curve against the dashed diagonal reference line.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="ROC curve (area = %0.2f)" % roc_auc)
ax.plot([0, 1], [0, 1], linestyle='--')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("Receiver Operating Characteristic")
ax.legend(loc="lower right")
fig.tight_layout()
plt.show()

LogisticRegression_Multiclass

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the animal table and the class lookup table from the working directory.
# class_df is loaded here but not referenced anywhere else in the visible code.
df = pd.read_csv("zoo-data.csv")
class_df = pd.read_csv("zoo-class_type.csv")

# Explicit names for all 18 columns: identifier, 16 traits, then the target.
# NOTE(review): this overwrites whatever header read_csv picked up — confirm
# the file really has a header row, otherwise its first record was consumed.
trait_names = ["hair", "feathers", "eggs", "milk", "airborne", "aquatic",
               "predator", "toothed", "backbone", "breathes", "venomous", "fins",
               "legs", "tail", "domestic", "catsize"]
df.columns = ["animal_name", *trait_names, "class_type"]

# The animal name is dropped before modelling.
df = df.drop(columns="animal_name")

# Target vector and feature matrix.
y = df["class_type"]
X = df.drop(columns="class_type")

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the evaluation optimistic. The split arguments
# are unchanged, so the same rows end up in each part as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any code that still refers to X_scaled keeps working; it is now
# produced by the train-fitted scaler rather than one fitted on all rows.
X_scaled = scaler.transform(X)

# The explicit multi_class='multinomial' argument is deprecated in recent
# scikit-learn releases; with the default lbfgs solver a multiclass target is
# already fitted with the multinomial loss, so the argument is simply omitted.
model = LogisticRegression(max_iter=2000)
model.fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred = model.predict(X_test)

# Headline metrics on the held-out rows.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Use one explicit, sorted label list for both the matrix and the tick labels.
# Without labels=..., confusion_matrix only includes classes that occur in
# y_test or y_pred, so a class missing from the test split would make the
# matrix smaller than the tick-label list and mislabel the heatmap.
class_labels = sorted(y.unique())
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Heatmap with actual classes on the rows and predicted classes on the columns.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.tight_layout()
plt.show()