In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load dataset
# header=None + names=...: the file is read as having no header row, so the
# column names are supplied explicitly.
column_names = ["Sex", "Length", "Diameter", "Height", "WholeWeight", "ShuckedWeight", "VisceraWeight", "ShellWeight", "Rings"]
# Forward slashes resolve on Windows, Linux and macOS alike; a
# backslash-separated path only works on Windows.
data = pd.read_csv("abalone/abalone.data", names=column_names, header=None)

# Encode 'Sex' column
# NOTE(review): integer codes impose an artificial order (M < F < I) on a
# nominal feature; consider one-hot encoding if downstream cells allow the
# column layout of X to change.
data['Sex'] = data['Sex'].map({'M': 0, 'F': 1, 'I': 2})
# .map() silently converts any label missing from the mapping into NaN, which
# would otherwise only surface later as an unrelated error inside fit().
if data['Sex'].isna().any():
    raise ValueError("Unexpected or missing value in 'Sex' column; expected only 'M', 'F' or 'I'.")

# Features and target (binary classification)
X = data.drop(columns=['Rings'])
y = data['Rings'] + 1.5  # Age = Rings + 1.5
y_class = (y >= 10).astype(int)  # Class 1 if age ≥ 10 (old), else 0 (young)

# Train-test split
# Split BEFORE scaling so that held-out rows never contribute to the scaler's
# mean/variance (fitting the scaler on all rows leaks test-set statistics into
# training and makes the reported metrics optimistic).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Feature scaling (statistics learned from the training rows only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix scaled with the training statistics; kept so that any
# later cell that still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Logistic Regression model
# max_iter raised above the library default to give the solver room to converge.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# Predictions
y_pred = clf.predict(X_test)

# Evaluation (computed on the held-out 20% only)
print("\nLogistic Regression Classification Results:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))



Logistic Regression Classification Results:
Accuracy: 0.8301435406698564
Confusion Matrix:
 [[201  78]
 [ 64 493]]
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.72      0.74       279
           1       0.86      0.89      0.87       557

    accuracy                           0.83       836
   macro avg       0.81      0.80      0.81       836
weighted avg       0.83      0.83      0.83       836

