In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Predict gender from body weight with logistic regression, report the
# test-set accuracy, and plot the predictions next to the true labels.

# Load dataset
df = pd.read_csv("human_features_100.csv")

# Encode Gender: Male = 0, Female = 1
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})

# Series.map() turns every label that is not exactly 'Male' or 'Female' into
# NaN, and LogisticRegression.fit() rejects NaN in either X or y. Filter such
# rows out here, loudly, so the failure does not surface later as an opaque
# "Input contains NaN" error. With clean data this is a no-op.
usable_rows = df['Gender'].notna() & df['Weight_kg'].notna()
dropped_count = int((~usable_rows).sum())
if dropped_count:
    print(
        f"Warning: dropping {dropped_count} row(s) with an unrecognized "
        "Gender label or a missing Weight_kg value"
    )
    df = df[usable_rows].copy()
if df.empty:
    raise ValueError("No usable rows left after cleaning Gender and Weight_kg")

# map() yields floats whenever it produced a NaN; restore integer class labels.
df['Gender'] = df['Gender'].astype('int64')

# Use only 'Weight_kg' as the feature (double brackets keep X two-dimensional,
# which is the shape scikit-learn estimators expect).
X = df[['Weight_kg']]
y = df['Gender']

# Split into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Draw both charts on explicit axes instead of relying on pyplot's implicit
# "current subplot" state.
fig, (ax_pred, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Plot 1: True vs Predicted Gender
test_weights = X_test['Weight_kg']
ax_pred.scatter(test_weights, y_test, color='blue', label='True Gender', alpha=0.6)
ax_pred.scatter(test_weights, y_pred, color='red', marker='x', label='Predicted Gender')
ax_pred.set_xlabel('Weight (kg)')
ax_pred.set_ylabel('Gender (0=Male, 1=Female)')
ax_pred.set_title('True vs Predicted Gender (by Weight)')
ax_pred.legend()
ax_pred.grid(True)

# Plot 2: Accuracy Bar Chart
ax_acc.bar(['Model Accuracy'], [accuracy], color='green')
ax_acc.set_ylim(0, 1)  # accuracy is a fraction, so show the full 0-1 range
ax_acc.set_title('Classification Accuracy')
ax_acc.set_ylabel('Accuracy')
ax_acc.grid(True, axis='y')

fig.tight_layout()
plt.show()
