In [15]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, silhouette_score

# Pipeline overview: load the penguin measurements, clean and standardize them,
# cluster with DBSCAN, then train an MLP to reproduce the cluster assignments.

# Load the dataset
penguin_df = pd.read_csv("penguins.csv")  # Update the path to your file

# Column groups used throughout the pipeline (defined once to avoid drift
# between the imputation step and the feature matrix).
NUMERIC_FEATURES = ['culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g']
FEATURES = NUMERIC_FEATURES + ['sex']

# Handle missing values: numeric measurements are filled with the column mean.
imputer = SimpleImputer(strategy='mean')
penguin_df[NUMERIC_FEATURES] = imputer.fit_transform(penguin_df[NUMERIC_FEATURES])

# Fill missing 'sex' entries with the most frequent value *before* encoding.
# Without this, LabelEncoder either fails on mixed str/NaN input or turns NaN
# into an extra category, which then skews the standardized feature distances.
# This is a no-op when the column has no missing values.
sex_mode = penguin_df['sex'].mode()  # mode() ignores NaN by default
if not sex_mode.empty:
    penguin_df['sex'] = penguin_df['sex'].fillna(sex_mode.iloc[0])

# Encode the 'sex' column as integer codes
label_encoder = LabelEncoder()
penguin_df['sex'] = label_encoder.fit_transform(penguin_df['sex'])

# Prepare the feature matrix
X = penguin_df[FEATURES]

# Standardize the features so every column contributes comparably to the
# distance computations used by DBSCAN.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply DBSCAN clustering
dbscan = DBSCAN(eps=1, min_samples=5)  # Adjust eps and min_samples as needed
# NOTE: DBSCAN assigns the label -1 to noise points; those rows are kept and
# are treated as just another class by the classifier below.
labels = dbscan.fit_predict(X_scaled)

# Add the cluster labels to the dataframe
penguin_df['cluster'] = labels

# Prepare for classification
# Use cluster labels as pseudo-labels
Y = penguin_df['cluster']

# Train-test split (fixed seed for reproducibility)
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)

# Model implementation: a single hidden layer of 10 ReLU units
model = MLPClassifier(hidden_layer_sizes=(10,), activation='relu', solver='adam', max_iter=5000, random_state=42)

# Training
model.fit(X_train, Y_train)

# Prediction
Y_pred = model.predict(X_test)

# Metrics
# These scores measure how well the MLP reproduces the DBSCAN pseudo-labels,
# which were derived from the same features; they are not an evaluation
# against ground-truth classes. Macro averaging weights every cluster equally,
# and zero_division=0 avoids warnings for clusters absent from the predictions.
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred, average='macro', zero_division=0)
recall = recall_score(Y_test, Y_pred, average='macro', zero_division=0)
f1 = f1_score(Y_test, Y_pred, average='macro', zero_division=0)

# Print results (as percentages)
print("Accuracy:", accuracy * 100)
print("Precision:", precision * 100)
print("Recall:", recall * 100)
print("F1 Score:", f1 * 100)


Accuracy: 100.0
Precision: 100.0
Recall: 100.0
F1 Score: 100.0
