In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score
import joblib

# Load dataset
df = pd.read_csv("Fish.csv")

# Encode categorical target for classification
label_encoder = LabelEncoder()
df["Species"] = label_encoder.fit_transform(df["Species"])

# Define features and targets
X = df.drop(columns=["Weight"])  # Features (excluding Weight for regression)
y_reg = df["Weight"]  # Target for Regression
y_clf = df["Species"]  # Target for Classification

# Split data
X_train, X_test, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X, y_clf, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_clf_scaled = scaler.fit_transform(X_train_clf)
X_test_clf_scaled = scaler.transform(X_test_clf)

# Train Regression Model (Random Forest Regressor)
reg_model = RandomForestRegressor(n_estimators=100, random_state=42)
reg_model.fit(X_train_scaled, y_train_reg)
y_pred_reg = reg_model.predict(X_test_scaled)
reg_mae = mean_absolute_error(y_test_reg, y_pred_reg)

# Train Classification Model (Random Forest Classifier)
clf_model = RandomForestClassifier(n_estimators=100, random_state=42)
clf_model.fit(X_train_clf_scaled, y_train_clf)
y_pred_clf = clf_model.predict(X_test_clf_scaled)
clf_acc = accuracy_score(y_test_clf, y_pred_clf)

# Save models & encoders
joblib.dump(reg_model, "fish_regressor.pkl")
joblib.dump(clf_model, "fish_classifier.pkl")
joblib.dump(scaler, "fish_scaler.pkl")
joblib.dump(label_encoder, "fish_label_encoder.pkl")

print(f"Regression Model MAE: {reg_mae}")
print(f"Classification Model Accuracy: {clf_acc}")

Regression Model MAE: 44.49998958333333
Classification Model Accuracy: 0.9375
