In [4]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load dataset (replace with the correct path)
data = pd.read_csv("fish.csv")

# Check the columns to verify feature count
print("Columns in dataset:", data.columns)

# Features and target
X = data.drop('fish_species', axis=1)
y = data['fish_species']

# Print the number of features
print(f"Number of features in X: {X.shape[1]}")

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)

# Test the model
y_pred = clf.predict(X_test_scaled)

# Print evaluation report
print(classification_report(y_test, y_pred))

# Making prediction for new data
# Ensure the sample data has the correct number of features
sample_data = np.array([[9.27, 53.10, 33.10, 188, 12, 50.56, 552, 3.5]])  # Add missing feature
print(f"Shape of sample data: {sample_data.shape}")

# Transform using the trained scaler
sample_data_scaled = scaler.transform(sample_data)

# Predict fish species
predicted_fish = clf.predict(sample_data_scaled)

print(f"Recommended fish species: {predicted_fish[0]}")


Columns in dataset: Index(['Unnamed: 0', 'ph', 'humidity', 'tempC', 'mq135', 'mq7', 'tds',
       'turbidity', 'fish_species'],
      dtype='object')
Number of features in X: 8
              precision    recall  f1-score   support

Bighead Carp       0.10      0.13      0.11       195
     Catfish       0.11      0.12      0.12       196
 Common Carp       0.08      0.07      0.07       195
  Grass Carp       0.07      0.07      0.07       194
      Mrigal       0.08      0.07      0.08       198
   Pangasius       0.09      0.11      0.10       184
        Rohu       0.06      0.06      0.06       204
 Silver Carp       0.12      0.11      0.11       192
   Snakehead       0.11      0.12      0.12       215
     Tilapia       0.16      0.11      0.13       227

    accuracy                           0.10      2000
   macro avg       0.10      0.10      0.10      2000
weighted avg       0.10      0.10      0.10      2000

Shape of sample data: (1, 8)
Recommended fish species: Bighead C



In [10]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load dataset (replace with the correct path)
data = pd.read_csv("fish.csv")

# Features and target
X = data.drop('fish_species', axis=1)  # Drop target column
y = data['fish_species']

# Check number of features
print(f"Number of features in dataset: {X.shape[1]}")

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)

# Making prediction for a new single data point
sample_data = np.array([[9.27, 53.10, 33.10, 188, 12, 50.56]])  # Ensure 8 features

# Scale the new data
sample_data_scaled = scaler.transform(sample_data)

# Predict fish species
predicted_fish = clf.predict(sample_data_scaled)

print(f"Recommended fish species: {predicted_fish[0]}")


Number of features in dataset: 6
Recommended fish species: Bighead Carp


