In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import pickle
import time

# Load and explore the dataset
def load_data(filepath):
    """Read the crop dataset from a CSV file and print a quick exploration summary.

    The summary printed to stdout is, in order: the frame's shape, the
    ``DataFrame.info()`` report, the per-column null counts, the number of
    duplicated rows and the ``describe()`` statistics.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the CSV.

    Returns:
        The loaded ``pandas.DataFrame``, unmodified.
    """
    crop = pd.read_csv(filepath)
    print(crop.shape)
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    crop.info()
    print(crop.isnull().sum())
    print(crop.duplicated().sum())
    print(crop.describe())
    return crop

# Encode the crop labels to numerical values
def encode_labels(crop):
    """Replace the textual ``label`` column with an integer ``crop_num`` column.

    Crop names are numbered 1..22 in a fixed order. The frame is modified in
    place: ``crop_num`` is added, ``label`` is dropped, and the same frame
    object is returned. Labels that are not in the table map to NaN.

    Args:
        crop: DataFrame containing a ``label`` column of crop names.

    Returns:
        The same DataFrame, now carrying ``crop_num`` instead of ``label``.
    """
    # Position in this tuple (1-based) is the numeric code of the crop.
    ordered_names = (
        'rice', 'maize', 'jute', 'cotton', 'coconut', 'papaya',
        'orange', 'apple', 'muskmelon', 'watermelon', 'grapes', 'mango',
        'banana', 'pomegranate', 'lentil', 'blackgram', 'mungbean',
        'mothbeans', 'pigeonpeas', 'kidneybeans', 'chickpea', 'coffee',
    )
    name_to_code = {name: code for code, name in enumerate(ordered_names, start=1)}

    crop['crop_num'] = crop['label'].map(name_to_code)
    crop.drop(columns='label', inplace=True)
    return crop

# Split data into train and test sets
def split_data(crop):
    """Separate features from the ``crop_num`` target and make a train/test split.

    Args:
        crop: DataFrame holding the feature columns plus ``crop_num``.

    Returns:
        The result of ``train_test_split`` for (features, target) with 20% of
        the rows held out for testing and ``random_state=42``, i.e.
        ``X_train, X_test, y_train, y_test``.
    """
    target = crop['crop_num']
    features = crop.drop(columns='crop_num')
    splits = train_test_split(features, target, random_state=42, test_size=0.2)
    return splits

# Scale data
def scale_data(X_train, X_test):
    """Scale the train/test features with both a MinMaxScaler and a StandardScaler.

    Each scaler is fitted on ``X_train`` only and then applied to ``X_test``.

    Args:
        X_train: Training features.
        X_test: Test features.

    Returns:
        A 4-tuple ``((train_minmax, test_minmax), (train_standard,
        test_standard), minmax_scaler, standard_scaler)`` where the last two
        items are the fitted scaler objects.
    """
    scalers = (MinMaxScaler(), StandardScaler())

    # Fit on the training data, then reuse the fitted parameters for the test data.
    scaled_pairs = [
        (scaler.fit_transform(X_train), scaler.transform(X_test))
        for scaler in scalers
    ]

    minmax_pair, standard_pair = scaled_pairs
    minmax_scaler, standard_scaler = scalers
    return minmax_pair, standard_pair, minmax_scaler, standard_scaler

# Train and evaluate models
def evaluate_models(models, X_train, y_train, X_test, y_test, scaler_name):
    """Fit every model, print its test-set metrics and return them.

    For each entry of ``models`` the estimator is fitted in place on the
    training data and evaluated on the test data. Accuracy, macro-averaged
    precision/recall/F1, the confusion matrix and the elapsed time are
    printed.

    Args:
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        scaler_name: Label of the scaling that produced the features; only
            used in the printed report.

    Returns:
        dict mapping each model name to a dict with the keys ``accuracy``,
        ``precision``, ``recall``, ``f1``, ``confusion_matrix`` and
        ``execution_time`` (seconds).
    """
    results = {}
    for name, model in models.items():
        # perf_counter() is monotonic and high-resolution, so the measured
        # duration cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')
        conf_matrix = confusion_matrix(y_test, y_pred)

        # The timed span covers fitting, prediction and metric computation.
        execution_time = time.perf_counter() - start_time

        print(f"{name} ({scaler_name}) - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")
        print("Confusion Matrix:\n", conf_matrix)
        print(f"Execution Time: {execution_time:.2f} seconds")
        print("==========================================================")

        results[name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'confusion_matrix': conf_matrix,
            'execution_time': execution_time,
        }
    return results

# Save the model and scalers
def save_model_and_scalers(model, minmax_scaler, standard_scaler):
    """Pickle the model and both scalers into the current working directory.

    Writes ``model.pkl``, ``minmaxscaler.pkl`` and ``standard_scaler.pkl``.
    Saving is best-effort: any error is reported on stdout instead of being
    raised, and files written before the failure are left in place.

    Args:
        model: Trained model to persist.
        minmax_scaler: Fitted MinMax scaler to persist.
        standard_scaler: Fitted standard scaler to persist.

    Returns:
        None.
    """
    artifacts = (
        ('model.pkl', model),
        ('minmaxscaler.pkl', minmax_scaler),
        ('standard_scaler.pkl', standard_scaler),
    )
    try:
        for filename, obj in artifacts:
            # The context manager guarantees the handle is flushed and closed
            # even when pickling fails part-way through.
            with open(filename, 'wb') as fh:
                pickle.dump(obj, fh)
        print("Model and scalers saved successfully.")
    except Exception as e:
        print(f"Error saving model or scalers: {e}")

# Recommendation function
def recommend_crop(N, P, K, temperature, humidity, ph, rainfall, model, minmax_scaler):
    """Predict a crop code for one set of soil and climate measurements.

    The seven measurements are packed into a single-row array, passed through
    ``minmax_scaler.transform`` and then handed to ``model.predict``.

    Args:
        N, P, K, temperature, humidity, ph, rainfall: Feature values, placed
            in the row in exactly this order.
        model: Object exposing ``predict``.
        minmax_scaler: Object exposing ``transform``.

    Returns:
        The first element of the model's prediction, or ``None`` if predicting
        raised an exception (the error is printed). Errors raised by the
        scaler are not caught.
    """
    row = [N, P, K, temperature, humidity, ph, rainfall]
    sample = np.array([row])
    scaled_sample = minmax_scaler.transform(sample)  # Use MinMaxScaler

    try:
        return model.predict(scaled_sample)[0]
    except Exception as e:
        print(f"Error predicting crop: {e}")
    # Reached only when prediction failed; None signals the error to the caller.
    return None

# Main function
# Script entry point: load the CSV, encode the labels, split and scale the
# data, compare several classifiers, train a Decision Tree, predict a crop for
# one hard-coded sample and pickle the tree together with both scalers.
if __name__ == "__main__":
    # NOTE(review): the dataset path is a hard-coded absolute path.
    crop = load_data('/mnt/data/crop_recommendation_dataset_5000.csv')
    crop = encode_labels(crop)
    X_train, X_test, y_train, y_test = split_data(crop)
    # The standard-scaled arrays are produced here but not used further in
    # this block; only the fitted standard_scaler is saved at the end.
    (X_train_minmax, X_test_minmax), (X_train_standard, X_test_standard), minmax_scaler, standard_scaler = scale_data(X_train, X_test)

    # Initialize models
    # All estimators use their default hyper-parameters except SVC.
    # NOTE(review): 'Extra Trees' is sklearn.tree.ExtraTreeClassifier (a
    # single randomized tree), not the ensemble ExtraTreesClassifier — confirm
    # this is intended.
    models = {
        'Logistic Regression': LogisticRegression(),
        'Naive Bayes': GaussianNB(),
        'Support Vector Machine': SVC(probability=True),
        'K-Nearest Neighbors': KNeighborsClassifier(),
        'Decision Tree': DecisionTreeClassifier(),
        'Random Forest': RandomForestClassifier(),
        'Bagging': BaggingClassifier(),
        'AdaBoost': AdaBoostClassifier(),
        'Gradient Boosting': GradientBoostingClassifier(),
        'Extra Trees': ExtraTreeClassifier(),
    }

    # Evaluate models using MinMaxScaler
    # evaluate_models fits each estimator in `models` in place and prints its metrics.
    evaluate_models(models, X_train_minmax, y_train, X_test_minmax, y_test, "MinMaxScaler")

    # Train the final Decision Tree model with MinMaxScaler
    # This is a fresh estimator, separate from the 'Decision Tree' entry in `models`.
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train_minmax, y_train)
    y_pred_dtc = dtc.predict(X_test_minmax)
    print(f"Decision Tree Accuracy: {accuracy_score(y_test, y_pred_dtc):.4f}")

    # Recommendation based on new inputs
    # NOTE(review): ph = 100 looks outside the usual 0-14 pH scale — confirm
    # the sample values are intended.
    N, P, K, temperature, humidity, ph, rainfall = 40, 50, 50, 40.0, 20, 100, 100
    predicted_crop = recommend_crop(N, P, K, temperature, humidity, ph, rainfall, dtc, minmax_scaler)

    # Crop recommendation mapping
    # Numeric code -> display name; the codes mirror the table in encode_labels.
    crop_dict = {1: "Rice", 2: "Maize", 3: "Jute", 4: "Cotton", 5: "Coconut", 6: "Papaya", 7: "Orange",
                 8: "Apple", 9: "Muskmelon", 10: "Watermelon", 11: "Grapes", 12: "Mango", 13: "Banana",
                 14: "Pomegranate", 15: "Lentil", 16: "Blackgram", 17: "Mungbean", 18: "Mothbeans",
                 19: "Pigeonpeas", 20: "Kidneybeans", 21: "Chickpea", 22: "Coffee"}

    # recommend_crop returns None on a prediction error, which is not a key
    # of crop_dict and therefore falls through to the else branch.
    if predicted_crop in crop_dict:
        crop_name = crop_dict[predicted_crop]
        print(f"{crop_name} is the best crop to be cultivated.")
    else:
        print("Unable to recommend a proper crop for this environment.")

    # Save the model and scalers
    # Writes model.pkl, minmaxscaler.pkl and standard_scaler.pkl to the working directory.
    save_model_and_scalers(dtc, minmax_scaler, standard_scaler)


ModuleNotFoundError: No module named 'numpy'