# In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Location of the raw price dataset (CSV)
DATA_PATH = 'C:\\Users\\tanvi\\Desktop\\Deploy\\agricrop.csv'

# Text columns that are label-encoded and matched against user input further down
CATEGORICAL_COLUMNS = ['commodity_name', 'state', 'district', 'market']
# Every column the model consumes: categorical + price features and the target
REQUIRED_COLUMNS = CATEGORICAL_COLUMNS + ['min_price', 'max_price', 'modal_price']

# Load the dataset
data = pd.read_csv(DATA_PATH)

# Normalize categorical text exactly the way user input is normalized
# (strip surrounding whitespace, then lowercase); otherwise a value such as
# "Wheat " in the CSV could never be matched by a typed "wheat".
for _column in CATEGORICAL_COLUMNS:
    data[_column] = data[_column].str.strip().str.lower()

# Drop rows missing the target or any feature: NaNs in the price columns make
# the regressor's fit fail, and NaNs in the categorical columns would end up
# as label-encoder classes.
data = data.dropna(subset=REQUIRED_COLUMNS)

# Categorical columns, listed in the order they are processed below
_categorical_columns = ('commodity_name', 'state', 'district', 'market')

# Distinct values present in each categorical column
commodities, states, districts, markets = (
    data[_name].unique() for _name in _categorical_columns
)

# One LabelEncoder per categorical column, fitted on that column's distinct values
label_encoders = {
    _name: LabelEncoder().fit(_values)
    for _name, _values in zip(
        _categorical_columns, (commodities, states, districts, markets)
    )
}

# Replace every categorical column with its integer codes
for _name, _encoder in label_encoders.items():
    data[_name] = _encoder.transform(data[_name])

# Model inputs: the four encoded categorical columns plus the two price columns
feature_columns = [
    'commodity_name',
    'state',
    'district',
    'market',
    'min_price',
    'max_price',
]
X = data[feature_columns]

# Regression target
y = data['modal_price']

# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# File the fitted model is persisted to and reloaded from
_model_file = 'knn_model.pkl'

# Fit a 5-nearest-neighbours regressor on the training split
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Persist the fitted model, then read it back so evaluation and prediction
# below go through the saved artifact
joblib.dump(knn_model, _model_file)
loaded_knn_model = joblib.load(_model_file)

# Score the reloaded model on the held-out rows
y_pred_knn = loaded_knn_model.predict(X_test)
r2_knn = r2_score(y_test, y_pred_knn)
mse_knn = mean_squared_error(y_test, y_pred_knn)

# Report R² first, then the mean squared error
print(f"KNeighbors Regressor R² score: {r2_knn:.4f}")
print(f"KNeighbors Regressor MSE: {mse_knn:.4f}")

# Function to predict crop price based on user input using KNeighbors Regressor
def predict_crop_price_knn(min_price_estimated=139, max_price_estimated=158):
    """Prompt for a crop, state, district and market and print the predicted modal price.

    The four answers are stripped and lowercased, checked against the classes
    of the module-level ``label_encoders``, encoded, and passed together with
    the two price estimates to the module-level ``loaded_knn_model``.

    Args:
        min_price_estimated: Value used for the ``min_price`` feature.
            Defaults to 139, the placeholder this function used to hard-code.
        max_price_estimated: Value used for the ``max_price`` feature.
            Defaults to 158, the placeholder this function used to hard-code.

    Returns:
        None. The prediction, or an error message, is printed.
    """
    # (encoder key / feature column, word used in the error message, prompt)
    fields = (
        ('commodity_name', 'crop', "Enter the crop name: "),
        ('state', 'state', "Enter the state name: "),
        ('district', 'district', "Enter the district name: "),
        ('market', 'market', "Enter the market name: "),
    )

    # Ask for every value up front, before any validation runs
    answers = {
        column: input(prompt).strip().lower() for column, _, prompt in fields
    }

    # Validate against the classes the encoder will actually accept in
    # transform(); a set gives O(1) membership and tolerates non-string classes.
    for column, label, _ in fields:
        if answers[column] not in set(label_encoders[column].classes_):
            print(f"Error: '{answers[column]}' is not a recognized {label} in the dataset.")
            return

    # Interactive boundary: report any encoding/prediction failure to the
    # user instead of letting it propagate as a traceback.
    try:
        # Column order matches the feature order the model was trained on
        features = {
            column: [label_encoders[column].transform([answers[column]])[0]]
            for column, _, _ in fields
        }
        features['min_price'] = [min_price_estimated]
        features['max_price'] = [max_price_estimated]
        new_data_df = pd.DataFrame(features)

        # Predict the modal price using the reloaded KNeighbors Regressor model
        predicted_price = loaded_knn_model.predict(new_data_df)
        print(
            f"The predicted modal price for {answers['commodity_name']} "
            f"in {answers['market']} market is: {predicted_price[0]}"
        )
    except Exception as e:
        print(f"Error in making prediction: {e}")

# Run the interactive prediction only when this file is executed directly
# (as a script or notebook cell), so importing it never blocks on input()
if __name__ == "__main__":
    predict_crop_price_knn()