In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import resample
from xgboost import XGBRegressor
import joblib
from scipy.spatial import distance

# Load the saved models, label encoder, and scaler
rf_model = joblib.load('CropGenius/models/rf_model.pkl')
xgb_regressor = joblib.load('CropGenius/models/xgb_regressor.pkl')
label_encoder = joblib.load('CropGenius/models/label_encoder.pkl')
scaler = joblib.load('CropGenius/models/scaler.pkl')

# Load the new dataset
new_dataset = pd.read_csv('CropGenius/data/soil_data.csv')

# Example input for crop prediction
temp_c = 25  # Example temperature value
humidity = 60  # Example humidity value
rainfall = 200  # Example rainfall value

new_data = {
    'N': [80],         # Example N value
    'P': [40],         # Example P value
    'K': [40],         # Example K value
    'temperature': [temp_c],
    'humidity': [humidity],
    'ph': [6.5],       # Example pH value
    'rainfall': [rainfall]
}
new_df = pd.DataFrame(new_data)

# Predict the crop using the random forest model
predicted_label = rf_model.predict(new_df)
predicted_crop = label_encoder.inverse_transform(predicted_label)
print(f"Predicted Crop: {predicted_crop[0]}")

# Example input for soil condition prediction
new_soil_data = {
    'temperature': [temp_c],
    'humidity': [humidity],
    'rainfall': [rainfall],
    'label': [label_encoder.transform([predicted_crop[0]])[0]]
}
new_soil_df = pd.DataFrame(new_soil_data)

# Scale the input data
new_soil_df_scaled = scaler.transform(new_soil_df)

# Predict N, P, K, and pH using the XGBRegressor model
predicted_conditions = xgb_regressor.predict(new_soil_df_scaled)
predicted_npkph = predicted_conditions[0]

print(f"Predicted Soil Conditions: N={predicted_npkph[0]}, P={predicted_npkph[1]}, K={predicted_npkph[2]}, pH={predicted_npkph[3]}")

# Find the closest match in the new soil dataset
new_dataset_features = new_dataset[['Sand %', 'Clay %', 'Silt %', 'pH', 'EC mS/cm', 'O.M. %', 'CACO3 %', 'N_NO3 ppm', 'P ppm', 'K ppm ', 'Mg ppm', 'Fe ppm', 'Zn ppm', 'Mn ppm', 'Cu ppm', 'B ppm ']]

def find_closest_match(predicted_npkph, dataset):
    dataset_npkph = dataset[['N_NO3 ppm', 'P ppm', 'K ppm ', 'pH']]
    distances = distance.cdist([predicted_npkph], dataset_npkph, 'euclidean')
    closest_index = np.argmin(distances)
    return dataset.iloc[closest_index]

closest_match = find_closest_match(predicted_npkph, new_dataset)

print("Closest matching soil data:")
print(closest_match)


Predicted Crop: coffee
Predicted Soil Conditions: N=101.8277587890625, P=27.847352981567383, K=29.345985412597656, pH=6.92962646484375
Closest matching soil data:
ID              
Sand %       NaN
Clay %       NaN
Silt %       NaN
pH           NaN
EC mS/cm     NaN
O.M. %       NaN
CACO3 %      NaN
N_NO3 ppm    NaN
P ppm        NaN
K ppm        NaN
Mg ppm       NaN
Fe ppm       NaN
Zn ppm       NaN
Mn ppm       NaN
Cu ppm       NaN
B ppm        NaN
Name: 781, dtype: object
