In [6]:
import pandas as pd
import numpy as np
import joblib

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score

print("\n=== LOADING DATASET ===")
# Path of the input CSV, resolved relative to the notebook's working directory.
dataset_path = "crop.csv"   # <-- CHANGE FILE NAME IF NEEDED
df = pd.read_csv(dataset_path)
# Plain-text preview of the first rows as a quick check of the parsed columns.
print(df.head())





=== LOADING DATASET ===
   District  Latitude  Longitude    Region   N   P   K   pH  Rainfall  \
0  Achalpur  21.27405   77.56868  Vidarbha  47  28  65  7.1      1013   
1  Achalpur  21.34170   77.53776  Vidarbha  65  28  50  7.2      1001   
2  Achalpur  21.34532   77.53547  Vidarbha  46  23  57  7.5      1137   
3  Achalpur  21.31377   77.59593  Vidarbha  57  25  59  7.7       882   
4  Achalpur  21.28582   77.56470  Vidarbha  55  28  52  7.3       927   

        Crop  
0  Sugarcane  
1  Sugarcane  
2     Cotton  
3    Soybean  
4  Sugarcane  


In [7]:

# -----------------------------------------
# LABEL ENCODING
# -----------------------------------------
print("\n=== ENCODING CATEGORIES ===")

# One encoder per categorical column; each keeps its own name because the
# fitted encoders are reused further down (saving, transforming user input,
# decoding crop predictions).
le_district, le_region, le_crop = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Fit each encoder on its source column and store the resulting integer codes
# in a new "<column>_enc" column of df.
for source_col, fitted_encoder in (
    ("District", le_district),
    ("Region", le_region),
    ("Crop", le_crop),
):
    df[source_col + "_enc"] = fitted_encoder.fit_transform(df[source_col])



=== ENCODING CATEGORIES ===


In [8]:

# -----------------------------------------
# INPUT FEATURES (LOCATION ONLY)
# -----------------------------------------
# The models only ever see where a sample was taken: encoded district and
# region plus the raw coordinates.
location_features = ['District_enc', 'Latitude', 'Longitude', 'Region_enc']
X = df[location_features]

# -----------------------------------------
# SOIL OUTPUTS (REGRESSION TARGETS)
# -----------------------------------------
# One target series per soil property, each predicted by its own regressor.
y_N, y_P, y_K, y_pH = (df[target] for target in ('N', 'P', 'K', 'pH'))

In [9]:

# -----------------------------------------
# SPLIT DATA
# -----------------------------------------
# A single call splits the features and all four soil targets together, so
# every train/test array is guaranteed to share the same row partition.
# (Previously this relied on repeating identical arguments across four
# separate calls, and it rebound `_`, which shadows IPython's last-output
# variable.) train_test_split returns a (train, test) pair per input array,
# in the order the arrays were passed.
(
    X_train, X_test,
    yN_train, yN_test,
    yP_train, yP_test,
    yK_train, yK_test,
    ypH_train, ypH_test,
) = train_test_split(X, y_N, y_P, y_K, y_pH, test_size=0.2, random_state=42)

In [10]:


# -----------------------------------------
# TRAIN MODELS (SOIL ESTIMATION)
# -----------------------------------------
print("\n=== TRAINING SOIL MODELS ===")

# Random forests are stochastic (bootstrap sampling); without a fixed seed
# every Restart & Run All produces different models, different MAE values and
# different saved pickles. The seed matches the one used for the data split.
model_N = RandomForestRegressor(n_estimators=300, random_state=42)
model_P = RandomForestRegressor(n_estimators=300, random_state=42)
model_K = RandomForestRegressor(n_estimators=300, random_state=42)
model_pH = RandomForestRegressor(n_estimators=300, random_state=42)

# Each regressor learns one soil property from the location-only features.
model_N.fit(X_train, yN_train)
model_P.fit(X_train, yP_train)
model_K.fit(X_train, yK_train)
# Last expression of the cell: the fitted estimator's repr is displayed.
model_pH.fit(X_train, ypH_train)


=== TRAINING SOIL MODELS ===


0,1,2
,n_estimators,300
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [11]:
# -----------------------------------------
# EVALUATION
# -----------------------------------------
print("\n=== MODEL ERROR (MAE) ===")
# (label, fitted regressor, held-out target) triples, reported in the same
# order the models were trained.
soil_evaluations = (
    ("Nitrogen", model_N, yN_test),
    ("Phosphorus", model_P, yP_test),
    ("Potassium", model_K, yK_test),
    ("pH", model_pH, ypH_test),
)
for nutrient, regressor, y_true in soil_evaluations:
    # Mean absolute error of the regressor's predictions on the test features.
    print(f"{nutrient} MAE:", mean_absolute_error(y_true, regressor.predict(X_test)))

# -----------------------------------------
# TRAIN LOCATION-BASED CROP MODEL
# -----------------------------------------
# Target: the integer-encoded crop label produced by le_crop.
y_crop = df['Crop_enc']

# NOTE: this rebinds X_train / X_test. The arguments (X, test_size,
# random_state) are the same as for the soil split, so the row partition is
# expected to be the same one.
X_train, X_test, y_train, y_test = train_test_split(X, y_crop, test_size=0.2, random_state=42)

# Fixed seed so the reported accuracy and the saved classifier are
# reproducible across Restart & Run All (the forest is otherwise stochastic).
crop_model = RandomForestClassifier(n_estimators=300, random_state=42)
crop_model.fit(X_train, y_train)

# Fraction of held-out samples whose crop is predicted exactly.
accuracy = accuracy_score(y_test, crop_model.predict(X_test))
print("\nCrop Prediction Accuracy:", accuracy)



=== MODEL ERROR (MAE) ===
Nitrogen MAE: 8.569386178861789
Phosphorus MAE: 4.344121951219512
Potassium MAE: 7.191873983739836
pH MAE: 0.2614491869918698

Crop Prediction Accuracy: 0.5109756097560976


In [12]:


# -----------------------------------------
# FERTILITY FUNCTION
# -----------------------------------------
def fertility_index(N, P, K, pH, *, n_min=50, p_min=40, k_min=60, ph_range=(6.5, 7.5)):
    """Classify soil fertility from four soil readings.

    One point is awarded for each criterion that is met:
    ``N >= n_min``, ``P >= p_min``, ``K >= k_min`` and
    ``ph_range[0] <= pH <= ph_range[1]`` (both pH bounds inclusive).

    Parameters
    ----------
    N, P, K, pH : number
        Soil readings, compared directly against the thresholds.
    n_min, p_min, k_min : number, keyword-only
        Minimum value at which the corresponding reading earns a point.
        Defaults reproduce the original hard-coded thresholds (50, 40, 60).
    ph_range : tuple of (low, high), keyword-only
        Inclusive pH interval that earns a point. Default ``(6.5, 7.5)``.

    Returns
    -------
    str
        ``"High Fertility"`` for a score of 3 or 4, ``"Medium Fertility"``
        for exactly 2, and ``"Low Fertility"`` otherwise.
    """
    ph_low, ph_high = ph_range
    # Each satisfied criterion contributes exactly one point (True counts as 1).
    score = sum((
        bool(N >= n_min),
        bool(P >= p_min),
        bool(K >= k_min),
        bool(ph_low <= pH <= ph_high),
    ))

    if score >= 3:
        return "High Fertility"
    if score == 2:
        return "Medium Fertility"
    return "Low Fertility"

In [13]:


# -----------------------------------------
# SAVE MODELS
# -----------------------------------------
# (object, file name) pairs: the four soil regressors, the crop classifier and
# the three fitted label encoders, written in this order.
artifacts = [
    (model_N, "soil_N.pkl"),
    (model_P, "soil_P.pkl"),
    (model_K, "soil_K.pkl"),
    (model_pH, "soil_pH.pkl"),
    (crop_model, "crop_recommender.pkl"),
    (le_district, "district_encoder.pkl"),
    (le_region, "region_encoder.pkl"),
    (le_crop, "crop_encoder.pkl"),
]
for artifact, file_name in artifacts:
    joblib.dump(artifact, file_name)

print("\n=== MODELS SAVED ===")


=== MODELS SAVED ===


In [14]:

# -----------------------------------------
# USER INPUT + PREDICTION
# -----------------------------------------
print("\n=== FARMER INPUT ===")

# Prompts are asked in this fixed order; text answers are stripped of
# surrounding whitespace, coordinates are parsed as floats.
district = input("District name: ").strip()
latitude = float(input("Latitude: "))
longitude = float(input("Longitude: "))
region = input("Region: ").strip()

# Map the text answers to the integer codes learned by the fitted encoders
# (transform works on sequences, hence the one-element list and the [0]).
district_enc = le_district.transform([district])[0]
region_enc = le_region.transform([region])[0]

# Single-row frame with the same column names the models were trained on.
feature_names = ['District_enc','Latitude','Longitude','Region_enc']
feature_row = [district_enc, latitude, longitude, region_enc]
user = pd.DataFrame([feature_row], columns=feature_names)

# One scalar estimate per soil property, in N, P, K, pH order.
N, P, K, pH = (
    soil_model.predict(user)[0]
    for soil_model in (model_N, model_P, model_K, model_pH)
)

fertility = fertility_index(N,P,K,pH)

print("\n--- ESTIMATED SOIL REPORT ---")
for reading_name, reading in (
    ("Nitrogen", N),
    ("Phosphorus", P),
    ("Potassium", K),
    ("pH", pH),
):
    print(f"{reading_name}: {reading:.2f}")
print("Soil Fertility:", fertility)



=== FARMER INPUT ===

--- ESTIMATED SOIL REPORT ---
Nitrogen: 97.72
Phosphorus: 18.23
Potassium: 37.96
pH: 6.12
Soil Fertility: Low Fertility


In [15]:

# -----------------------------------------
# TOP CROP PREDICTION
# -----------------------------------------
# Class probabilities for the single user row; column j belongs to
# crop_model.classes_[j].
probs = crop_model.predict_proba(user)[0]
# Column positions of the three highest probabilities, best first.
top = np.argsort(probs)[::-1][:3]

# Translate column positions into the encoded crop labels the classifier was
# trained on BEFORE decoding. A column position only equals the encoded label
# when every crop appears in the training split; if one is missing, decoding
# the raw position would print the wrong crop name.
top_labels = crop_model.classes_[top]
top_crops = le_crop.inverse_transform(top_labels)

print("\n--- TOP CROP RECOMMENDATIONS ---")
for rank, (idx, crop) in enumerate(zip(top, top_crops), start=1):
    print(f"{rank}. {crop} ({probs[idx]*100:.2f}%)")

print("\n✅ SYSTEM READY.")


--- TOP CROP RECOMMENDATIONS ---
1. Rice (100.00%)
2. Sugarcane (0.00%)
3. Soybean (0.00%)

✅ SYSTEM READY.
