In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib

In [4]:
# Read the raw crop-recommendation table from the working directory, then
# show its column index to confirm the feature and label names.
df = pd.read_csv("Crop_Recommendation.csv")
column_index = df.columns
print("Columns:", column_index)

Columns: Index(['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity',
       'pH_Value', 'Rainfall', 'Crop'],
      dtype='object')


In [6]:
# Target rows: grow the dataset to at least `target_rows` rows by resampling
# source rows with replacement and jittering every numeric feature by up to
# +/-2%. If the CSV already holds `target_rows` rows or more, no synthetic
# rows are added and df_expanded is just the de-duplicated source data.
target_rows = 30000
current_rows = len(df)
rows_needed = target_rows - current_rows
num_cols = ['Nitrogen', 'Phosphorus', 'Potassium', 'Temperature', 'Humidity', 'pH_Value', 'Rainfall']

# A generator seeded inside the cell makes the synthetic rows identical on
# every fresh run and on every re-run of this cell.
aug_rng = np.random.default_rng(42)


def _jittered_resample(source, n_rows, columns, rng, scale=0.02):
    """Draw `n_rows` rows from `source` with replacement and perturb them.

    Each value in `columns` is multiplied by an independent factor drawn
    uniformly from [1 - scale, 1 + scale). `source` itself is not modified.

    Returns a new DataFrame that keeps the index labels of the drawn rows.
    """
    picks = rng.integers(0, len(source), size=n_rows)
    sampled = source.iloc[picks].copy()
    for col in columns:
        sampled[col] = sampled[col] * (1 + rng.uniform(-scale, scale, size=n_rows))
    return sampled


# max(..., 0): asking for a negative number of rows would raise when the
# source file is already at or above the target size.
df_new = _jittered_resample(df, max(rows_needed, 0), num_cols, aug_rng)
df_expanded = pd.concat([df, df_new], ignore_index=True).drop_duplicates()

# drop_duplicates() can leave the frame short of the target, so keep topping
# it up with freshly jittered rows until the target is reached.
while len(df_expanded) < target_rows:
    extra_needed = target_rows - len(df_expanded)
    extra = _jittered_resample(df, extra_needed, num_cols, aug_rng)
    df_expanded = pd.concat([df_expanded, extra], ignore_index=True).drop_duplicates()

print("Expanded Dataset Shape:", df_expanded.shape)

Expanded Dataset Shape: (30000, 8)


In [8]:
# Step 4a: separate the feature columns from the 'Crop' label.
# NOTE(review): df_expanded is built by resampling the source rows and
# applying a small multiplicative jitter, so near-copies of one source row
# can end up on both sides of the split below. The held-out accuracy is
# therefore likely optimistic; splitting the raw rows before augmenting
# would avoid this.
X = df_expanded.drop(columns=['Crop'])
y = df_expanded['Crop']

# Step 4b: stratified train-test split (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 4c: feature scaling; the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4d: logistic regression. The 'saga' solver shuffles the data, so
# random_state is pinned to make the fitted coefficients reproducible.
model = LogisticRegression(solver='saga', C=0.1, max_iter=5000, random_state=42)
model.fit(X_train_scaled, y_train)

In [10]:
# Score the fitted logistic-regression model on the held-out split.
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Logistic Regression Accuracy (realistic):", acc)


Logistic Regression Accuracy (realistic): 0.9756666666666667


In [12]:
# Fixed-seed random subset of 20000 rows from the expanded dataset.
# NOTE(review): no other cell visible in this notebook reads df_sample.
df_sample = df_expanded.sample(n=20000, random_state=42)


In [14]:
# Re-create the logistic-regression estimator with a smaller C (0.05 instead
# of the 0.1 used for the first fit), i.e. stronger regularisation.
# This rebinds `model` to an UNFITTED estimator; it has to be fitted again
# before predict() can be called on it.
# random_state is pinned because the 'saga' solver shuffles the data.
model = LogisticRegression(solver='saga', C=0.05, max_iter=5000, random_state=42)

In [16]:
# Apply up to +/-5% multiplicative noise to every numeric feature of
# df_expanded, overwriting the columns in place.
# NOTE(review): X_train / X_test were taken from df_expanded in an earlier
# cell and no visible cell repeats the split after this one, so the models
# fitted further down never see this noise. Re-run the split/scaling cell
# after this one if the noisier data is meant to be used.
# NOTE(review): this cell overwrites its own input, so running it more than
# once compounds the noise.
noise_rng = np.random.default_rng(42)  # fixed seed: same perturbation on every fresh run
for col in num_cols:
    df_expanded[col] = df_expanded[col] * (1 + noise_rng.uniform(-0.05, 0.05, size=len(df_expanded)))

In [24]:
# Refit logistic regression with C=0.05 on the scaled training split.
# LogisticRegression is already imported in the notebook's import cell, so
# it is not imported again here.
# random_state is pinned because the 'saga' solver shuffles the data;
# without it two runs can produce slightly different coefficients.
model = LogisticRegression(solver='saga', C=0.05, max_iter=5000, random_state=42)
model.fit(X_train_scaled, y_train)

In [26]:
# Compare accuracy on the training split with accuracy on the held-out
# split; a large gap between the two would point to over-fitting.
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)

Training Accuracy: 0.9716666666666667
Test Accuracy: 0.971


In [28]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree fitted on the same scaled training split;
# fit() returns the estimator itself, so dt_model is the fitted tree.
dt_model = DecisionTreeClassifier(random_state=42, max_depth=10).fit(X_train_scaled, y_train)

# Held-out predictions and their accuracy
y_pred_dt = dt_model.predict(X_test_scaled)
acc_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)
print("Decision Tree Accuracy:", acc_dt)

Decision Tree Accuracy: 0.9873333333333333


In [30]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 depth-limited trees, fitted on the scaled training
# split and scored on the held-out split.
rf_params = dict(n_estimators=100, max_depth=12, random_state=42)
rf_model = RandomForestClassifier(**rf_params)
rf_model.fit(X_train_scaled, y_train)

y_pred_rf = rf_model.predict(X_test_scaled)
acc_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest Accuracy:", acc_rf)

Random Forest Accuracy: 0.9946666666666667


In [37]:
# Save the fitted logistic-regression model. joblib is already imported in
# the notebook's import cell.
# NOTE(review): the "91" in the filename does not match the ~0.97 test
# accuracy recorded for this model in this notebook; the name is kept so
# anything that already loads this file keeps working.
joblib.dump(model, 'logistic_model_realistic_91.pkl')
print("Model saved as 'logistic_model_realistic_91.pkl'")

# The model was trained on standardised features, so it is only usable
# together with the fitted scaler; persist the scaler next to it.
joblib.dump(scaler, 'logistic_scaler_realistic_91.pkl')
print("Scaler saved as 'logistic_scaler_realistic_91.pkl'")

Model saved as 'logistic_model_realistic_91.pkl'


In [42]:
# One hand-written observation to classify. Values are keyed by feature name
# and flattened in insertion order:
# Nitrogen, Phosphorus, Potassium, Temperature, Humidity, pH_Value, Rainfall
sample_reading = {
    'Nitrogen': 90,
    'Phosphorus': 42,
    'Potassium': 43,
    'Temperature': 20.5,
    'Humidity': 80,
    'pH_Value': 6.5,
    'Rainfall': 200,
}
input_values = list(sample_reading.values())


In [44]:
# Scale input. The scaler was fitted on a DataFrame with named columns, so
# the single observation is wrapped in a one-row DataFrame carrying the same
# column labels. This avoids scikit-learn's "X does not have valid feature
# names" warning and makes a wrong number of values fail loudly here.
input_frame = pd.DataFrame([input_values], columns=X_train.columns)
input_scaled = scaler.transform(input_frame)

# Predict crop: predict() returns one label per row, and there is one row.
predicted_crop = model.predict(input_scaled)[0]

print("Predicted Crop:", predicted_crop)

Predicted Crop: Rice




In [46]:
import pandas as pd

# Re-read the source CSV (this rebinds `df`) and report the distinct labels
# found in its 'Crop' column.
df = pd.read_csv("Crop_Recommendation.csv")

crop_column = df['Crop']
unique_crops = crop_column.unique()
num_crops = len(unique_crops)

print("Number of unique crops:", num_crops)
print("Crops are:", unique_crops)


Number of unique crops: 22
Crops are: ['Rice' 'Maize' 'ChickPea' 'KidneyBeans' 'PigeonPeas' 'MothBeans'
 'MungBean' 'Blackgram' 'Lentil' 'Pomegranate' 'Banana' 'Mango' 'Grapes'
 'Watermelon' 'Muskmelon' 'Apple' 'Orange' 'Papaya' 'Coconut' 'Cotton'
 'Jute' 'Coffee']
