In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
from imblearn.over_sampling import RandomOverSampler

In [2]:
data = pd.read_excel('Finally crop pridiction.xlsx')
data.shape


label_encoder = LabelEncoder()
data['Soil Type'] = label_encoder.fit_transform(data['Soil Type'])
data['Wind'] = label_encoder.fit_transform(data['Wind'])
print(data['Soil Type'])

0      0
1      0
2      0
3      1
4      1
      ..
161    0
162    1
163    1
164    1
165    0
Name: Soil Type, Length: 166, dtype: int64


In [3]:
X = data[['Avg. Temp (°C)', 'Avg. Rainfall', 'Avg. Soil pH', 'Avg. Humidity', 'Avg. Sunlight (hours)', 'Wind', 'Soil Type']]
y = data['Crop'] 

# Data Augmentation: Noise Injection
def add_noise(X, noise_level=0.1):
    noisy_data = X.copy()
    noise = np.random.normal(0, noise_level, noisy_data.shape)
    noisy_data += noise
    return noisy_data

# Augment the dataset with noise
X_noisy = add_noise(X)
X_augmented = np.vstack((X, X_noisy))
y_augmented = np.hstack((y, y))

In [4]:
ros = RandomOverSampler(random_state=42)
X_ros, y_ros = ros.fit_resample(X_augmented, y_augmented)

X_train, X_test, y_train, y_test = train_test_split(X_ros, y_ros, test_size=0.4, random_state=42)


print("Training Random Forest Classifier...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Classifier Accuracy: {accuracy_rf:.2f}")

Training Random Forest Classifier...
Random Forest Classifier Accuracy: 0.88


In [8]:
new_conditions = [[24.4,450,7.7,68.4,7,1,2]]  # Example conditions: Temp=25°C, Rainfall=60mm, Soil pH=6.5, etc.


predicted_probs = rf_model.predict_proba(new_conditions)


crop_names = rf_model.classes_
probabilities = predicted_probs[0]


top_3_indices = probabilities.argsort()[-3:][::-1]
top_3_crops = [(crop_names[i], probabilities[i]) for i in top_3_indices]


print("Top 3 crops suitable for the given conditions:")
# print(top_3_crops)
for crop, prob in top_3_crops:
    print(f"{crop}: {prob*100:.2f}%")


17.8,682,7.7,67.4,9,1,1 #cabbage
21.3,444,7.9,47.6,6,2,2 #pepper

Top 3 crops suitable for the given conditions:
Brussels Sprouts: 17.00%
Malabar Spinach: 15.00%
Buckwheat: 11.00%


(21.3, 444, 7.9, 47.6, 6, 2, 2)

In [6]:
import joblib
import h5py

# Save the model using joblib
model_filename = 'crop_prediction.pkl'
joblib.dump(rf_model, model_filename)

# Save the serialized model into an .h5 file
with h5py.File('crop_prediction.h5', 'w') as hf:
    with open(model_filename, 'rb') as f:
        hf.create_dataset('random_forest', data=np.frombuffer(f.read(), dtype='uint8'))

In [10]:
import h5py
import joblib
import numpy as np

# Load the serialized model from the .h5 file
with h5py.File('crop_prediction.h5', 'r') as hf:
    model_data = hf['random_forest'][:]

# Save the buffer back to a .pkl file
with open('crop_prediction.pkl', 'wb') as f:
    f.write(model_data.tobytes())

# Load the model using joblib
model = joblib.load('crop_prediction.pkl')

# Now you can use the loaded model for prediction
# Example: Predicting using some random data
# X_test = np.random.rand(1, 5)  # Replace this with actual test data
# prediction = model.predict([[17.8,682,7.7,67.4,9,1,1]])

predicted_probs = rf_model.predict_proba([[17.8,682,7.7,67.4,9,1,1]])


crop_names = rf_model.classes_
probabilities = predicted_probs[0]


top_3_indices = probabilities.argsort()[-3:][::-1]
top_3_crops = [(crop_names[i], probabilities[i]) for i in top_3_indices]


print("Top 3 crops suitable for the given conditions:")
# print(top_3_crops)
for crop, prob in top_3_crops:
    print(f"{crop}: {prob*100:.2f}%")


Top 3 crops suitable for the given conditions:
Cabbage: 30.00%
Lentils (Red, Green, Brown): 6.00%
Basil: 5.00%
