In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import pickle
import json
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [33]:
df = pd.read_csv('.\\dataset\\cleaned_output_data.csv')

# Display the first few rows to check the data
print(df.head())

   soil_temp  soil_phs                         Plant_Type
0       20.9       5.9                   Medicinal Plants
1       21.0       6.8  Warm-Season Fruits and Vegetables
2       11.0       6.6                 Cool-Season Grains
3       11.0       4.8             Berries and Evergreens
4       20.0       7.2  Warm-Season Fruits and Vegetables


In [34]:
df = df.dropna()
df

Unnamed: 0,soil_temp,soil_phs,Plant_Type
0,20.9,5.9,Medicinal Plants
1,21.0,6.8,Warm-Season Fruits and Vegetables
2,11.0,6.6,Cool-Season Grains
3,11.0,4.8,Berries and Evergreens
4,20.0,7.2,Warm-Season Fruits and Vegetables
...,...,...,...
47337,26.9,7.4,Herbs and Aromatic Plants
47338,24.6,5.9,Medicinal Plants
47339,32.0,8.8,Drought-Resistant Plants
47340,30.9,8.9,Drought-Resistant Plants


In [35]:
# Define features and target variable
X = df[['soil_temp', 'soil_phs']]  # Features
y = df['Plant_Type']  # Target

# Check for missing data
df.isnull().sum()


soil_temp     0
soil_phs      0
Plant_Type    0
dtype: int64

In [36]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [37]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [38]:
# Initialize and train the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)


In [39]:
# Predict on the training data
y_train_pred = clf.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy Score:", train_accuracy)

Training Accuracy Score: 1.0


In [40]:
y_pred = clf.predict(X_test)

# Evaluate the performance
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy Score: 1.0

Classification Report:
                                    precision    recall  f1-score   support

           Berries and Evergreens       1.00      1.00      1.00       644
               Cool-Season Grains       1.00      1.00      1.00       522
         Drought-Resistant Plants       1.00      1.00      1.00      3696
        Herbs and Aromatic Plants       1.00      1.00      1.00      2950
 Leafy Greens and Root Vegetables       1.00      1.00      1.00      1255
      Legumes and Nitrogen-Fixers       1.00      1.00      1.00       473
                 Medicinal Plants       1.00      1.00      1.00      1060
                     Stone Fruits       1.00      1.00      1.00       145
      Tropical and Wetland Plants       1.00      1.00      1.00      2338
Warm-Season Fruits and Vegetables       1.00      1.00      1.00      1120

                         accuracy                           1.00     14203
                        macro avg       1.00      1.0

In [41]:
# Perform cross-validation
cv_scores = cross_val_score(clf, X, y_encoded, cv=5)  # 5-fold cross-validation

print("Cross-Validation Scores:", cv_scores)
print("Mean Cross-Validation Score:", cv_scores.mean())

Cross-Validation Scores: [1. 1. 1. 1. 1.]
Mean Cross-Validation Score: 1.0


In [42]:
# Get predictions from cross-validation
from sklearn.model_selection import cross_val_predict

y_pred_cv = cross_val_predict(clf, X, y_encoded, cv=5)

# Compute confusion matrix and additional metrics
conf_matrix = confusion_matrix(y_encoded, y_pred_cv)
precision = precision_score(y_encoded, y_pred_cv, average='weighted')
recall = recall_score(y_encoded, y_pred_cv, average='weighted')
f1 = f1_score(y_encoded, y_pred_cv, average='weighted')

print("Confusion Matrix:\n", conf_matrix)
print("Precision Score:", precision)
print("Recall Score:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[ 2099     0     0     0     0     0     0     0     0     0]
 [    0  1694     0     0     0     0     0     0     0     0]
 [    0     0 12393     0     0     0     0     0     0     0]
 [    0     0     0  9774     0     0     0     0     0     0]
 [    0     0     0     0  4021     0     0     0     0     0]
 [    0     0     0     0     0  1553     0     0     0     0]
 [    0     0     0     0     0     0  3545     0     0     0]
 [    0     0     0     0     0     0     0   570     0     0]
 [    0     0     0     0     0     0     0     0  7812     0]
 [    0     0     0     0     0     0     0     0     0  3881]]
Precision Score: 1.0
Recall Score: 1.0
F1 Score: 1.0


In [43]:
def predict_plant_type(soil_temp, soil_ph):
    if soil_temp < 0 or soil_temp > 50:  # Example reasonable range
        return 'Invalid soil temperature value'
    if soil_ph < 0 or soil_ph > 14:
        return 'Invalid soil pH value'
    
    # Predict the plant type using the trained model
    prediction = clf.predict([[soil_temp, soil_ph]])
    return prediction[0]

# Test the function with sample inputs
soil_temp = 20
soil_ph = 6.5
print(f"Predicted plant type: {predict_plant_type(soil_temp, soil_ph)}")


Predicted plant type: Leafy Greens and Root Vegetables




In [44]:
with open('E:\\ML Project\\NASA Space App\\SourceCode\\SAVE_STELLA_3024\\PEDOSPHERE\\server\\serverapp\\artifacts_pedo\\pedosphere_model.pickle','wb') as f:
    pickle.dump(clf,f)

In [45]:
columns={
    'data_columns':[col.lower() for col in X.columns]
}
with open("E:\\ML Project\\NASA Space App\\SourceCode\\SAVE_STELLA_3024\\PEDOSPHERE\\server\\serverapp\\artifacts_pedo\\columns.json","w")as f:
    f.write(json.dumps(columns))