In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,mean_squared_error, r2_score


In [2]:
# Location of the soil dataset (CSV), relative to the notebook's working directory
soil_data = "random_soil_data.csv"

# Read the whole CSV into a DataFrame; later cells add and overwrite columns on `df`
df = pd.read_csv(filepath_or_buffer=soil_data)

In [5]:
# Build the binary target: 1 (Suitable) when bearing capacity is strictly above
# the cutoff, 0 (Not Suitable) otherwise.
threshold = 250
exceeds_threshold = df["Bearing Capacity (kN/m²)"].gt(threshold)
df["Suitability"] = exceeds_threshold.astype(int)

# Replace the categorical "Soil Type" column with the encoder's integer codes.
# NOTE: this overwrites the column in place, so re-running this cell would fit
# the encoder on the already-encoded integers instead of the original labels.
label_encoder = LabelEncoder()
soil_type_codes = label_encoder.fit_transform(df["Soil Type"])
df["Soil Type"] = soil_type_codes

In [7]:
# Target (y) is the derived label; features (X) are every remaining column.
# "Bearing Capacity (kN/m²)" is excluded from X because the label is computed from it.
y = df["Suitability"]
X = df.drop(columns=["Bearing Capacity (kN/m²)", "Suitability"])

# Standardize the feature columns; the scaler is fitted on all rows of X here
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)


In [9]:
# Split data into training (80%) and testing (20%) sets.
# The split is performed on the unscaled features so the scaler can be fitted on
# the training rows only. Fitting it on every row before splitting (as X_scaled
# was) lets test-set statistics leak into the training data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows, then apply that same
# transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)



In [11]:

# Fit a logistic-regression classifier on the training split.
# random_state is pinned so repeated runs give the same model.
model = LogisticRegression(random_state=42).fit(X_train, y_train)
model


In [13]:
# Predicted class labels (0 / 1) for the held-out test rows
y_pred = model.predict(X=X_test)


In [15]:
# Model evaluation: compute every metric first, then print them together.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

# NOTE(review): the recorded output shows ~49% accuracy on a roughly balanced
# test set, i.e. about chance level for a binary target.
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", report)
print("\nConfusion Matrix:\n", matrix)


Model Accuracy: 49.32%

Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.57      0.53      8932
           1       0.50      0.42      0.46      9068

    accuracy                           0.49     18000
   macro avg       0.49      0.49      0.49     18000
weighted avg       0.49      0.49      0.49     18000


Confusion Matrix:
 [[5057 3875]
 [5248 3820]]


In [30]:
import pickle
import streamlit as st
import numpy as np



In [32]:
# Load the persisted model, scaler and label encoder from disk.
# These assignments replace the `model`, `scaler` and `label_encoder` objects
# created earlier in this notebook.
# NOTE(review): no visible cell writes these .pkl files, so they must already
# exist in the working directory — confirm where they are produced.
# SECURITY: pickle.load can execute arbitrary code during unpickling; only load
# files that come from a trusted source.
# Each file is opened in a `with` block so its handle is closed after loading.
with open('soil_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('soil_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
with open('soil_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [34]:
# Streamlit UI: page heading
app_title = "Soil Suitability Prediction"
st.title(app_title)

DeltaGenerator()

In [36]:
# Input widgets, in display order: two numeric fields and a soil-type dropdown.
# The dropdown choices are the classes stored on the loaded label encoder.
ph = st.number_input(label="Enter soil pH:")
moisture = st.number_input(label="Enter moisture (%):")
soil_type = st.selectbox(label="Select Soil Type:", options=label_encoder.classes_)

In [13]:
# Run a prediction only when the "Predict" button is pressed.
if st.button("Predict"):
    # Convert the selected soil-type label to the integer code used by the encoder
    encoded_type = label_encoder.transform([soil_type])[0]

    # Single-row 2-D array in the order [pH, moisture, soil-type code]
    # NOTE(review): assumes the loaded scaler/model expect exactly these three
    # features in this order — confirm against how the pickles were trained.
    feature_row = [ph, moisture, encoded_type]
    input_data = np.array([feature_row])
    input_scaled = scaler.transform(input_data)

    # predict() returns one label per row; take the only one
    pred = model.predict(input_scaled)[0]
    if pred == 1:
        result = "Suitable"
    else:
        result = "Not Suitable"
    st.success(f"Prediction: {result}")
