In [2]:
# Soil Nutrient Dataset Creation
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Fix NumPy's global seed so every run draws the same synthetic samples
np.random.seed(42)

# Number of synthetic soil samples (rows) to generate
num_samples = 1000

# (feature name, lower bound, upper bound) for each uniformly sampled column.
# The order is significant: the columns are drawn one after another from the
# seeded global generator, so reordering would change the generated values.
feature_ranges = [
    ('Nitrogen', 0, 200),
    ('Phosphorus', 0, 100),
    ('Potassium', 0, 300),
    ('pH', 3, 10),
    ('Organic_Matter', 0, 10),
    ('Moisture', 0, 100),
]

# One array of num_samples uniform draws per feature
data = {
    name: np.random.uniform(low, high, num_samples)
    for name, low, high in feature_ranges
}

# Assemble the feature columns into a single table
df = pd.DataFrame(data)

# Define rules for nutrient status
def determine_nutrient_status(row):
    """Classify one soil sample as 'Low', 'Medium' or 'High' in nutrients.

    Nitrogen, Phosphorus, Potassium and Organic_Matter are each divided by a
    fixed full-scale value (200, 100, 300 and 10), multiplied by a weight
    (0.3, 0.25, 0.25 and 0.2) and summed into one score. No other field of
    ``row`` is read.

    Args:
        row: Object indexable by column name (for example a DataFrame row)
            that provides the four columns listed above.

    Returns:
        'Low' when the score is below 0.5, 'Medium' when it is below 0.8,
        otherwise 'High'.
    """
    # (column, full-scale divisor, weight); the weights add up to 1.0
    weighted_terms = (
        ('Nitrogen', 200, 0.3),
        ('Phosphorus', 100, 0.25),
        ('Potassium', 300, 0.25),
        ('Organic_Matter', 10, 0.2),
    )

    # Accumulate strictly left to right so floating-point rounding is the
    # same as adding the four terms in one expression.
    total_score = 0.0
    for column, full_scale, weight in weighted_terms:
        total_score += (row[column] / full_scale) * weight

    if total_score < 0.5:
        return 'Low'
    if total_score < 0.8:
        return 'Medium'
    return 'High'

# Label every sample with its rule-based nutrient status
df['Nutrient_Status'] = df.apply(determine_nutrient_status, axis=1)

# Pick 10% of the rows (without repeats) whose label will be re-drawn at random
noise_indices = np.random.choice(df.index, size=int(num_samples*0.1), replace=False)

# For each current label: the candidate labels and their sampling probabilities.
# The current label is always among the candidates, so a selected row may keep
# its label.
relabel_options = {
    'Low': (['Low', 'Medium'], [0.7, 0.3]),
    'Medium': (['Low', 'Medium', 'High'], [0.2, 0.6, 0.2]),
}
# Used for any label that is neither 'Low' nor 'Medium' (i.e. 'High')
fallback_options = (['Medium', 'High'], [0.3, 0.7])

for idx in noise_indices:
    current_status = df.loc[idx, 'Nutrient_Status']
    candidates, probabilities = relabel_options.get(current_status, fallback_options)
    df.loc[idx, 'Nutrient_Status'] = np.random.choice(candidates, p=probabilities)

# Write the features and (noisy) labels to disk, without the index column
df.to_csv('soil_nutrient_data.csv', index=False)

# Prepare and save the model
import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Encode the string labels as integers. The fitted encoder is stored next to
# the model so integer predictions can be mapped back to label names.
encoder = LabelEncoder()
df['Nutrient_Status_Encoded'] = encoder.fit_transform(df['Nutrient_Status'])

# Split data: 80% for training, 20% held out for evaluation (fixed seed)
X = df[['Nitrogen', 'Phosphorus', 'Potassium', 'pH', 'Organic_Matter', 'Moisture']]
y = df['Nutrient_Status_Encoded']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split before persisting, so the saved model is
# never written without at least a basic quality check. For classifiers,
# `score` returns the mean accuracy on the given data.
test_accuracy = model.score(X_test, y_test)
print("Held-out accuracy: {:.3f}".format(test_accuracy))

# Save model and encoder together in one file
model_data = {
    'model': model,
    'encoder': encoder
}

# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# this file from a trusted location.
with open('soil_nutrient_model.pkl', 'wb') as f:
    pickle.dump(model_data, f)

print("Dataset and model created successfully!")

Dataset and model created successfully!
