In [3]:
%pip install pandas scikit-learn tensorflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [4]:
sustainability_data = pd.read_csv('farmer_advisor_dataset.csv')
market_price_data = pd.read_csv('market_researcher_dataset.csv')

In [1]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import joblib

# Example feature lists; adjust to your dataset
numeric_features1 = ["Demand_Index", "Supply_Index", "Competitor_Price_per_ton", "Economic_Indicator","Weather_Impact_Score","Consumer_Trend_Index"]
categorical_features1 = ['Product', 'Seasonal_Factor']

# Create pipelines for numeric and categorical features
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers using ColumnTransformer
preprocessor_market = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features1),
        ('cat', categorical_transformer, categorical_features1)
    ]
)

# Fit the preprocessor using the training data (X_train should only contain the features)
# preprocessor.fit(X_train)


In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import joblib

# Example feature lists; adjust to your dataset
numeric_features2 = ["Soil_pH", "Soil_Moisture", "Temperature_C", "Rainfall_mm","Fertilizer_Usage_kg","Pesticide_Usage_kg","Crop_Yield_ton"]
categorical_features2 = ["Crop_Type",]

# Create pipelines for numeric and categorical features
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers using ColumnTransformer
preprocessor_sus = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features2),
        ('cat', categorical_transformer, categorical_features2)
    ]
)

# Fit the preprocessor using the training data (X_train should only contain the features)
# preprocessor.fit(X_train)

In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# Define the preprocessing function for the market data
def create_market_preprocessor(market_data_path):
    # Load the market data
    market_df = market_price_data
    
    # Drop Market_ID column
    if 'Market_ID' in market_df.columns:
        market_df = market_df.drop('Market_ID', axis=1)
        market_df = market_df.drop('Market_Price_per_ton', axis=1)
    
    # Identify categorical columns that need label encoding
    market_categorical_cols = ['Product', 'Seasonal_Factor']
    market_numeric_cols = [col for col in market_df.columns if col not in market_categorical_cols]
    
    # Create label encoders for categorical columns
    market_label_encoders = {}
    for col in market_categorical_cols:
        le = LabelEncoder()
        le.fit(market_df[col].astype(str))
        market_label_encoders[col] = le
    
    # Create standard scaler for numeric columns
    market_scaler = StandardScaler()
    market_scaler.fit(market_df[market_numeric_cols])
    
    # Combine into a dictionary to save
    scaler_market = {
        'label_encoders': market_label_encoders,
        'standard_scaler': market_scaler,
        'categorical_cols': market_categorical_cols,
        'numeric_cols': market_numeric_cols
    }
    
    # Save the preprocessor
    with open('scaler_market.pkl', 'wb') as f:
        pickle.dump(scaler_market, f)
    
    return scaler_market

# Define the preprocessing function for the farm/sustainability data
def create_sustainability_preprocessor(farm_data_path):
    # Load the farm data
    farm_df = sustainability_data
    
    # Drop Farm_ID column
    if 'Farm_ID' in farm_df.columns:
        farm_df = farm_df.drop('Farm_ID', axis=1)
        farm_df = farm_df.drop('Sustainability_Score', axis=1)
    
    # Identify categorical columns that need label encoding
    farm_categorical_cols = ['Crop_Type']
    farm_numeric_cols = [col for col in farm_df.columns if col not in farm_categorical_cols]
    
    # Create label encoders for categorical columns
    farm_label_encoders = {}
    for col in farm_categorical_cols:
        le = LabelEncoder()
        le.fit(farm_df[col].astype(str))
        farm_label_encoders[col] = le
    
    # Create standard scaler for numeric columns
    farm_scaler = StandardScaler()
    farm_scaler.fit(farm_df[farm_numeric_cols])
    
    # Combine into a dictionary to save
    scaler_sus = {
        'label_encoders': farm_label_encoders,
        'standard_scaler': farm_scaler,
        'categorical_cols': farm_categorical_cols,
        'numeric_cols': farm_numeric_cols
    }
    
    # Save the preprocessor
    with open('scaler_sus.pkl', 'wb') as f:
        pickle.dump(scaler_sus, f)
    
    return scaler_sus

# Function to preprocess new data using the saved preprocessors
def preprocess_market_data(data, preprocessor):
    df = data.copy()
    
    # Apply label encoders for categorical columns
    for col, encoder in preprocessor['label_encoders'].items():
        if col in df.columns:
            df[col] = encoder.transform(df[col].astype(str))
    
    # Apply standard scaler for numeric columns
    if len(preprocessor['numeric_cols']) > 0:
        df[preprocessor['numeric_cols']] = preprocessor['standard_scaler'].transform(df[preprocessor['numeric_cols']])
    
    return df

def preprocess_sustainability_data(data, preprocessor):
    df = data.copy()
    
    # Apply label encoders for categorical columns
    for col, encoder in preprocessor['label_encoders'].items():
        if col in df.columns:
            df[col] = encoder.transform(df[col].astype(str))
    
    # Apply standard scaler for numeric columns
    if len(preprocessor['numeric_cols']) > 0:
        df[preprocessor['numeric_cols']] = preprocessor['standard_scaler'].transform(df[preprocessor['numeric_cols']])
    
    return df

In [None]:
# Create and save preprocessors first
scaler_market = create_market_preprocessor('market_researcher_dataset.csv')
scaler_sus = create_sustainability_preprocessor('farmer_advisor_dataset.csv')

# Load the preprocessors
with open('scaler_market.pkl', 'rb') as f:
    loaded_market_preprocessor = pickle.load(f)

with open('scaler_sus.pkl', 'rb') as f:
    loaded_sustainability_preprocessor = pickle.load(f)

# Use the preprocessing functions defined earlier
preprocessed_market_data = preprocess_market_data(market_price_data.copy(), loaded_market_preprocessor)
preprocessed_farm_data = preprocess_sustainability_data(sustainability_data.copy(), loaded_sustainability_preprocessor)

print("Data preprocessing completed successfully!")


In [16]:
# Split sustainability data into features and target
X_sustainability = sustainability_data.drop(columns=['Sustainability_Score'])
y_sustainability = sustainability_data['Sustainability_Score']

# Split market price data into features and target
X_market = market_price_data.drop(columns=['Market_Price_per_ton'])
y_market = market_price_data['Market_Price_per_ton']

# Apply preprocessors to sustainability data
X_sustainability_processed = preprocessor_sus.fit_transform(X_sustainability)
y_sustainability_processed = y_sustainability  # No need to preprocess target variable

# Apply preprocessors to market data
X_market_processed = preprocessor_market.fit_transform(X_market)
y_market_processed = y_market  # No need to preprocess target variable

# Create training and test sets for sustainability data
X_sus_train, X_sus_test, y_sus_train, y_sus_test = train_test_split(
    X_sustainability_processed, y_sustainability_processed, 
    test_size=0.2, random_state=42
)

# Create training and test sets for market price data
X_market_train, X_market_test, y_market_train, y_market_test = train_test_split(
    X_market_processed, y_market_processed, 
    test_size=0.2, random_state=42
)

# Convert sparse matrices to dense arrays for neural network if needed
from scipy.sparse import issparse

X_sus_train = X_sus_train.toarray() if issparse(X_sus_train) else X_sus_train
X_sus_test = X_sus_test.toarray() if issparse(X_sus_test) else X_sus_test
X_market_train = X_market_train.toarray() if issparse(X_market_train) else X_market_train
X_market_test = X_market_test.toarray() if issparse(X_market_test) else X_market_test

# Save preprocessors
import joblib
joblib.dump(preprocessor_sus, 'preprocessor_sus.joblib')
joblib.dump(preprocessor_market, 'preprocessor_market.joblib')

with open('preprocessor_market.pkl', 'wb') as f:
        pickle.dump(preprocessor_market, f)

with open('preprocessor_sus.pkl', 'wb') as f:
        pickle.dump(preprocessor_sus, f)

print("Data preprocessing completed successfully!")
print(f"Sustainability data shape: {X_sus_train.shape}")
print(f"Market data shape: {X_market_train.shape}")

Data preprocessing completed successfully!
Sustainability data shape: (8000, 11)
Market data shape: (8000, 13)


In [15]:


# Create training and test sets for sustainability data
X_sus_train, X_sus_test, y_sus_train, y_sus_test = train_test_split(
    X_sustainability, y_sustainability, test_size=0.2, random_state=42)




# Create training and test sets for market price data
X_market_train, X_market_test, y_market_train, y_market_test = train_test_split(
    X_market, y_market, test_size=0.2, random_state=42)


In [11]:
scaler_sus = StandardScaler()
X_sus_train = scaler_sus.fit_transform(X_sus_train)
X_sus_test = scaler_sus.transform(X_sus_test)

scaler_market = StandardScaler()
X_market_train = scaler_market.fit_transform(X_market_train)
X_market_test = scaler_market.transform(X_market_test)

# Define the ANN model for sustainability score prediction
model_sustainability = Sequential([
    Dense(64, activation='relu', input_shape=(X_sus_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

model_sustainability.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the sustainability model with the callback
model_sustainability.fit(
    X_sus_train, y_sus_train,
    epochs=50, batch_size=32,
    validation_split=0.2
)

# Define the ANN model for market price prediction
model_market_price = Sequential([
    Dense(64, activation='relu', input_shape=(X_market_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

model_market_price.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the market price model with the callback
model_market_price.fit(
    X_market_train, y_market_train,
    epochs=50, batch_size=32,
    validation_split=0.2
)

# Evaluate the models
sustainability_eval = model_sustainability.evaluate(X_sus_test, y_sus_test)
market_price_eval = model_market_price.evaluate(X_market_test, y_market_test)

print(f"Sustainability Model Evaluation - Loss: {sustainability_eval[0]}, MAE: {sustainability_eval[1]}")
print(f"Market Price Model Evaluation - Loss: {market_price_eval[0]}, MAE: {market_price_eval[1]}")

# Make predictions
sustainability_predictions = model_sustainability.predict(X_sus_test)
market_price_predictions = model_market_price.predict(X_market_test)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 2773.7141 - mae: 44.5625 - val_loss: 848.9009 - val_mae: 24.9639
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 815.7899 - mae: 24.5127 - val_loss: 836.1210 - val_mae: 24.8085
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 827.7446 - mae: 24.6647 - val_loss: 835.0690 - val_mae: 24.8054
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 827.0107 - mae: 24.7263 - val_loss: 834.8760 - val_mae: 24.7721
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 816.8625 - mae: 24.5864 - val_loss: 835.0338 - val_mae: 24.7954
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 823.3554 - mae: 24.6857 - val_loss: 837.5551 - val_mae: 24.8190
Epoch 7/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 99230.5234 - mae: 292.7558 - val_loss: 62813.9648 - val_mae: 221.8765
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - loss: 41253.9531 - mae: 170.3229 - val_loss: 14111.3564 - val_mae: 101.7235
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 13898.7529 - mae: 102.6964 - val_loss: 13858.3965 - val_mae: 101.1555
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 13729.5225 - mae: 101.6849 - val_loss: 13821.8447 - val_mae: 101.0751
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 13563.4570 - mae: 100.7585 - val_loss: 13784.1816 - val_mae: 100.9642
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 13448.5283 - mae: 100.4918 - val_loss: 13786.2061 - val_mae: 100.9894
Epoch 7/50
[1m200/200[0

In [12]:
from tensorflow.keras.models import load_model
model_sustainability.save('model_sustainability.keras')
model_market_price.save('model_market_price.keras')

In [13]:
import pickle
with open('model_sustainability.pkl', 'wb') as file:
    pickle.dump(model_sustainability, file)
                
# Save the market price model
with open('model_market_price.pkl', 'wb') as file:
    pickle.dump(model_market_price, file)

print("Models saved successfully in pickle format.")
           

Models saved successfully in pickle format.
