# In [1]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

# Load and preprocess data
# Exactly one data source feeds the models. Reading the CSV and then
# reassigning X and y from make_regression would discard the CSV contents
# while still requiring the file to exist, so the source is chosen explicitly.
USE_SYNTHETIC_DATA = True  # set to False to train on the CSV file instead
CSV_PATH = 'C:/Maize_Chainat_C/45Datapoint_Master/ML_analysis/7Oct_270Dataset/5VI_CD_CV_16ENV_270_01.csv'

if USE_SYNTHETIC_DATA:
    # Generate synthetic data for regression
    X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
else:
    data = pd.read_csv(CSV_PATH)
    # Convert to plain float arrays so the later torch.tensor(...) calls and
    # the scikit-learn models receive the same array type as the synthetic path.
    X = data.drop('Seed', axis=1).to_numpy(dtype=float)  # Features
    # Alternative feature subset: data[['Cire_CD', 'NDRE_CD']]
    y = data['Seed'].to_numpy(dtype=float)  # Target

# Hold out 20% of the samples for testing; fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Regression network (called "GNN" in this script; implemented as a two-layer MLP)
class GNNRegression(nn.Module):
    """Two-layer fully connected regressor.

    The forward pass is linear -> ReLU -> linear. No graph structure (edges,
    adjacency, message passing) is taken as input, so despite the class name
    this is a plain feed-forward network.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of values predicted per sample.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return predictions for ``x``; the last axis has size ``output_dim``."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)

# Initialize GNN model
# Seed PyTorch's global RNG so weight initialisation and the shuffled
# DataLoader order are repeatable, in line with the random_state=42 used for
# the train/test split and the scikit-learn models.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # one input unit per feature column
hidden_dim = 64
output_dim = 1  # single regression target
gnn_model = GNNRegression(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(gnn_model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Train GNN model
batch_size = 32
features_train = torch.tensor(X_train, dtype=torch.float32)
targets_train = torch.tensor(y_train, dtype=torch.float32)
train_dataset = TensorDataset(features_train, targets_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# 50 full passes over the training set, one optimizer step per mini-batch
for epoch in range(50):
    for batch_X, batch_y in train_loader:
        # Add a trailing axis to the targets so they line up with the
        # model's (batch, output_dim) output inside the loss
        target_column = batch_y.unsqueeze(1)
        optimizer.zero_grad()
        pred_y = gnn_model(batch_X)
        loss = criterion(pred_y, target_column)
        loss.backward()
        optimizer.step()

# Evaluate GNN model on test data
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# Inference only: gradient tracking is disabled, and the result is handed
# back as a NumPy array for the scikit-learn metric
with torch.no_grad():
    gnn_predictions = gnn_model(X_test_tensor).numpy()

# Build and train the ensemble models (Random Forest and Gradient Boosting).
# fit() returns the fitted estimator, so construction and training are chained.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out test data
rf_predictions, gb_predictions = (
    fitted_model.predict(X_test) for fitted_model in (rf_model, gb_model)
)

# Calculate Mean Squared Error (MSE) of every model against the same test targets
gnn_mse, rf_mse, gb_mse = (
    mean_squared_error(y_test, model_predictions)
    for model_predictions in (gnn_predictions, rf_predictions, gb_predictions)
)

# Print MSE results (lower is better)
print(f"GNN MSE: {gnn_mse:.4f}")
print(f"Random Forest MSE: {rf_mse:.4f}")
print(f"Gradient Boosting MSE: {gb_mse:.4f}")


# Output recorded from a previous run of this cell:
# GNN MSE: 381.3702
# Random Forest MSE: 2591.6402
# Gradient Boosting MSE: 1239.3669
