
# PROJECT: **Automated Phenotype Prediction**


AUTHOR: Dr. Maya Ammathil Manoharan

FIELD: Botany / Computational Biology

DESCRIPTION: Using PyTorch to model plant growth from experimental Excel data.

GOAL: Train a neural network to understand how Sunlight and Water affect Plant Height.





In [1]:



import torch              # The core Deep Learning library
import torch.nn as nn       # Contains the building blocks for Neural Networks
import torch.optim as optim # Contains the "Coaches" (optimization algorithms)
import pandas as pd         # The tool for reading Excel/Table data
import numpy as np          # The tool for scientific math and arrays



In [2]:
# ==========================================
# 1. DATA PREPARATION (The Lab Notebook)
# ==========================================
# In a real project, you'd use: df = pd.read_excel('botany_data.xlsx')
# Five hand-entered observations standing in for the spreadsheet:
# two input columns (sunlight, water) and one measured outcome (height).
data = {
    'Sunlight_Hours': [5.0, 9.0, 1.0, 7.0, 6.5],
    'Water_Liters': [3.0, 7.0, 5.0, 9.0, 8.0],
    'Final_Height_cm': [12.0, 25.0, 15.0, 28.0, 24.0],
}
df = pd.DataFrame.from_dict(data)

# Step A: Pull the columns out as NumPy arrays.
# The values are cast to 32-bit floats so they match the float32 dtype of the
# model's parameters (PyTorch's default floating-point type).
X_raw = df[['Sunlight_Hours', 'Water_Liters']].to_numpy(dtype=np.float32)  # shape (5, 2)
Y_raw = df['Final_Height_cm'].to_numpy(dtype=np.float32)                   # shape (5,)

# Step B: Wrap the feature array as a tensor.
# from_numpy() does not copy: X_train shares its memory with X_raw.
X_train = torch.from_numpy(X_raw)

# Step C: Turn the target into a single column.
# unsqueeze(1) adds a trailing axis, (5,) -> (5, 1), so the targets have the
# same (rows, 1) shape as the model's output when the loss is computed.
Y_train = torch.from_numpy(Y_raw).unsqueeze(1)



In [3]:
# ==========================================
# 2. DESIGNING THE "BRAIN" (The Model)
# ==========================================
class PhenotypePredictor(nn.Module):
    """Linear regression model: one fully connected layer, no activation.

    For the two-feature case used below it computes
    ``height = sun * w1 + water * w2 + bias``.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of values predicted per sample.
    """

    def __init__(self, input_dim: int, output_dim: int) -> None:
        # nn.Module must be initialised before any sub-layer is assigned,
        # otherwise the layer's parameters are not registered.
        super().__init__()

        # The only trainable part: a weight matrix plus a bias vector.
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output for the batch ``x``."""
        return self.linear(x)


# Concrete instance for this experiment: (Sun, Water) in -> Height out.
model = PhenotypePredictor(input_dim=2, output_dim=1)



In [4]:
# ==========================================
# 3. SETTING UP THE LEARNING RULES
# ==========================================
# The "Teacher": Mean Squared Error measures how far off the prediction was.
criterion = nn.MSELoss(reduction="mean")  # average of squared differences over all samples

# The "Coach": SGD nudges every parameter against its gradient.
# Each training step below uses the entire dataset at once, so this is
# plain (full-batch) gradient descent. lr is the step size of each nudge.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)



In [5]:
# ==========================================
# 4. THE TRAINING LOOP (The Adaptation)
# ==========================================
print("--- Training Started ---")

for epoch in range(100):  # 100 full passes over the dataset (one pass = one "Day")

    # Start each day with clean gradients: backward() ADDS to whatever is
    # already stored, so leftovers from the previous day must be cleared.
    optimizer.zero_grad()

    # Forward pass: predict heights with the current weights, then score the
    # predictions against the measured heights.
    Y_pred = model(X_train)
    loss = criterion(Y_pred, Y_train)

    # Backward pass: compute d(loss)/d(parameter) for the weights and bias.
    loss.backward()

    # Update: move each parameter one learning-rate-sized step downhill.
    optimizer.step()

    # Progress report on days 20, 40, ..., 100. The loss printed here was
    # measured before this day's weight update was applied.
    if epoch % 20 == 19:
        print(f"Day {epoch+1}: Error (Loss) is {loss.item():.4f}")


--- Training Started ---
Day 20: Error (Loss) is 1.7604
Day 40: Error (Loss) is 0.8744
Day 60: Error (Loss) is 0.7693
Day 80: Error (Loss) is 0.7363
Day 100: Error (Loss) is 0.7115


In [6]:

# ==========================================
# 5. INFERENCE (Testing the Trained Brain)
# ==========================================
# Put the model in evaluation mode. A single Linear layer behaves the same in
# both modes, but calling eval() before inference keeps this cell correct if
# mode-dependent layers (e.g. dropout, batch norm) are added later.
model.eval()

with torch.no_grad():  # No gradients are needed for prediction, so skip tracking them.
    # New plant: 8 hours of sun, 4 liters of water. The dtype is pinned to
    # float32 to match the training tensors instead of relying on the
    # process-wide default dtype.
    new_data = torch.tensor([[8.0, 4.0]], dtype=torch.float32)
    prediction = model(new_data)

    print("\n--- Final Results ---")
    print(f"Prediction for [8hr Sun, 4L Water]: {prediction.item():.2f} cm")

# Read out the learned parameters to show the fitted equation.
# detach() gives a gradient-free view of each parameter; it replaces the
# legacy `.data` attribute, which bypasses autograd's safety checks.
w = model.linear.weight.detach()  # shape (1, 2): [[w_sun, w_water]]
b = model.linear.bias.detach()    # shape (1,)

# Convert to plain Python floats before formatting.
print(f"Model learned: Height = ({w[0, 0].item():.2f} * Sun) + ({w[0, 1].item():.2f} * Water) + {b.item():.2f}")


--- Final Results ---
Prediction for [8hr Sun, 4L Water]: 16.05 cm
Model learned: Height = (0.68 * Sun) + (2.56 * Water) + 0.41
