In [1]:
# Environment sanity check: print the path of the interpreter this kernel is
# running under, then confirm that seaborn can be imported in that environment.
# The order matters: the interpreter path is shown even if the import fails.
import sys
print(sys.executable)
import seaborn as sns  # imported again together with the other dependencies in the next cell
print("Seaborn imported successfully ✅")


C:\Users\sahan\.conda\envs\tfenv\python.exe
Seaborn imported successfully ✅


In [2]:
"""
AgriGenAI - Day 2: Genotype-Trait Mapping
==========================================
Goal: Map phenotype features → plant traits → genotype predictions
Time: 8 hours

What this does:
1. Load Day 1 features + metadata
2. Create trait labels from disease categories
3. Build hybrid database with parent genotypes
4. Train ML model: features → trait predictions
5. Simulate genotype prediction from traits

This is the KEY INNOVATION: Image → Phenotype → Genotype!
"""

import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' also hides deprecation and data-quality
# warnings raised by the imported libraries - consider narrowing the filter.
warnings.filterwarnings(action='ignore')

# Title banner: a 60-character rule above and below the notebook title.
print(
    "=" * 60,
    "🧬 AgriGenAI - Day 2: Genotype-Trait Mapping",
    "=" * 60,
    sep="\n",
)

🧬 AgriGenAI - Day 2: Genotype-Trait Mapping


In [19]:
# ============================================
# 1. CONFIGURATION
# ============================================

class Config:
    """Configuration for Day 2.

    Holds the file-system locations read and written by this notebook plus the
    parameters shared by the trait models. Every location is derived from
    ``OUTPUT_ROOT`` so that relocating the project means editing one line
    instead of seven; all previously existing attributes keep their values.
    """
    # Root of the AgriGenAI output tree shared by Day 1 and Day 2.
    # NOTE(review): this is still an absolute, machine-specific path - change
    # it here (and only here) when running on another machine.
    OUTPUT_ROOT = Path('C:/Users/sahan/Desktop/AgriGenAI2/AgriGenAI_Output')

    # Output paths (Day 2)
    MODELS_PATH = OUTPUT_ROOT / 'models'
    HYBRIDS_PATH = OUTPUT_ROOT / 'hybrids'
    VIZ_PATH = OUTPUT_ROOT / 'visualizations'
    METADATA_PATH = OUTPUT_ROOT / 'metadata'

    # Input paths (from Day 1)
    FEATURES_FILE = OUTPUT_ROOT / 'features' / 'phenotype_features.npy'
    METADATA_FILE = METADATA_PATH / 'image_metadata.csv'
    STATS_FILE = OUTPUT_ROOT / 'features' / 'feature_statistics.npz'

    # Model parameters
    RANDOM_STATE = 42   # seed passed to the train/test split and the classifiers
    TEST_SIZE = 0.2     # fraction of rows held out for evaluation

# Create every directory this notebook writes into. VIZ_PATH receives the
# confusion-matrix figure and METADATA_PATH the enhanced metadata CSV, so they
# are created here as well instead of relying on an earlier run having made
# them. exist_ok=True makes re-running the cell harmless.
for path in [Config.MODELS_PATH, Config.HYBRIDS_PATH,
             Config.VIZ_PATH, Config.METADATA_PATH]:
    path.mkdir(parents=True, exist_ok=True)

In [6]:
# ============================================
# 2. LOAD DAY 1 OUTPUTS
# ============================================

print("\n📂 Loading Day 1 outputs...")

# Feature array saved by Day 1; its first axis is checked against the
# metadata row count below.
features = np.load(Config.FEATURES_FILE)
print(f"✅ Features loaded: {features.shape}")

# Per-image metadata table saved by Day 1.
df = pd.read_csv(Config.METADATA_FILE)
print(f"✅ Metadata loaded: {len(df)} rows")

# Row i of `features` is later paired with row i of `df`, so the two must have
# the same number of rows before anything else is done with them.
assert len(df) == features.shape[0], "❌ Features and metadata don't match!"
print("✅ Data alignment verified!")


📂 Loading Day 1 outputs...
✅ Features loaded: (15313, 2048)
✅ Metadata loaded: 15313 rows
✅ Data alignment verified!


In [7]:
# ============================================
# 3. CREATE TRAIT LABELS
# ============================================

# Stage banner for the trait-labelling step; printed once when the cell runs.
print("\n🏷️  Creating trait labels from phenotype data...")

class TraitMapper:
    """Translate a disease category name into coarse plant-trait labels.

    Each mapper is a static method that takes one category value and returns
    one label from a fixed vocabulary. Categories are matched by exact
    equality; anything not listed falls through to that mapper's default.
    """

    @staticmethod
    def map_yield_trait(category):
        """Return the yield potential: 'High', 'Medium' or 'Low'."""
        if category == 'healthy':
            return 'High'

        low_yield = ('Early_blight', 'Late_blight', 'Tomato_mosaic_virus',
                     'Tomato_Yellow_Leaf_Curl_Virus')
        if category in low_yield:
            return 'Low'

        # Bacterial_spot, Leaf_Mold, Septoria_leaf_spot, Target_Spot and
        # Two-spotted_spider_mite are medium yield - and so is every category
        # not named above (the default, intended for fruits).
        return 'Medium'

    @staticmethod
    def map_disease_resistance(category):
        """Return the disease resistance: 'Resistant', 'Moderate' or 'Susceptible'."""
        if category == 'healthy':
            return 'Resistant'

        moderate = ('Bacterial_spot', 'Leaf_Mold', 'Septoria_leaf_spot')
        return 'Moderate' if category in moderate else 'Susceptible'

    @staticmethod
    def map_stress_tolerance(category):
        """Return the environmental stress tolerance: 'High', 'Medium' or 'Low'.

        Healthy plants are labelled as the most tolerant and the two viral
        diseases as the least; every other category is 'Medium'.
        """
        if category == 'healthy':
            return 'High'

        viral = ('Tomato_mosaic_virus', 'Tomato_Yellow_Leaf_Curl_Virus')
        return 'Low' if category in viral else 'Medium'

# Derive the three trait columns from `category`, one mapper per column.
mapper = TraitMapper()

for _column, _mapping_fn in (
    ('yield_trait', mapper.map_yield_trait),
    ('disease_resistance', mapper.map_disease_resistance),
    ('stress_tolerance', mapper.map_stress_tolerance),
):
    df[_column] = df['category'].apply(_mapping_fn)

print("✅ Traits created!")
print("\nTrait distributions:")
# Label counts per trait, printed as plain dicts.
for _heading, _column in (
    ('Yield', 'yield_trait'),
    ('Disease Resistance', 'disease_resistance'),
    ('Stress Tolerance', 'stress_tolerance'),
):
    print(f"   {_heading}: {df[_column].value_counts().to_dict()}")



🏷️  Creating trait labels from phenotype data...
✅ Traits created!

Trait distributions:
   Yield: {'Medium': 7128, 'Low': 6913, 'High': 1272}
   Disease Resistance: {'Susceptible': 10182, 'Moderate': 3859, 'Resistant': 1272}
   Stress Tolerance: {'Medium': 9456, 'Low': 4585, 'High': 1272}


In [8]:
# ============================================
# 4. BUILD HYBRID DATABASE
# ============================================

print("\n🌱 Creating tomato hybrid database...")

# Hand-written reference table of tomato hybrids, keyed by variety name.
# Each entry holds:
#   parent_genotypes   - list of two genotype ID strings ('G<n>')
#   traits             - 'yield', 'disease_resistance' and 'stress_tolerance'
#                        use the same label vocabulary as the trait columns
#                        created above; 'fruit_size' and 'maturity_days' are
#                        additional descriptive fields
#   optimal_temp       - (low, high) pair; presumably degrees Celsius - TODO confirm
#   humidity_tolerance - (low, high) pair; presumably percent relative
#                        humidity - TODO confirm
#   genes              - list of three marker strings
# NOTE(review): all values are hard-coded; this notebook gives no source for them.
hybrid_database = {
    'Arka_Vikas': {
        'parent_genotypes': ['G1', 'G2'],
        'traits': {
            'yield': 'High',
            'disease_resistance': 'Resistant',
            'stress_tolerance': 'High',
            'fruit_size': 'Large',
            'maturity_days': 70
        },
        'optimal_temp': (20, 32),
        'humidity_tolerance': (50, 80),
        'genes': ['fw2.2-AA', 'HSP-High', 'Tm-2a-Present']
    },
    'Pusa_Ruby': {
        'parent_genotypes': ['G3', 'G4'],
        'traits': {
            'yield': 'High',
            'disease_resistance': 'Moderate',
            'stress_tolerance': 'Medium',
            'fruit_size': 'Medium',
            'maturity_days': 65
        },
        'optimal_temp': (15, 28),
        'humidity_tolerance': (40, 70),
        'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2-Absent']
    },
    'Himsona': {
        'parent_genotypes': ['G5', 'G6'],
        'traits': {
            'yield': 'Medium',
            'disease_resistance': 'Resistant',
            'stress_tolerance': 'High',
            'fruit_size': 'Large',
            'maturity_days': 75
        },
        'optimal_temp': (18, 32),
        'humidity_tolerance': (60, 90),
        'genes': ['fw2.2-AA', 'HSP-High', 'Tm-2a-Present']
    },
    'Kashi_Amrit': {
        'parent_genotypes': ['G2', 'G7'],
        'traits': {
            'yield': 'High',
            'disease_resistance': 'Resistant',
            'stress_tolerance': 'Medium',
            'fruit_size': 'Medium',
            'maturity_days': 68
        },
        'optimal_temp': (18, 30),
        'humidity_tolerance': (45, 75),
        'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2a-Present']
    },
    'Punjab_Chhuhara': {
        'parent_genotypes': ['G4', 'G8'],
        'traits': {
            'yield': 'Medium',
            'disease_resistance': 'Moderate',
            'stress_tolerance': 'High',
            'fruit_size': 'Small',
            'maturity_days': 60
        },
        'optimal_temp': (20, 35),
        'humidity_tolerance': (50, 85),
        'genes': ['fw2.2-aa', 'HSP-High', 'Tm-2-Absent']
    }
}

# Save hybrid database
# json.dump writes the (low, high) tuples as JSON arrays, so they come back as
# lists - not tuples - when the file is loaded again.
import json
hybrid_db_file = Config.HYBRIDS_PATH / 'hybrid_database.json'
with open(hybrid_db_file, 'w') as f:
    json.dump(hybrid_database, f, indent=2)

print(f"✅ Hybrid database created: {len(hybrid_database)} varieties")
print(f"   Saved to: {hybrid_db_file}")



🌱 Creating tomato hybrid database...
✅ Hybrid database created: 5 varieties
   Saved to: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\hybrids\hybrid_database.json


In [9]:
# ============================================
# 5. TRAIN TRAIT PREDICTION MODELS
# ============================================

# Stage banner for model training; printed once when the cell runs.
print("\n🤖 Training trait prediction models...")

class TraitPredictor:
    """Fit, evaluate and persist one Random Forest classifier per plant trait.

    Attributes:
        config: object providing TEST_SIZE, RANDOM_STATE and MODELS_PATH.
        models: trait name -> fitted RandomForestClassifier.
        results: trait name -> dict with 'accuracy', 'y_test', 'y_pred' and
            'classes', all taken from the held-out split of that trait.
    """

    def __init__(self, config):
        """Remember the configuration and start with empty model/result stores."""
        self.config = config
        self.models, self.results = {}, {}

    def train_model(self, X, y, trait_name):
        """Fit and score a classifier for one trait.

        Args:
            X: feature rows.
            y: one label per row of X.
            trait_name: key under which the model and its results are stored.

        Returns:
            (model, accuracy) - the fitted classifier and its accuracy on the
            held-out part of the split.
        """
        print(f"\n   Training model for: {trait_name}")

        cfg = self.config

        # Hold out TEST_SIZE of the rows; stratifying on y keeps the label
        # proportions the same in both parts.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=cfg.TEST_SIZE, random_state=cfg.RANDOM_STATE, stratify=y
        )

        forest_params = dict(
            n_estimators=100,
            max_depth=20,
            min_samples_split=5,
            random_state=cfg.RANDOM_STATE,
            n_jobs=-1,
        )
        model = RandomForestClassifier(**forest_params)
        model.fit(X_train, y_train)

        # Score on the held-out rows only.
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"      ✅ Accuracy: {accuracy:.2%}")

        # Keep what the plotting and summary steps need.
        self.models[trait_name] = model
        self.results[trait_name] = dict(
            accuracy=accuracy,
            y_test=y_test,
            y_pred=y_pred,
            classes=model.classes_,
        )

        return model, accuracy

    def train_all(self, features, df):
        """Train one model per trait column of ``df``.

        The columns 'yield_trait', 'disease_resistance' and 'stress_tolerance'
        are read from ``df`` up front, then trained in that order on the same
        ``features``.

        Returns:
            (models, results) - the two dictionaries held on this instance.
        """
        trait_columns = ('yield_trait', 'disease_resistance', 'stress_tolerance')
        labels_by_trait = {column: df[column] for column in trait_columns}

        for column, labels in labels_by_trait.items():
            self.train_model(features, labels, column)

        return self.models, self.results

    def save_models(self):
        """Write every trained model to MODELS_PATH as '<trait>_model.pkl'."""
        for name, fitted in self.models.items():
            target = self.config.MODELS_PATH / f'{name}_model.pkl'
            joblib.dump(fitted, target)
            print(f"   💾 Saved: {target}")

# Train models
# Fits one classifier per trait column on the Day 1 features, then writes each
# model to Config.MODELS_PATH. `models` and `results` are module-level names
# that the later visualisation, pipeline-test and summary cells read.
predictor = TraitPredictor(Config)
models, results = predictor.train_all(features, df)
predictor.save_models()

print(f"\n✅ All models trained and saved!")



🤖 Training trait prediction models...

   Training model for: yield_trait
      ✅ Accuracy: 92.20%

   Training model for: disease_resistance
      ✅ Accuracy: 93.31%

   Training model for: stress_tolerance
      ✅ Accuracy: 95.79%
   💾 Saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models\yield_trait_model.pkl
   💾 Saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models\disease_resistance_model.pkl
   💾 Saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models\stress_tolerance_model.pkl

✅ All models trained and saved!


In [10]:
# ============================================
# 6. VISUALIZE MODEL PERFORMANCE
# ============================================

# Stage banner for the performance plots; printed once when the cell runs.
print("\n📊 Creating performance visualizations...")

def plot_confusion_matrices(results, config):
    """Plot one confusion-matrix heatmap per trait and save them as one PNG.

    Args:
        results: mapping of trait name -> dict with the keys 'y_test',
            'y_pred', 'classes' and 'accuracy'.
        config: object exposing VIZ_PATH, the directory the figure goes to.

    The figure is written to VIZ_PATH / 'trait_prediction_performance.png'
    (the directory is created if missing) and then closed. Returns None.
    """
    # One panel per trait instead of a fixed three: three traits still give
    # the 18x5 inch canvas, while other counts no longer fail or leave blank
    # panels. squeeze=False keeps `axes` two-dimensional even for one panel.
    n_panels = max(len(results), 1)
    fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5), squeeze=False)

    for ax, (trait_name, result) in zip(axes[0], results.items()):
        class_labels = list(result['classes'])

        # Passing labels= pins the row/column order of the matrix to the same
        # sequence used for the tick labels, so cells cannot be mislabelled
        # when a class is missing from the evaluated rows.
        cm = confusion_matrix(result['y_test'], result['y_pred'], labels=class_labels)

        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels,
            ax=ax
        )

        ax.set_title(f'{trait_name.replace("_", " ").title()}\n'
                     f'Accuracy: {result["accuracy"]:.2%}')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

    fig.tight_layout()

    # Make sure the target directory exists before saving.
    config.VIZ_PATH.mkdir(parents=True, exist_ok=True)
    save_path = config.VIZ_PATH / 'trait_prediction_performance.png'
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"✅ Saved: {save_path}")
    plt.close(fig)  # close this figure explicitly so it is not kept in memory

# Draw and save the confusion matrices for every trait model in `results`.
plot_confusion_matrices(results, Config)



📊 Creating performance visualizations...
✅ Saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\visualizations\trait_prediction_performance.png


In [15]:
# Stage banner for the genotype prediction step; printed once when the cell runs.
print("\n🧬 Creating genotype prediction system...")

class GenotypePredictorSimulator:
    """
    Simulates genotype prediction from phenotype traits.

    A (yield, disease_resistance, stress_tolerance) label triple is looked up
    in a hard-coded table with one entry per combination (3 x 3 x 3 = 27), and
    the hybrids in the supplied database are ranked by how many of those three
    labels they share with the prediction.
    In real system, this would use actual genotype-phenotype database.

    Attributes:
        hybrid_db: the hybrid database passed to the constructor (kept by
            reference, not copied).
        genotype_map: dict keyed by label triple; every value has the keys
            'genotype_id', 'genes', 'description' and 'breeding_value'.
    """

    # Points a hybrid earns when its label equals the predicted label for that
    # trait. The three weights add up to 100.
    _MATCH_POINTS = (
        ('yield', 40),
        ('disease_resistance', 30),
        ('stress_tolerance', 30),
    )

    def __init__(self, hybrid_database):
        """Store the hybrid database and build the trait -> genotype table."""
        self.hybrid_db = hybrid_database
        self.genotype_map = self._build_complete_genotype_map()

    def _build_complete_genotype_map(self):
        """Return the full lookup table: one entry per label triple (27 total).

        Keys are (yield, disease_resistance, stress_tolerance) tuples.
        """
        genotype_map = {
            # High Yield Combinations (9 total)
            ('High', 'Resistant', 'High'): {
                'genotype_id': 'G1',
                'genes': ['fw2.2-AA', 'HSP-High', 'Tm-2a-Present'],
                'description': 'Superior genotype: high yield, disease resistant, stress tolerant',
                'breeding_value': 'Excellent - Use as primary parent'
            },
            ('High', 'Resistant', 'Medium'): {
                'genotype_id': 'G2',
                'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2a-Present'],
                'description': 'High yield with disease resistance, moderate stress response',
                'breeding_value': 'Very Good - Suitable for stable climates'
            },
            ('High', 'Resistant', 'Low'): {
                'genotype_id': 'G9',
                'genes': ['fw2.2-AA', 'HSP-Low', 'Tm-2a-Present'],
                'description': 'High yield and disease resistant but stress sensitive',
                'breeding_value': 'Good - Requires controlled environment'
            },
            ('High', 'Moderate', 'High'): {
                'genotype_id': 'G10',
                'genes': ['fw2.2-Aa', 'HSP-High', 'Tm-2-Partial'],
                'description': 'High yield, stress tolerant, moderate disease resistance',
                'breeding_value': 'Very Good - For harsh climates'
            },
            ('High', 'Moderate', 'Medium'): {
                'genotype_id': 'G3',
                'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2-Absent'],
                'description': 'Good all-rounder with high yield potential',
                'breeding_value': 'Good - General purpose variety'
            },
            ('High', 'Moderate', 'Low'): {
                'genotype_id': 'G11',
                'genes': ['fw2.2-AA', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'High yield but vulnerable to disease and stress',
                'breeding_value': 'Fair - Use in protected cultivation'
            },
            ('High', 'Susceptible', 'High'): {
                'genotype_id': 'G12',
                'genes': ['fw2.2-Aa', 'HSP-High', 'Tm-2-Absent'],
                'description': 'High yield and stress tolerant but disease susceptible',
                'breeding_value': 'Fair - Requires disease management'
            },
            ('High', 'Susceptible', 'Medium'): {
                'genotype_id': 'G13',
                'genes': ['fw2.2-AA', 'HSP-Medium', 'Tm-2-Absent'],
                'description': 'High yield but needs disease protection',
                'breeding_value': 'Fair - For experienced farmers'
            },
            ('High', 'Susceptible', 'Low'): {
                'genotype_id': 'G14',
                'genes': ['fw2.2-Aa', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'High yield potential but multiple vulnerabilities',
                'breeding_value': 'Poor - Not recommended for field use'
            },

            # Medium Yield Combinations (9 total)
            ('Medium', 'Resistant', 'High'): {
                'genotype_id': 'G5',
                'genes': ['fw2.2-AA', 'HSP-High', 'Tm-2a-Present'],
                'description': 'Balanced genotype: disease resistant, stress tolerant',
                'breeding_value': 'Very Good - Reliable performer'
            },
            ('Medium', 'Resistant', 'Medium'): {
                'genotype_id': 'G6',
                'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2a-Present'],
                'description': 'Moderate yield with good disease resistance',
                'breeding_value': 'Good - Safe choice for farmers'
            },
            ('Medium', 'Resistant', 'Low'): {
                'genotype_id': 'G15',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2a-Present'],
                'description': 'Disease resistant but stress sensitive, moderate yield',
                'breeding_value': 'Fair - For controlled environments'
            },
            ('Medium', 'Moderate', 'High'): {
                'genotype_id': 'G4',
                'genes': ['fw2.2-aa', 'HSP-High', 'Tm-2-Absent'],
                'description': 'Stress tolerant genotype with moderate yield',
                'breeding_value': 'Good - For marginal lands'
            },
            ('Medium', 'Moderate', 'Medium'): {
                'genotype_id': 'G7',
                'genes': ['fw2.2-Aa', 'HSP-Medium', 'Tm-2-Partial'],
                'description': 'Average genotype across all traits',
                'breeding_value': 'Good - General cultivation variety'
            },
            ('Medium', 'Moderate', 'Low'): {
                'genotype_id': 'G16',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'Moderate yield, vulnerable to stress and disease',
                'breeding_value': 'Fair - Requires management'
            },
            ('Medium', 'Susceptible', 'High'): {
                'genotype_id': 'G17',
                'genes': ['fw2.2-Aa', 'HSP-High', 'Tm-2-Absent'],
                'description': 'Stress tolerant but disease susceptible, moderate yield',
                'breeding_value': 'Fair - For dry regions with disease management'
            },
            ('Medium', 'Susceptible', 'Medium'): {
                'genotype_id': 'G18',
                'genes': ['fw2.2-aa', 'HSP-Medium', 'Tm-2-Absent'],
                'description': 'Moderate yield, needs disease protection',
                'breeding_value': 'Fair - For experienced farmers'
            },
            ('Medium', 'Susceptible', 'Low'): {
                'genotype_id': 'G19',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'Low breeding value across traits',
                'breeding_value': 'Poor - Not recommended'
            },

            # Low Yield Combinations (9 total)
            ('Low', 'Resistant', 'High'): {
                'genotype_id': 'G20',
                'genes': ['fw2.2-aa', 'HSP-High', 'Tm-2a-Present'],
                'description': 'Disease resistant and stress tolerant but low yield',
                'breeding_value': 'Fair - For breeding stock only'
            },
            ('Low', 'Resistant', 'Medium'): {
                'genotype_id': 'G21',
                'genes': ['fw2.2-aa', 'HSP-Medium', 'Tm-2a-Present'],
                'description': 'Disease resistant but low productivity',
                'breeding_value': 'Fair - For conservation breeding'
            },
            ('Low', 'Resistant', 'Low'): {
                'genotype_id': 'G22',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2a-Present'],
                'description': 'Disease resistant only, poor agronomic traits',
                'breeding_value': 'Poor - Limited use'
            },
            ('Low', 'Moderate', 'High'): {
                'genotype_id': 'G23',
                'genes': ['fw2.2-aa', 'HSP-High', 'Tm-2-Partial'],
                'description': 'Stress tolerant but low yield and moderate disease resistance',
                'breeding_value': 'Fair - For harsh environments only'
            },
            ('Low', 'Moderate', 'Medium'): {
                'genotype_id': 'G24',
                'genes': ['fw2.2-aa', 'HSP-Medium', 'Tm-2-Absent'],
                'description': 'Low productivity, average other traits',
                'breeding_value': 'Poor - Not recommended for cultivation'
            },
            ('Low', 'Moderate', 'Low'): {
                'genotype_id': 'G25',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'Poor performance across all traits',
                'breeding_value': 'Very Poor - Avoid'
            },
            ('Low', 'Susceptible', 'High'): {
                'genotype_id': 'G26',
                'genes': ['fw2.2-aa', 'HSP-High', 'Tm-2-Absent'],
                'description': 'Only stress tolerance is acceptable, low yield and disease susceptible',
                'breeding_value': 'Poor - Very limited use'
            },
            ('Low', 'Susceptible', 'Medium'): {
                'genotype_id': 'G27',
                'genes': ['fw2.2-aa', 'HSP-Medium', 'Tm-2-Absent'],
                'description': 'Poor genotype with multiple weaknesses',
                'breeding_value': 'Very Poor - Discard'
            },
            ('Low', 'Susceptible', 'Low'): {
                'genotype_id': 'G8',
                'genes': ['fw2.2-aa', 'HSP-Low', 'Tm-2-Absent'],
                'description': 'Inferior genotype: low yield, disease susceptible, stress sensitive',
                'breeding_value': 'Very Poor - Not viable'
            }
        }
        return genotype_map

    def predict_genotype(self, traits):
        """Predict genotype from trait combination.

        Args:
            traits: mapping with the keys 'yield', 'disease_resistance' and
                'stress_tolerance'.

        Returns:
            A new dict with 'genotype_id', 'genes', 'description' and
            'breeding_value'. A combination that is not in the table yields a
            'G_Unknown' placeholder with the same four keys.

        Raises:
            KeyError: if one of the three trait keys is missing from ``traits``.
        """
        trait_tuple = (traits['yield'], traits['disease_resistance'],
                       traits['stress_tolerance'])

        entry = self.genotype_map.get(trait_tuple)
        if entry is None:
            # No nearest-match search is attempted: any label outside the
            # table gets this placeholder. It carries 'breeding_value' too, so
            # callers can read the same keys for every result.
            entry = {
                'genotype_id': 'G_Unknown',
                'genes': ['Mixed'],
                'description': 'Genotype requires further analysis',
                'breeding_value': 'Unknown - requires further analysis'
            }

        # Hand back a copy (including the gene list) so that a caller editing
        # the result cannot alter the shared lookup table.
        return {**entry, 'genes': list(entry['genes'])}

    def recommend_hybrids(self, predicted_traits, top_n=3):
        """Recommend hybrid crosses based on predicted traits.

        Each hybrid scores 40 points for a matching yield label and 30 each
        for matching disease resistance and stress tolerance (0-100 overall).

        Args:
            predicted_traits: mapping with the keys 'yield',
                'disease_resistance' and 'stress_tolerance'.
            top_n: how many recommendations to return (default 3, as before);
                None returns every hybrid. Expected to be non-negative.

        Returns:
            List of dicts with 'hybrid_name', 'compatibility_score',
            'parent_genotypes', 'traits' and 'genes', best score first.
            Hybrids with equal scores keep their database order.
        """
        recommendations = []

        for hybrid_name, hybrid_data in self.hybrid_db.items():
            hybrid_traits = hybrid_data['traits']

            # Score compatibility (0-100): exact label matches only.
            score = sum(
                points
                for trait, points in self._MATCH_POINTS
                if hybrid_traits[trait] == predicted_traits[trait]
            )

            # Copy the containers so edits to a recommendation do not write
            # through to the hybrid database.
            recommendations.append({
                'hybrid_name': hybrid_name,
                'compatibility_score': score,
                'parent_genotypes': list(hybrid_data['parent_genotypes']),
                'traits': dict(hybrid_traits),
                'genes': list(hybrid_data['genes'])
            })

        # Sort by score; the sort is stable, so ties stay in database order.
        recommendations.sort(key=lambda rec: rec['compatibility_score'], reverse=True)
        return recommendations[:top_n]

# Initialize genotype predictor
genotype_predictor = GenotypePredictorSimulator(hybrid_database)

# Save genotype predictor
# NOTE(review): joblib pickles the instance, and pickle stores a class by
# reference (module + name), not its code. Because GenotypePredictorSimulator
# is defined in this notebook, whatever loads this file presumably has to
# define or import the same class first - confirm in the consuming notebook.
predictor_file = Config.MODELS_PATH / 'genotype_predictor.pkl'
joblib.dump(genotype_predictor, predictor_file)
print(f"✅ Genotype predictor saved: {predictor_file}")



🧬 Creating genotype prediction system...
✅ Genotype predictor saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models\genotype_predictor.pkl


In [16]:
# ============================================
# 8. TEST PREDICTION PIPELINE
# ============================================

# Stage banner for the end-to-end pipeline check; printed once when the cell runs.
print("\n🧪 Testing full prediction pipeline...")

def test_prediction_pipeline(sample_idx=0):
    """Run one stored sample through traits -> genotype -> hybrid ranking.

    Reads the module-level ``df``, ``features``, ``models`` and
    ``genotype_predictor`` and prints every intermediate result; nothing is
    returned.

    Args:
        sample_idx: positional row index into ``df`` / ``features``.
    """
    print(f"\n📸 Testing with image #{sample_idx}")

    # Metadata of the chosen sample.
    row = df.iloc[sample_idx]
    print(f"   Image: {row['image_path']}")
    print(f"   Category: {row['category']}")
    print(f"   Organ: {row['organ']}")

    # The classifiers expect a 2-D input, so the single row becomes (1, n).
    sample_vector = features[sample_idx].reshape(1, -1)

    # One prediction per trait model. The '_trait' suffix is dropped so the
    # keys read 'yield', 'disease_resistance', 'stress_tolerance' - the names
    # the genotype predictor looks up.
    predicted_traits = {}
    print("\n🔮 Predicted Traits:")
    for name, classifier in models.items():
        label = classifier.predict(sample_vector)[0]
        predicted_traits[name.replace('_trait', '')] = label
        print(f"   {name.replace('_', ' ').title()}: {label}")

    # Trait labels -> simulated genotype.
    genotype = genotype_predictor.predict_genotype(predicted_traits)
    print("\n🧬 Predicted Genotype:")
    print(f"   ID: {genotype['genotype_id']}")
    print(f"   Genes: {', '.join(genotype['genes'])}")
    print(f"   Description: {genotype['description']}")

    # Trait labels -> ranked hybrid suggestions.
    ranked = genotype_predictor.recommend_hybrids(predicted_traits)
    print("\n🏆 Top 3 Hybrid Recommendations:")
    for rank, rec in enumerate(ranked, start=1):
        print(f"\n   {rank}. {rec['hybrid_name']} (Score: {rec['compatibility_score']}/100)")
        print(f"      Parents: {' × '.join(rec['parent_genotypes'])}")
        print(f"      Genes: {', '.join(rec['genes'])}")

# Spot-check the first, the middle and the last image.
# NOTE(review): these rows come from the same data the trait models were split
# from, so a chosen row may have been used for fitting - this is a smoke test
# of the pipeline, not an accuracy measurement.
for _sample_idx in (0, len(df)//2, len(df)-1):
    test_prediction_pipeline(_sample_idx)


🧪 Testing full prediction pipeline...

📸 Testing with image #0
   Image: ..\AgriGenAI_Dataset\PlantVillage\images\Bacterial_spot\Bs10.jpg
   Category: Bacterial_spot
   Organ: leaf

🔮 Predicted Traits:
   Yield Trait: Medium
   Disease Resistance: Moderate
   Stress Tolerance: Medium

🧬 Predicted Genotype:
   ID: G7
   Genes: fw2.2-Aa, HSP-Medium, Tm-2-Partial
   Description: Average genotype across all traits

🏆 Top 3 Hybrid Recommendations:

   1. Punjab_Chhuhara (Score: 70/100)
      Parents: G4 × G8
      Genes: fw2.2-aa, HSP-High, Tm-2-Absent

   2. Pusa_Ruby (Score: 60/100)
      Parents: G3 × G4
      Genes: fw2.2-Aa, HSP-Medium, Tm-2-Absent

   3. Himsona (Score: 40/100)
      Parents: G5 × G6
      Genes: fw2.2-AA, HSP-High, Tm-2a-Present

📸 Testing with image #7656
   Image: ..\AgriGenAI_Dataset\PlantVillage\images\Target_Spot\Ts164.jpg
   Category: Target_Spot
   Organ: leaf

🔮 Predicted Traits:
   Yield Trait: Medium
   Disease Resistance: Susceptible
   Stress Tolerance: 

In [20]:
# ============================================
# 9. SAVE FINAL METADATA
# ============================================

print("\n💾 Saving enhanced metadata with trait labels...")

# Save enhanced metadata
# `df` is the Day 1 metadata plus the yield_trait, disease_resistance and
# stress_tolerance columns added earlier in this notebook. index=False keeps
# the row index out of the CSV.
enhanced_metadata_file = Config.METADATA_PATH / 'image_metadata_with_traits.csv'
df.to_csv(enhanced_metadata_file, index=False)
print(f"✅ Saved: {enhanced_metadata_file}")


💾 Saving enhanced metadata with trait labels...
✅ Saved: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\metadata\image_metadata_with_traits.csv


In [21]:
# ============================================
# 10. SUMMARY
# ============================================

# Closing banner.
print("\n" + "=" * 60, "✅ DAY 2 COMPLETE!", "=" * 60, sep="\n")

# What was processed and how well each trait model scored on its held-out rows.
print("\n📊 Summary:")
print(
    f"   - Images processed: {len(df)}",
    f"   - Trait prediction models trained: {len(models)}",
    f"   - Hybrid varieties in database: {len(hybrid_database)}",
    "   - Model accuracies:",
    sep="\n",
)
for trait_name, result in results.items():
    print(f"      • {trait_name}: {result['accuracy']:.2%}")

# Where each artefact of this notebook was written.
print("\n📁 Outputs saved:")
print(
    f"   - Trait prediction models: {Config.MODELS_PATH}",
    f"   - Hybrid database: {hybrid_db_file}",
    f"   - Genotype predictor: {predictor_file}",
    f"   - Enhanced metadata: {enhanced_metadata_file}",
    sep="\n",
)

print("\n🚀 Ready for Day 3: Weather Integration & Hybrid Recommendation Engine!")


✅ DAY 2 COMPLETE!

📊 Summary:
   - Images processed: 15313
   - Trait prediction models trained: 3
   - Hybrid varieties in database: 5
   - Model accuracies:
      • yield_trait: 92.20%
      • disease_resistance: 93.31%
      • stress_tolerance: 95.79%

📁 Outputs saved:
   - Trait prediction models: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models
   - Hybrid database: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\hybrids\hybrid_database.json
   - Genotype predictor: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\models\genotype_predictor.pkl
   - Enhanced metadata: C:\Users\sahan\Desktop\AgriGenAI2\AgriGenAI_Output\metadata\image_metadata_with_traits.csv

🚀 Ready for Day 3: Weather Integration & Hybrid Recommendation Engine!
