# Mushroom Classification with Deep Learning

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/YOUR_USERNAME/FractalN/blob/main/Mushroom_Classifier.ipynb)


In [None]:
# Check if we're running in Colab
IN_COLAB = 'google.colab' in str(get_ipython())

if IN_COLAB:
    # Clone repository (includes dataset in data/ folder)
    !git clone https://github.com/YOUR_USERNAME/FractalN.git
    %cd FractalN
    
    # Install additional requirements
    !pip install -r requirements.txt
    
    # Verify dataset is present
    !ls -R data/
    
    # Mount Google Drive for saving results
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Create results directory in Drive
    !mkdir -p "/content/drive/MyDrive/FractalN_Results"
else:
    # Verify dataset is present in local environment
    if not os.path.exists('data'):
        raise FileNotFoundError("Dataset not found! Please ensure 'data' directory exists.")
    print("Dataset found in local environment.")

In [None]:
# Setup environment
import os
import sys
from src.gpu_config import setup_gpu
import tensorflow as tf

# Run the project's GPU setup helper before querying TensorFlow devices
setup_gpu()

# Report the runtime so the execution environment is visible in the output
print("TensorFlow version:", tf.__version__)
print("GPU devices:", tf.config.list_physical_devices('GPU'))

# Results live on Google Drive under Colab and in a local folder otherwise
if IN_COLAB:
    RESULTS_DIR = '/content/drive/MyDrive/FractalN_Results'
else:
    RESULTS_DIR = 'results'

In [None]:
# Setup paths and check for cached processed data
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Define paths
    DRIVE_ROOT = '/content/drive/MyDrive/FractalN_Data'
    AUGMENTED_DATA_PATH = f"{DRIVE_ROOT}/final_processed_data.zip"  # Final processed data after all steps
    RESULTS_DIR = f"{DRIVE_ROOT}/Results"
    
    # Create directories
    !mkdir -p "{DRIVE_ROOT}"
    !mkdir -p "{RESULTS_DIR}"
    
    # Check if final processed data exists in Drive
    NEED_PROCESSING = not os.path.exists(AUGMENTED_DATA_PATH)
else:
    RESULTS_DIR = 'results'
    NEED_PROCESSING = not os.path.exists('data/processed')

## 1. Complete Data Pipeline
Process data in three steps:
1. Organize raw data
2. Augment organized data
3. Preprocess augmented data

In [None]:
if NEED_PROCESSING:
    print("No cached processed data found. Starting complete data pipeline...")
    
    # Step 1: Organize raw data
    print("\n1. Organizing raw data...")
    from src.utils.reorganize_data import reorganize_mushroom_data
    reorganize_mushroom_data()
    
    # Step 2: Augment organized data
    print("\n2. Augmenting organized data...")
    from src.utils.augment_mushroom_data import augment_mushroom_data
    augment_mushroom_data(target_count=20000)
    
    # Step 3: Preprocess augmented data
    print("\n3. Preprocessing augmented data...")
    from src.utils.preprocess_data import preprocess_dataset
    preprocess_dataset(
        data_dir='data/mushroom_data',
        output_dir='data/processed',
        test_size=0.2,
        img_size=(224, 224)
    )
    
    # Cache the final processed data
    if IN_COLAB:
        print("\nSaving final processed data to Google Drive...")
        !zip -r "{AUGMENTED_DATA_PATH}" data/processed
        print(f"Final processed data saved to: {AUGMENTED_DATA_PATH}")
else:
    print("Found cached processed data. Loading...")
    if IN_COLAB:
        !unzip -q "{AUGMENTED_DATA_PATH}" -d "data/"
    print("Cached data loaded successfully!")

# Verify final data structure and distribution
!tree data/processed -L 3

print("\nFinal dataset distribution:")
for split in ['train', 'test']:
    print(f"\n{split.capitalize()} set:")
    for category in ['poisonous', 'edible']:
        count = len(list(Path(f'data/processed/{split}/{category}').glob('*.jpg')))
        print(f"{category}: {count} images")

## 2. Model Training
Train the model on the fully processed dataset.

In [None]:
# Train the classifier through the project's train_model entry point, then
# display the training-history image and the metrics text file read from the
# current working directory.
from src.train import train_model
from src.model import create_model  # NOTE(review): not referenced in this cell — confirm it is still needed
# NOTE(review): this removes everything under data/processed, i.e. the output of
# the data-pipeline cell; presumably train_model(preprocess=True) regenerates
# it — confirm, otherwise the later evaluation cell has no test images.
!rm -rf data/processed/*
print("Starting model training...")
train_model(preprocess=True)  # Set to True for first run

print("\nDisplaying training results...")
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Presumably written by train_model — TODO confirm the file name against src/train
img = mpimg.imread('training_history.png')
plt.figure(figsize=(15, 5))
plt.imshow(img)
plt.axis('off')  # the image is already a rendered plot, so hide the outer axes
plt.show()

print("\nTraining Metrics:")
# Presumably written by train_model as well — TODO confirm
with open('training_metrics.txt', 'r') as f:
    print(f.read())

## 4. Model Evaluation and Prediction
Test the trained model on sample images

In [None]:
from src.predict import predict_mushroom
import random
from pathlib import Path

def test_random_images(num_samples=3, test_dir='data/processed/test',
                       model_path='mushroom_classifier.keras'):
    """Print predictions for randomly chosen test images of each category.

    For both the 'edible' and 'poisonous' sub-folders of ``test_dir``, up to
    ``num_samples`` distinct JPEG images are picked at random and passed to
    ``predict_mushroom``; the predicted label and confidence are printed.

    Args:
        num_samples: Maximum number of images to test per category.
        test_dir: Directory containing one sub-folder per category.
        model_path: Path of the saved model handed to ``predict_mushroom``.

    Returns:
        None. All output is printed.
    """
    test_dir = Path(test_dir)

    for category in ['edible', 'poisonous']:
        print(f"\nTesting {category} mushrooms:")
        category_path = test_dir / category
        # Sorted so that a seeded RNG gives reproducible picks
        image_files = sorted(category_path.glob('*.[Jj][Pp][Gg]'))

        # A missing or empty folder would make random selection fail;
        # report it and move on to the next category instead.
        if not image_files:
            print(f"No images found in {category_path}; skipping.")
            continue

        # Sample without replacement so the same image is never tested twice,
        # and never ask for more images than exist.
        sample_size = max(0, min(num_samples, len(image_files)))
        for test_image in random.sample(image_files, sample_size):
            print(f"\nImage: {test_image.name}")
            prediction, confidence = predict_mushroom(
                model_path,
                str(test_image)
            )
            print(f"Predicted: {prediction}")
            print(f"Confidence: {confidence:.2%}")

# Test model on random images
test_random_images()

## 5. Save Model and Results

In [None]:
# Copy the trained model and training artifacts into RESULTS_DIR
# (a Google Drive folder under Colab, a local folder otherwise).
# Files that do not exist are reported instead of being silently skipped, and
# the success message is only printed when something was actually copied.
import os
import shutil

_ARTIFACTS = [
    'mushroom_classifier.keras',
    'best_model.keras',
    'training_history.png',
    'training_metrics.txt',
    'training_log.csv',
]

# Same destination variable in both environments, so a changed RESULTS_DIR
# is honoured locally as well.
os.makedirs(RESULTS_DIR, exist_ok=True)

_copied, _missing = [], []
for _artifact in _ARTIFACTS:
    if os.path.isfile(_artifact):
        shutil.copy(_artifact, RESULTS_DIR)
        _copied.append(_artifact)
    else:
        _missing.append(_artifact)

if _missing:
    print(f"Warning: these files were not found and were not saved: {', '.join(_missing)}")

if _copied:
    if IN_COLAB:
        print(f"Model and results saved to Google Drive: {RESULTS_DIR}")
    else:
        print(f"Model and results saved in '{RESULTS_DIR}' directory")