In [1]:
"""
Corrected and runnable script for Solar Panel Defect Classification
- Fixes common issues: folder structure checks, preprocessing consistency, class imbalance handling,
  transfer learning with EfficientNetB0, callbacks, fine-tuning, and prediction helper.

Usage:
- Place this file in the same folder as your `Solar_Panel_Dataset` directory (or update DATA_DIR).
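- Run in an environment with TensorFlow installed (>=2.10 recommended) and a NumPy version that its
  compiled dependencies support. The recorded run of this cell aborted with an ImportError because a
  module compiled against NumPy 1.x was loaded under NumPy 2.1.3; downgrading to `numpy<2` or
  upgrading the affected module resolves it.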
- Adjust HYPERPARAMS as needed.

Note: This is a single-file script version of the notebook flow.
"""

import os
import numpy as np
import matplotlib.pyplot as plt
import itertools
import json
from collections import Counter

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

# --------------------------- USER CONFIG ---------------------------
# Paths, resolved relative to the current working directory.
DATA_DIR = "Solar_Panel_Dataset"         # dataset root; edit if yours lives elsewhere
MODEL_SAVE_PATH = "best_solar_model.h5"  # file written by the ModelCheckpoint callback

# Input pipeline.
IMG_SIZE = (224,) * 2   # passed as `target_size` to the generators and to predict_image
BATCH_SIZE = 32
SEED = 42               # used for np.random.seed and the directory iterators

# Training schedule.
INITIAL_EPOCHS = 12     # epochs of the first fit() call (backbone frozen)
FINE_TUNE_EPOCHS = 10   # epochs of the second fit() call (after unfreezing)

# --------------------------- SANITY CHECKS --------------------------
# Fail fast, with a readable message, when the dataset is missing or malformed.
# `isdir` (rather than `exists`) also rejects a regular file that merely carries
# the dataset's name.
if not os.path.isdir(DATA_DIR):
    raise FileNotFoundError(f"Dataset directory not found: {DATA_DIR}. Make sure it exists and contains class subfolders.")

# Two layouts are supported:
#   'split'  -> DATA_DIR/train/<class>/... plus DATA_DIR/val/ or DATA_DIR/validation/
#   'single' -> DATA_DIR/<class>/...   (a validation share is carved out later)
has_train_subdir = os.path.isdir(os.path.join(DATA_DIR, 'train'))

if has_train_subdir:
    train_root = os.path.join(DATA_DIR, 'train')
    # Prefer val/ and fall back to validation/; None when neither exists.
    val_root = next(
        (
            candidate
            for candidate in (os.path.join(DATA_DIR, name) for name in ('val', 'validation'))
            if os.path.isdir(candidate)
        ),
        None,
    )
    if val_root is None:
        raise FileNotFoundError('Found train/ but no val/ or validation/. Please create a validation split or remove train/ directory.')
    DATA_SOURCE = 'split'
else:
    train_root = DATA_DIR
    val_root = None
    DATA_SOURCE = 'single'

# The training root must hold at least one class subfolder; without this check
# the failure only surfaces later as an obscure error while building the model.
_class_dirs = [
    entry
    for entry in sorted(os.listdir(train_root))
    if os.path.isdir(os.path.join(train_root, entry))
]
if not _class_dirs:
    raise FileNotFoundError(f"No class subfolders found in {train_root}. Expected one subfolder per class, each containing images.")

print(f"DATA_SOURCE = {DATA_SOURCE}")

# --------------------------- DATA GENERATORS ------------------------
np.random.seed(SEED)

# Augmentation is applied to training images only. Validation images must be
# left undistorted and unshuffled: the validation loss drives the callbacks, and
# the evaluation step compares predictions against `val_gen.classes`, which is
# in directory order.
AUGMENTATION_KWARGS = dict(
    rotation_range=20,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.1,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)
# Every generator scales pixels by 1/255; predict_image() applies the same factor.
RESCALE = 1. / 255
# In the 'single' layout 20% of each class is held out for validation. Both
# datagens receive the same fraction so they agree on which files belong to
# which subset.
VAL_SPLIT = 0.2 if DATA_SOURCE == 'single' else 0.0
FLOW_KWARGS = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_datagen = ImageDataGenerator(rescale=RESCALE, validation_split=VAL_SPLIT, **AUGMENTATION_KWARGS)
val_datagen = ImageDataGenerator(rescale=RESCALE, validation_split=VAL_SPLIT)

if DATA_SOURCE == 'single':
    # One directory, two subsets of it.
    train_gen = train_datagen.flow_from_directory(
        train_root,
        subset='training',
        seed=SEED,
        **FLOW_KWARGS
    )
    val_gen = val_datagen.flow_from_directory(
        train_root,
        subset='validation',
        shuffle=False,
        **FLOW_KWARGS
    )
else:
    # Dataset already split into train/ and val/ (or validation/) directories.
    train_gen = train_datagen.flow_from_directory(
        train_root,
        seed=SEED,
        **FLOW_KWARGS
    )
    val_gen = val_datagen.flow_from_directory(
        val_root,
        shuffle=False,
        **FLOW_KWARGS
    )

# Show the class-name -> index mapping so it can be checked by eye.
print('\nClass indices:', train_gen.class_indices, sep='\n')

NUM_CLASSES = train_gen.num_classes

# ------------------------ HANDLE CLASS IMBALANCE --------------------
# Per-sample class ids as reported by the training generator; the same
# attribute is used for both dataset layouts.
labels = train_gen.classes

# Best effort: when the weights cannot be computed, training proceeds with
# class_weights left at None.
class_weights = None
try:
    class_labels = np.unique(labels)
    weights = compute_class_weight('balanced', classes=class_labels, y=labels)
    # Plain Python int/float keys and values instead of NumPy scalars.
    class_weights = dict(zip(map(int, class_labels), map(float, weights)))
    print('\nClass weights:', class_weights, sep='\n')
except Exception as err:
    print('Could not compute class weights:', err)

# --------------------------- BUILD MODEL ----------------------------
# ImageNet-pretrained backbone without its classification top; frozen for the
# first training phase so that only the new head is learned.
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False

inputs = tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3), name='image')

# The data generators and predict_image() scale pixels to [0, 1], but the Keras
# EfficientNet models ship with their own preprocessing and expect raw [0, 255]
# pixel values. Map the inputs back to that range before the backbone so the
# pretrained weights see the distribution they were trained on.
# (Alternative: drop the 1/255 scaling everywhere it is applied and remove this
# layer - both changes have to be made together.)
x = tf.keras.layers.Rescaling(255.0, name='to_0_255')(inputs)

# training=False keeps the backbone's BatchNormalization layers in inference
# mode, also later when parts of the backbone are unfrozen for fine-tuning.
x = base_model(x, training=False)

# Classification head: pooled features -> dropout -> softmax over the classes.
x = GlobalAveragePooling2D(name='gap')(x)
x = Dropout(0.35, name='dropout')(x)
outputs = Dense(NUM_CLASSES, activation='softmax', name='predictions')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# --------------------------- CALLBACKS ------------------------------
# All three callbacks watch the validation loss.
_MONITORED_METRIC = 'val_loss'

# Stop after 6 epochs without improvement and roll back to the best weights.
_early_stopping = EarlyStopping(monitor=_MONITORED_METRIC, patience=6, restore_best_weights=True)
# Write the model to MODEL_SAVE_PATH whenever the monitored value improves.
_checkpoint = ModelCheckpoint(MODEL_SAVE_PATH, monitor=_MONITORED_METRIC, save_best_only=True, verbose=1)
# Multiply the learning rate by 0.3 after 3 stagnant epochs, never below 1e-7.
_reduce_lr = ReduceLROnPlateau(monitor=_MONITORED_METRIC, factor=0.3, patience=3, min_lr=1e-7, verbose=1)

callbacks = [_early_stopping, _checkpoint, _reduce_lr]

# --------------------------- TRAIN HEAD ------------------------------
# First phase: fit with the model as compiled above.
_head_fit_kwargs = dict(
    validation_data=val_gen,
    epochs=INITIAL_EPOCHS,
    class_weight=class_weights,
    callbacks=callbacks,
)
history = model.fit(train_gen, **_head_fit_kwargs)

# --------------------------- EVALUATE -------------------------------
print('\nEvaluating on validation set...')
val_gen.reset()
val_steps = len(val_gen)  # number of batches in one full pass over the validation data
if val_steps == 0:
    raise ValueError('Validation generator is empty; cannot evaluate.')

# Take the true labels from the very batches that are fed to the model rather
# than from `val_gen.classes`. That attribute is in directory order, so it only
# lines up with the predictions when the generator does not shuffle; pairing
# labels and predictions per batch is correct in either case.
_batch_preds, _batch_labels = [], []
for _step in range(val_steps):
    _x_batch, _y_batch = val_gen[_step]
    _batch_preds.append(model.predict(_x_batch, verbose=0))
    _batch_labels.append(_y_batch)

val_preds = np.concatenate(_batch_preds, axis=0)
val_pred_classes = np.argmax(val_preds, axis=1)
# class_mode='categorical' yields one-hot rows; argmax turns them into class ids.
true_classes = np.argmax(np.concatenate(_batch_labels, axis=0), axis=1)

# Class names ordered by their index, plus the full list of label ids. Passing
# `labels=` explicitly keeps the report and the matrix aligned with
# `class_names` even when a class never occurs in the validation data.
class_names = [name for name, _ in sorted(train_gen.class_indices.items(), key=lambda item: item[1])]
_label_ids = list(range(len(class_names)))

print('\nClassification Report:')
print(classification_report(true_classes, val_pred_classes, labels=_label_ids, target_names=class_names))

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(true_classes, val_pred_classes, labels=_label_ids)

# Plot the confusion matrix.
fig, ax = plt.subplots(figsize=(8, 6))
# A light-to-dark colormap makes the white-on-dark / black-on-light cell text
# chosen below readable.
_heatmap = ax.imshow(cm, interpolation='nearest', cmap='Blues')
ax.set_title('Confusion matrix')
fig.colorbar(_heatmap, ax=ax)
_tick_marks = np.arange(len(class_names))
ax.set_xticks(_tick_marks)
ax.set_xticklabels(class_names, rotation=45)
ax.set_yticks(_tick_marks)
ax.set_yticklabels(class_names)

# Cells above half of the maximum count get white text, the rest black.
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    ax.text(j, i, format(cm[i, j], 'd'),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
fig.tight_layout()
plt.show()

# --------------------------- FINE-TUNING ----------------------------
# Second phase: unfreeze the upper part of the backbone and continue training
# with a much smaller learning rate (1e-5 instead of 1e-3).
FINE_TUNE_FROM_FRACTION = 0.70  # backbone layers below this fraction stay frozen

base_model.trainable = True
fine_tune_at = int(len(base_model.layers) * FINE_TUNE_FROM_FRACTION)
for _index, _layer in enumerate(base_model.layers):
    # BatchNormalization layers stay frozen even inside the unfrozen range.
    # Keras' fine-tuning guidance for EfficientNet recommends this, because
    # letting them update on a small dataset disturbs the pretrained statistics
    # and typically costs accuracy right after unfreezing.
    _is_batch_norm = isinstance(_layer, tf.keras.layers.BatchNormalization)
    _layer.trainable = (_index >= fine_tune_at) and not _is_batch_norm

# Recompile so the changed `trainable` flags and the new optimizer are used.
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print('\nStarting fine-tuning...')
history_ft = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=FINE_TUNE_EPOCHS,
    class_weight=class_weights,
    callbacks=callbacks
)

# Save final model
model.save('solar_model_final.h5')
print('\nModel saved to solar_model_final.h5')

# --------------------------- PREDICTION HELPERS ---------------------
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model, class_indices, target_size=IMG_SIZE):
    """Classify a single image file.

    The image is loaded at ``target_size``, converted to an array, divided by
    255 (the same scaling the data generators apply) and passed to
    ``model.predict`` as a batch of one.

    Args:
        img_path: Path of the image file to classify.
        model: Object whose ``predict`` returns one row of per-class scores
            for the batch.
        class_indices: Mapping of class name -> integer index, e.g.
            ``train_gen.class_indices``.
        target_size: Size the image is resized to when loading.

    Returns:
        ``(class_name, score)`` for the highest-scoring class.
    """
    pil_image = image.load_img(img_path, target_size=target_size)
    batch = np.expand_dims(image.img_to_array(pil_image) / 255.0, axis=0)
    scores = model.predict(batch)
    best_index = np.argmax(scores, axis=1)[0]
    # Invert name -> index so the winning index can be mapped back to its name.
    index_to_name = {index: name for name, index in class_indices.items()}
    return index_to_name[best_index], scores[0][best_index]

# Example usage (uncomment and change path):
# class_map = train_gen.class_indices
# result, conf = predict_image('path/to/sample.jpg', model, class_map)
# print('Predicted:', result, 'Confidence:', conf)

# --------------------------- NOTES & NEXT STEPS ---------------------
# 1) If you still get poor accuracy:
#    - check dataset quality (duplicates, mislabeled images)
#    - increase augmentation, add brightness/contrast variations
#    - consider class-specific augmentation if one class is scarce
#    - expand dataset or use synthetic augmentation (Roboflow)
# 2) For object detection (to localize defects), label images using labelImg or Roboflow
#    and train a YOLOv8 model (ultralytics). That is a separate workflow.
#    Example CLI: pip install ultralytics; yolo task=detect mode=train model=yolov8s.pt data=solar.yaml epochs=50 imgsz=640
# 3) To deploy: convert to a small TF SavedModel or TFLite for edge devices.

# Closing status message; it is only reached when every statement above has
# run without raising.
print('\nScript finished. Open the saved model and run `predict_image()` for quick tests.')



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Lenovo\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\Lenovo\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Lenovo\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 701, in start
    self.io_loop.start()
  File "C:\Users\Lenovo\anaconda3\Lib\site-

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



ImportError: initialization failed