In [5]:
import os
import shutil
import random
from pathlib import Path

def split_dataset(root_dir, output_dir, train_ratio=0.8, val_ratio=0.10, test_ratio=0.10, seed=None):
    """Copy a class-per-folder dataset into ``train`` / ``val`` / ``test`` folders.

    ``root_dir`` must contain one sub-directory per class. For every class the
    files are shuffled and copied to ``output_dir/<split>/<class_name>/``.

    Args:
        root_dir: Directory holding one sub-directory per class.
        output_dir: Directory that receives the ``train``/``val``/``test`` tree
            (created if missing).
        train_ratio: Fraction of each class copied to ``train``.
        val_ratio: Fraction of each class copied to ``val``.
        test_ratio: Fraction of each class intended for ``test``. The test split
            receives every file left after train and val, so rounding never
            drops a file.
        seed: Optional seed for the shuffle. With a seed the split is
            reproducible; with ``None`` the global ``random`` state is used.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
        ValueError: If a ratio is negative or the three ratios do not sum to 1.

    Note:
        Files already present in ``output_dir`` are not removed. Re-running with
        a different shuffle into the same folder can therefore leave the same
        image in more than one split.
    """
    root_dir = Path(root_dir)
    output_dir = Path(output_dir)

    if not root_dir.exists():
        raise FileNotFoundError(f"Input directory {root_dir} does not exist.")

    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError(f"Split ratios must be non-negative, got {ratios}.")
    # Tolerance absorbs float noise such as 0.8 + 0.1 + 0.1 != 1.0 exactly.
    if abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(f"Split ratios must sum to 1, got {ratios} (sum={sum(ratios)}).")

    # A dedicated generator keeps a seeded split from disturbing global state.
    rng = random.Random(seed) if seed is not None else random

    # Sorted so the processing order does not depend on the filesystem.
    class_dirs = sorted(d for d in root_dir.iterdir() if d.is_dir())

    for class_dir in class_dirs:
        class_name = class_dir.name
        # Keep regular files only: "*.*" also matches sub-directories whose
        # name contains a dot, and shutil.copy cannot copy a directory.
        # Sorting first makes a seeded shuffle deterministic.
        image_files = sorted(p for p in class_dir.glob("*.*") if p.is_file())
        rng.shuffle(image_files)

        total = len(image_files)
        n_train = int(train_ratio * total)
        n_val = int(val_ratio * total)

        splits = {
            "train": image_files[:n_train],
            "val": image_files[n_train:n_train + n_val],
            # Remainder goes to test so no file is lost to integer truncation.
            "test": image_files[n_train + n_val:]
        }

        for split_name, file_list in splits.items():
            dest_dir = output_dir / split_name / class_name
            dest_dir.mkdir(parents=True, exist_ok=True)
            for file_path in file_list:
                shutil.copy(file_path, dest_dir / file_path.name)

    print(f"\n✅ Dataset split completed and stored in: {output_dir}")

# Run the split on the local solar-panel dataset.
#   output_dir - folder that receives the generated splits
#   root_dir   - main folder containing one sub-folder per class
split_dataset(
    output_dir="/home/dharun/Desktop/solar_panel/split_folder",
    root_dir="/home/dharun/Desktop/solar_panel/Faulty_solar_panel-20250514T144914Z-1-001/Faulty_solar_panel/",
)



✅ Dataset split completed and stored in: /home/dharun/Desktop/solar_panel/split_folder


In [7]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os

# Set device: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Paths
data_dir = "/home/dharun/Desktop/solar_panel/split_folder"
best_weights_path = "best_resnet152_model.pth"  # best-validation checkpoint written during training

# Transforms: the same deterministic preprocessing is applied to all three splits.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],  # mean
                         [0.229, 0.224, 0.225])  # std
])

# Load datasets (one ImageFolder per split directory; labels come from the sub-folder names)
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=transform)
val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=transform)
test_dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=transform)

# Dataloaders: only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Class count
num_classes = len(train_dataset.classes)
print(f"Classes: {train_dataset.classes}")

# Load ResNet-152
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the installed version is confirmed.
model = models.resnet152(pretrained=True)

# Freeze every pretrained parameter; only the replacement head below is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace final layer with a fresh, trainable classifier sized for this dataset.
model.fc = nn.Linear(model.fc.in_features, num_classes)

model = model.to(device)

# Loss and optimizer (the optimizer only sees the new head's parameters)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)

# Training loop
num_epochs = 1
best_val_acc = 0.0
best_checkpoint_saved = False  # set once this run has actually written best_weights_path

for epoch in range(num_epochs):
    # NOTE(review): train() also puts the frozen backbone's BatchNorm layers in
    # training mode, so their running statistics keep updating even though
    # their parameters are frozen.
    model.train()
    total_loss = 0  # sum of per-batch losses (not averaged over batches)
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    val_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_weights_path)
        best_checkpoint_saved = True

print("✅ Training complete!")

# Evaluate the checkpoint that was selected on validation accuracy, not whatever
# weights the last epoch happened to leave in memory. The flag guards against
# loading a stale file from an earlier run when nothing was saved this time.
if best_checkpoint_saved:
    model.load_state_dict(torch.load(best_weights_path, map_location=device))

# Final Test Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
test_acc = correct / total
print(f"📊 Test Accuracy: {test_acc:.4f}")


Using device: cuda
Classes: ['Bird-drop', 'Clean', 'Dusty', 'Electrical-damage', 'Physical-Damage', 'Snow-Covered']




Epoch [1/1], Loss: 31.8107, Train Acc: 0.4510, Val Acc: 0.6628
✅ Training complete!
📊 Test Accuracy: 0.8090


In [8]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os

# Set device: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Paths
data_dir = "/home/dharun/Desktop/solar_panel/split_folder"
best_weights_path = "best_resnet152_model.pth"  # state_dict of the best-validation epoch
best_model_path = "best_resnet152_model.pt"     # whole pickled model of the same epoch

# Transforms: the same deterministic preprocessing is applied to all three splits.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],  # mean
                         [0.229, 0.224, 0.225])  # std
])

# Load datasets (one ImageFolder per split directory; labels come from the sub-folder names)
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=transform)
val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=transform)
test_dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=transform)

# Dataloaders: only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Class count
num_classes = len(train_dataset.classes)
print(f"Classes: {train_dataset.classes}")

# Load ResNet-152
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; switch once the installed version is confirmed.
model = models.resnet152(pretrained=True)

# Freeze feature extractor; only the replacement head below is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace final layer with a fresh, trainable classifier sized for this dataset.
model.fc = nn.Linear(model.fc.in_features, num_classes)

model = model.to(device)

# Loss and optimizer (the optimizer only sees the new head's parameters)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)

# Training loop
num_epochs = 50
best_val_acc = 0.0
best_checkpoint_saved = False  # set once this run has actually written best_weights_path

for epoch in range(num_epochs):
    # NOTE(review): train() also puts the frozen backbone's BatchNorm layers in
    # training mode, so their running statistics keep updating even though
    # their parameters are frozen.
    model.train()
    total_loss = 0  # sum of per-batch losses (not averaged over batches)
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    val_acc = correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    # Save best model (strictly better validation accuracy only, so ties keep the earlier epoch)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_weights_path)  # Save weights
        # Whole-model save pickles the object; loading it needs the same class
        # definitions importable and should only be done with trusted files.
        torch.save(model, best_model_path)                 # Save entire model
        best_checkpoint_saved = True

print("✅ Training complete!")

# Evaluate the checkpoint that was selected on validation accuracy, not the
# weights left in memory by the final epoch (which may be worse than the best).
# The flag guards against loading a stale file from an earlier run when nothing
# was saved this time.
if best_checkpoint_saved:
    model.load_state_dict(torch.load(best_weights_path, map_location=device))

# Final Test Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_acc = correct / total
print(f"📊 Test Accuracy: {test_acc:.4f}")


Using device: cuda
Classes: ['Bird-drop', 'Clean', 'Dusty', 'Electrical-damage', 'Physical-Damage', 'Snow-Covered']
Epoch [1/50], Loss: 30.5290, Train Acc: 0.4928, Val Acc: 0.6860
Epoch [2/50], Loss: 19.1809, Train Acc: 0.7334, Val Acc: 0.7209
Epoch [3/50], Loss: 15.9644, Train Acc: 0.7651, Val Acc: 0.7326
Epoch [4/50], Loss: 13.7294, Train Acc: 0.7954, Val Acc: 0.8023
Epoch [5/50], Loss: 12.1941, Train Acc: 0.8285, Val Acc: 0.7907
Epoch [6/50], Loss: 10.1635, Train Acc: 0.8646, Val Acc: 0.7907
Epoch [7/50], Loss: 9.8455, Train Acc: 0.8674, Val Acc: 0.8023
Epoch [8/50], Loss: 8.6137, Train Acc: 0.8905, Val Acc: 0.7907
Epoch [9/50], Loss: 8.6288, Train Acc: 0.8905, Val Acc: 0.7907
Epoch [10/50], Loss: 8.2432, Train Acc: 0.8890, Val Acc: 0.8372
Epoch [11/50], Loss: 7.7692, Train Acc: 0.8833, Val Acc: 0.7674
Epoch [12/50], Loss: 8.7376, Train Acc: 0.8818, Val Acc: 0.7791
Epoch [13/50], Loss: 7.9655, Train Acc: 0.8890, Val Acc: 0.8140
Epoch [14/50], Loss: 5.9995, Train Acc: 0.9207, Val Acc