In [None]:
# Step 1: Setup Environment and Install Dependencies

# Mount Google Drive (for saving outputs)
# The mount point /content/drive is the parent of the folder that the final
# "Save Results to Google Drive" step writes into
# (/content/drive/MyDrive/Solar_Potential_Results).
from google.colab import drive
drive.mount('/content/drive')

# Install required packages
%pip install torch torchvision
%pip install yacs albumentations opencv-python matplotlib pandas pillow scikit-image
%pip install pytorch-msssim opencv-contrib-python requests
%pip install gdown  # For downloading datasets
%pip install scikit-learn tqdm


In [None]:
# Step 2: Clone or Download Project Files

# Option 1: Clone from GitHub
!git clone https://github.com/ronakgadhiya09/rooftop-solar-potential.git
%cd rooftop-solar-potential

# Option 2: If you prefer to download specific files directly
# Uncomment and run the following code instead
"""
import os

# Create project structure
!mkdir -p Solar_Potential_Estimation/configs
!mkdir -p Solar_Potential_Estimation/model
!mkdir -p Solar_Potential_Estimation/lib/config
!mkdir -p Solar_Potential_Estimation/lib/utils
!mkdir -p Solar_Potential_Estimation/inference
!mkdir -p Solar_Potential_Estimation/test_crowd/images
!mkdir -p Solar_Potential_Estimation/test_crowd/masks
!mkdir -p Solar_Potential_Estimation/test_crowd/pred
!mkdir -p Solar_Potential_Estimation/test_crowd/original/components
!mkdir -p Solar_Potential_Estimation/test_crowd/original/roof_data
!mkdir -p Solar_Potential_Estimation/test_crowd/original/total_pon
!mkdir -p Solar_Potential_Estimation/test_crowd/original/com_bin
!mkdir -p Solar_Potential_Estimation/i_outputs
!mkdir -p solar_power_json

# Download key files
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/configs/inria_hrnet_ocr.yaml
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/model/seg_hrnet_ocr.py
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/lib/config/config.py
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/lib/utils/utils.py
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/inference/i_inference.py
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/inference/i_roof_separation_with_area.py
!gdown --id YOUR_ID_HERE -O Solar_Potential_Estimation/inference/get_solar_data.py
"""


In [None]:
# Step 3: Download Pre-trained Weights

# Download pre-trained model weights
!gdown --folder https://drive.google.com/drive/folders/1RmPxBfePZctk_RLSwMcZFcjqx4HxDiI7 -O Solar_Potential_Estimation/i_outputs

# Check if weights were downloaded successfully
import os
if os.path.exists('Solar_Potential_Estimation/i_outputs/epoch_99.pth'):
    print("✅ Pre-trained weights downloaded successfully")
else:
    print("❌ Failed to download pre-trained weights")
    print("Please download manually from: https://drive.google.com/drive/folders/1RmPxBfePZctk_RLSwMcZFcjqx4HxDiI7")


In [None]:
# Step 4: Download Sample Test Image

# Download a sample test image from the dataset
# You can replace this with your own aerial image
!cp Solar_Potential_Estimation/india_dataset/twoChannels_in/val/images/25_6.png Solar_Potential_Estimation/test_crowd/images/

# Alternatively, upload your own image
from google.colab import files
print("You can also upload your own aerial image:")
# Uncomment the line below to upload your own image
# uploaded = files.upload()

# If you uploaded an image, save it to the test directory
"""
import shutil
for filename in uploaded.keys():
    shutil.move(filename, f'Solar_Potential_Estimation/test_crowd/images/{filename}')
    print(f"Saved {filename} to test directory")
"""


In [None]:
# Step 5: Import Required Libraries and Setup Paths

import os
import sys
import numpy as np
import torch
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Add project directories to path
sys.path.append('Solar_Potential_Estimation')
from model.seg_hrnet_ocr import get_seg_model
from lib.config import config

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Locations of the model config, the checkpoint, the input images and the
# folders that receive the generated outputs
CONFIG_PATH = 'Solar_Potential_Estimation/configs/inria_hrnet_ocr.yaml'
CHECKPOINT_PATH = 'Solar_Potential_Estimation/i_outputs/epoch_99.pth'
IMAGE_DIR = 'Solar_Potential_Estimation/test_crowd/images'
OUTPUT_DIR = 'Solar_Potential_Estimation/test_crowd/pred'
MASKS_DIR = 'Solar_Potential_Estimation/test_crowd/masks'

# Both output folders must exist before anything is written into them
for _out_dir in (OUTPUT_DIR, MASKS_DIR):
    os.makedirs(_out_dir, exist_ok=True)


In [None]:
# Step 6: Load Model Configuration and Initialize Model

# Load configuration: start from the project's config object and overlay the
# values from the YAML file, then freeze it again.
cfg = config.config
cfg.defrost()
cfg.merge_from_file(CONFIG_PATH)
cfg.freeze()

# Initialize model
model = get_seg_model(cfg)
model = model.to(device)

# Load pre-trained weights.
# A checkpoint may be a bare state dict or a dict that wraps it. Two wrapper
# keys are accepted: 'state_dict', and 'model_state_dict' -- the key written by
# the optional training cell at the end of this notebook. Without the second
# key, checkpoints produced by that cell could not be loaded here.
checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
state_dict = checkpoint
if isinstance(checkpoint, dict):
    for wrapper_key in ('state_dict', 'model_state_dict'):
        if wrapper_key in checkpoint:
            state_dict = checkpoint[wrapper_key]
            break
model.load_state_dict(state_dict)

# Set model to evaluation mode
model.eval()
print("Model loaded successfully!")


In [None]:
# Step 7: Define Image Preprocessing Functions

def preprocess_image(image_path):
    """
    Load an image from disk and prepare it for the network.

    Returns a tuple ``(image_tensor, image)``:
      * ``image_tensor`` -- output of the Normalize + ToTensorV2 pipeline with
        a leading batch dimension added;
      * ``image`` -- the RGB image as a uint8 NumPy array, as loaded from
        ``image_path``.
    """
    # RGB pixel data as a uint8 array
    rgb_array = np.array(Image.open(image_path).convert('RGB')).astype(np.uint8)

    # Normalize with the given per-channel mean/std, then convert to a tensor
    pipeline = A.Compose([
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    batched = pipeline(image=rgb_array)['image'].unsqueeze(0)

    return batched, rgb_array

def postprocess_output(output, threshold=0.5):
    """
    Turn raw model output into a binary mask.

    Applies a sigmoid, marks every value strictly greater than ``threshold``
    as 1.0 (everything else 0.0), and returns the result for the first image
    and first channel as a NumPy array.
    """
    probabilities = torch.sigmoid(output)
    binary = probabilities.gt(threshold).float()
    as_numpy = binary.cpu().numpy()
    # Index [0, 0] selects the first item of the batch and its first channel
    return as_numpy[0, 0]


In [None]:
# Step 8: Run Inference on Test Images

# Get list of test images.
# Only files with a recognised image extension are kept, so the reported count
# matches what is actually processed; sorting makes the run order stable.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
test_images = sorted(
    name for name in os.listdir(IMAGE_DIR) if name.endswith(IMAGE_EXTENSIONS)
)
print(f"Found {len(test_images)} test images")

# Process each image
for img_name in test_images:
    print(f"Processing {img_name}...")
    img_path = os.path.join(IMAGE_DIR, img_name)
    # splitext strips only the trailing extension; chained str.replace would
    # also rewrite '.png' / '.jpg' occurring in the middle of a file name.
    stem = os.path.splitext(img_name)[0]

    # Preprocess image
    image_tensor, original_image = preprocess_image(img_path)
    image_tensor = image_tensor.to(device)

    # Run inference
    with torch.no_grad():
        output = model(image_tensor)

    # Postprocess output
    pred_mask = postprocess_output(output)

    # Guard: if the network returns a mask whose resolution differs from the
    # input image (whether this model does cannot be told from this notebook),
    # bring it back to image size. The overlay below and the per-pixel
    # indexing in the roof-separation step both need matching shapes.
    img_h, img_w = original_image.shape[:2]
    if pred_mask.shape != (img_h, img_w):
        pred_mask = cv2.resize(pred_mask, (img_w, img_h), interpolation=cv2.INTER_NEAREST)

    # Save prediction mask as numpy array
    np.save(os.path.join(MASKS_DIR, f"{stem}.npy"), pred_mask)

    # Create visualization: original | mask | mask blended over the original
    fig, (ax_image, ax_mask, ax_overlay) = plt.subplots(1, 3, figsize=(15, 5))

    ax_image.imshow(original_image)
    ax_image.set_title('Original Image')
    ax_image.axis('off')

    ax_mask.imshow(pred_mask, cmap='gray')
    ax_mask.set_title('Predicted Mask')
    ax_mask.axis('off')

    # Paint the mask into the red channel and blend it at 50% on top of the image
    overlay_mask = np.zeros_like(original_image)
    overlay_mask[:, :, 0] = pred_mask * 255  # Red channel
    alpha = 0.5
    cv2_overlay = cv2.addWeighted(original_image, 1, overlay_mask, alpha, 0)
    ax_overlay.imshow(cv2_overlay)
    ax_overlay.set_title('Overlay')
    ax_overlay.axis('off')

    # Save visualization; close the figure so a long batch does not pile up
    # open figures in memory
    fig.savefig(os.path.join(OUTPUT_DIR, f"{stem}_.png"))
    plt.close(fig)

    print(f"✅ Saved prediction for {img_name}")

print("Inference complete!")


In [None]:
# Step 9: Roof Separation and Area Calculation

# Create the four per-image output folders used by the roof-separation step
for _subdir in ('components', 'roof_data', 'total_pon', 'com_bin'):
    os.makedirs(f'Solar_Potential_Estimation/test_crowd/original/{_subdir}', exist_ok=True)

import cv2
import pandas as pd
from skimage import measure
from skimage.morphology import binary_dilation, disk

def separate_roofs(mask_path, image_path, min_area=100,
                   pixel_size_m=0.3, panel_area_m2=1.7, edge_margin_px=3):
    """
    Separate individual roofs from a binary mask using connected components analysis

    Parameters
    ----------
    mask_path : str
        Path to a ``.npy`` file holding the binary segmentation mask.
    image_path : str
        Path to the matching image; it is only used to size the colour
        visualisation.
    min_area : int
        Connected regions with fewer pixels than this are discarded.
    pixel_size_m : float
        Edge length of one pixel in metres. The default of 0.3 is a
        simplification; real applications would use the imagery's ground
        sample distance.
    panel_area_m2 : float
        Area taken by one solar panel, in square metres.
    edge_margin_px : int
        Width of the band along each roof outline that is treated as unusable.

    Returns
    -------
    tuple
        ``(filtered_mask, vis_image, roof_data, total_potential)``:
        the label image containing only the kept roofs, an RGB image with each
        roof in a random colour and its id drawn on it, a list with one dict
        per roof, and the sum of ``Solar_potential_per_year`` over all roofs.

    Raises
    ------
    FileNotFoundError
        If ``image_path`` cannot be read by OpenCV.
    """
    # Imported locally because the notebook's import cell only pulls in
    # binary_dilation from skimage.morphology.
    from skimage.morphology import binary_erosion

    # Load mask
    mask = np.load(mask_path)

    # Load original image for visualization. cv2.imread returns None instead of
    # raising when the file is missing or unreadable, so check explicitly.
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # Apply connected components analysis (8-connectivity in 2-D)
    labeled_mask = measure.label(mask, connectivity=2)

    filtered_mask = np.zeros_like(labeled_mask)
    vis_image = np.zeros_like(original_image)
    roof_data = []

    margin_footprint = disk(edge_margin_px)
    square_m_per_pixel = pixel_size_m ** 2

    for prop in measure.regionprops(labeled_mask):
        # Filter regions by area
        if prop.area < min_area:
            continue

        roof_id = int(prop.label)
        region = labeled_mask == prop.label
        filtered_mask[region] = roof_id

        # Area in pixels
        roof_area = prop.area

        # Usable area = pixels left after eroding the roof by the edge margin.
        # (The previous code dilated twice and XOR-ed the two identical
        # results, so the "edge" was always empty and the usable area was
        # always reported equal to the full roof area.)
        usable_area = int(np.sum(binary_erosion(region, margin_footprint)))

        # Convert to real-world area
        real_area = roof_area * square_m_per_pixel

        # Estimate number of solar panels that fit on the roof
        num_panels = int(real_area / panel_area_m2)

        # Estimate solar potential (simplified: 250W per panel, 4 peak sun hours per day)
        # Actual calculation would use PVGIS API with location data
        solar_potential = num_panels * 0.25 * 4 * 365  # kWh per year

        roof_data.append({
            'Roof_ID': roof_id,
            'Roof_Area': roof_area,
            'Net_Usable_Area': usable_area,
            'Real_Area': real_area,
            'Panels': num_panels,
            'Solar_potential_per_year': solar_potential
        })

        # Paint the roof in a random colour and write its id at the centroid
        color = np.random.randint(0, 255, size=3)
        vis_image[region] = color
        center_y, center_x = (int(coord) for coord in prop.centroid)
        cv2.putText(vis_image, f"{roof_id}", (center_x, center_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    total_potential = sum(roof['Solar_potential_per_year'] for roof in roof_data)

    return filtered_mask, vis_image, roof_data, total_potential

# Process each mask

# Column order of the per-image CSV. Passing it explicitly keeps the header row
# in the file even when no roof passes the area filter; a header-less CSV makes
# `pd.read_csv` raise EmptyDataError in the later cells.
ROOF_COLUMNS = [
    'Roof_ID',
    'Roof_Area',
    'Net_Usable_Area',
    'Real_Area',
    'Panels',
    'Solar_potential_per_year',
]

for mask_file in sorted(os.listdir(MASKS_DIR)):
    if not mask_file.endswith('.npy'):
        continue

    base_name = os.path.splitext(mask_file)[0]
    mask_path = os.path.join(MASKS_DIR, mask_file)

    # Locate the source image: same base name, first extension that exists
    image_path = None
    for ext in ('.png', '.jpg', '.jpeg'):
        candidate = os.path.join(IMAGE_DIR, f"{base_name}{ext}")
        if os.path.exists(candidate):
            image_path = candidate
            break

    if image_path is None:
        print(f"⚠️ Could not find original image for {mask_file}")
        continue

    print(f"Processing {base_name}...")

    # Separate roofs
    labeled_mask, vis_image, roof_data, total_potential = separate_roofs(mask_path, image_path)

    # Save results
    # 1. Save labeled mask
    np.save(f"Solar_Potential_Estimation/test_crowd/original/com_bin/{base_name}.npy", labeled_mask)

    # 2. Save visualization
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(vis_image)
    ax.set_title(f"Identified Roofs: {base_name}")
    ax.axis('off')
    fig.savefig(f"Solar_Potential_Estimation/test_crowd/original/components/{base_name}.png")
    plt.close(fig)

    # 3. Save roof data as CSV (header is always written, see ROOF_COLUMNS)
    df = pd.DataFrame(roof_data, columns=ROOF_COLUMNS)
    df.to_csv(f"Solar_Potential_Estimation/test_crowd/original/roof_data/{base_name}.csv", index=False)

    # 4. Save total potential
    with open(f"Solar_Potential_Estimation/test_crowd/original/total_pon/{base_name}.txt", 'w') as f:
        f.write(f"Total Solar Potential: {total_potential:.2f} kWh/year")

    print(f"✅ Processed {base_name}")
    print(f"   - Found {len(roof_data)} roofs")
    print(f"   - Total solar potential: {total_potential:.2f} kWh/year")

print("Roof separation and area calculation complete!")


In [None]:
# Step 10: Get Solar Potential Data from PVGIS API (Optional)

import requests
import json
import time

# Create directory for solar power data
os.makedirs('solar_power_json', exist_ok=True)

def get_pvgis_data(lat=28.6139, lon=77.2090, year=2016, timeout=60):
    """
    Get solar radiation data from PVGIS API
    Default coordinates are for New Delhi, India

    Parameters
    ----------
    lat, lon : float
        Location in decimal degrees.
    year : int
        Used as both start and end year of the requested series.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        `requests` waits indefinitely and a stalled connection would hang the
        notebook.

    Returns
    -------
    dict or None
        The decoded JSON payload, or None when the request fails, the server
        answers with a non-200 status, or the body is not valid JSON. A
        message describing the failure is printed in those cases.
    """
    url = "https://re.jrc.ec.europa.eu/api/v5_2/seriescalc"
    # Passed via `params` so that requests builds and encodes the query string
    params = {
        'lat': lat,
        'lon': lon,
        'startyear': year,
        'endyear': year,
        'outputformat': 'json',
        'pvcalculation': 1,
        'peakpower': 1,
        'loss': 14,
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: API returned status code {response.status_code}")
            return None
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # RequestException: connection problems, timeouts, ...
        # ValueError: the response body could not be decoded as JSON
        print(f"Error fetching data from PVGIS API: {e}")
        return None

# Get solar radiation data for New Delhi, India
print("Fetching solar radiation data from PVGIS API...")
pvgis_data = get_pvgis_data()

if pvgis_data:
    # Save data to file
    with open('solar_power_json/solar_data.json', 'w') as f:
        json.dump(pvgis_data, f, indent=2)
    print("✅ Solar radiation data saved to solar_power_json/solar_data.json")

    # Build {month number: energy in kWh per kWp}.
    # The `seriescalc` endpoint requested above returns an hourly series
    # (per the PVGIS API docs: `time` formatted YYYYMMDD:HHMM and `P`, the PV
    # power in W), not monthly totals. A ready-made `monthly` block is still
    # honoured if present; otherwise the hourly values are summed per month.
    outputs = pvgis_data.get('outputs', {})
    monthly_energy = {}

    if 'monthly' in outputs:
        monthly_rows = outputs['monthly']
        if isinstance(monthly_rows, dict):
            # Some PVGIS tools nest the rows one level down, under 'fixed'
            monthly_rows = monthly_rows.get('fixed', [])
        for row in monthly_rows:
            monthly_energy[row['month']] = row['E_m']
    elif 'hourly' in outputs:
        for row in outputs['hourly']:
            month_text = str(row.get('time', ''))[4:6]
            if month_text.isdigit() and 'P' in row:
                month_number = int(month_text)
                # One hourly sample of P watts ~ P Wh; /1000 converts to kWh
                monthly_energy[month_number] = (
                    monthly_energy.get(month_number, 0.0) + row['P'] / 1000.0
                )

    if monthly_energy:
        # Extract month numbers and values
        months = sorted(monthly_energy)
        e_m = [monthly_energy[month] for month in months]  # Monthly energy production (kWh)

        # Plot monthly production
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.bar(months, e_m, color='orange')
        ax.set_title('Monthly Solar Energy Production (kWh/kWp)')
        ax.set_xlabel('Month')
        ax.set_ylabel('Energy (kWh/kWp)')
        ax.grid(axis='y', linestyle='--', alpha=0.7)
        fig.savefig('solar_power_json/monthly_production.png')
        plt.show()

        # Calculate annual production
        annual_production = sum(e_m)
        print(f"Annual solar energy production: {annual_production:.2f} kWh/kWp")
        print("This means for each kWp (kilowatt-peak) of installed capacity,")
        print(f"you can expect approximately {annual_production:.2f} kWh of energy per year.")

        # Calculate average daily production
        avg_daily = annual_production / 365
        print(f"Average daily production: {avg_daily:.2f} kWh/kWp")
else:
    print("❌ Failed to fetch solar radiation data")
    print("Using default values for solar potential calculation")


In [None]:
# Step 11: Refine Solar Potential Calculation with PVGIS Data

# Annual yield used when no PVGIS figure can be obtained
DEFAULT_ANNUAL_KWH_PER_KWP = 1600  # Default for India


def annual_yield_from_pvgis(data):
    """
    Extract the annual PV yield (kWh per installed kWp) from a PVGIS payload.

    Three layouts are understood:
      * ``outputs.totals.E_y``        -- flat totals block;
      * ``outputs.totals.fixed.E_y``  -- totals nested under ``fixed``;
      * ``outputs.hourly[*].P``       -- hourly PV power in W (what the
        ``seriescalc`` endpoint used by this notebook returns per the PVGIS
        docs); the samples are summed and converted from Wh to kWh.

    Returns the yield as a float, or None when none of the layouts matches.
    """
    if not isinstance(data, dict):
        return None
    outputs = data.get('outputs')
    if not isinstance(outputs, dict):
        return None

    totals = outputs.get('totals')
    if isinstance(totals, dict):
        if isinstance(totals.get('fixed'), dict):
            totals = totals['fixed']
        if 'E_y' in totals:
            return float(totals['E_y'])

    hourly = outputs.get('hourly')
    if isinstance(hourly, list) and hourly:
        watt_hours = sum(row.get('P', 0) for row in hourly if isinstance(row, dict))
        if watt_hours > 0:
            return watt_hours / 1000.0

    return None


# Check if we have PVGIS data
annual_production_per_kwp = None
if os.path.exists('solar_power_json/solar_data.json'):
    # Load PVGIS data
    with open('solar_power_json/solar_data.json', 'r') as f:
        pvgis_data = json.load(f)
    annual_production_per_kwp = annual_yield_from_pvgis(pvgis_data)

if annual_production_per_kwp is not None:
    print(f"Using PVGIS data: {annual_production_per_kwp:.2f} kWh/kWp annually")
else:
    # No file, or a payload in an unexpected shape: fall back to the default
    annual_production_per_kwp = DEFAULT_ANNUAL_KWH_PER_KWP
    print(f"Using default value: {annual_production_per_kwp:.2f} kWh/kWp annually")

# Recompute every roof's yearly energy from its panel count and the annual
# yield chosen above, then rewrite the per-image CSV and total file in place.
roof_data_dir = 'Solar_Potential_Estimation/test_crowd/original/roof_data'
for csv_file in os.listdir(roof_data_dir):
    if csv_file.endswith('.csv'):
        base_name = csv_file.replace('.csv', '')
        csv_path = os.path.join(roof_data_dir, csv_file)

        print(f"Updating solar potential for {base_name}...")

        # Load roof data
        df = pd.read_csv(csv_path)

        # Each panel is assumed to be rated 250W, i.e. 0.25 kWp
        df['Solar_potential_per_year'] = df['Panels'] * 0.25 * annual_production_per_kwp

        # Overwrite the CSV with the refreshed column
        df.to_csv(csv_path, index=False)

        # Refresh the per-image total as well
        total_potential = df['Solar_potential_per_year'].sum()
        total_path = f"Solar_Potential_Estimation/test_crowd/original/total_pon/{base_name}.txt"
        with open(total_path, 'w') as f:
            f.write(f"Total Solar Potential: {total_potential:.2f} kWh/year")

        print(f"✅ Updated solar potential for {base_name}: {total_potential:.2f} kWh/year")

print("Solar potential calculation refinement complete!")


In [None]:
# Step 12: Visualize Results

import matplotlib.patches as mpatches

# Find all processed images (one CSV per image in the roof_data folder)
roof_data_dir = 'Solar_Potential_Estimation/test_crowd/original/roof_data'
processed_images = sorted(
    os.path.splitext(csv_file)[0]
    for csv_file in os.listdir(roof_data_dir)
    if csv_file.endswith('.csv')
)

# Create comprehensive visualization for each image
for base_name in processed_images:
    print(f"Creating visualization for {base_name}...")

    # Locate the original image: same base name, first extension that exists
    image_path = None
    for ext in ('.png', '.jpg', '.jpeg'):
        candidate = os.path.join(IMAGE_DIR, f"{base_name}{ext}")
        if os.path.exists(candidate):
            image_path = candidate
            break

    if image_path is None:
        print(f"⚠️ Could not find original image for {base_name}")
        continue

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # Load segmentation mask
    mask = np.load(os.path.join(MASKS_DIR, f"{base_name}.npy"))

    # Load labeled mask
    labeled_mask = np.load(f"Solar_Potential_Estimation/test_crowd/original/com_bin/{base_name}.npy")

    # Load roof data
    roof_data = pd.read_csv(os.path.join(roof_data_dir, f"{base_name}.csv"))

    # Create visualization: 2x2 grid (image, mask, coloured roofs, summary text)
    fig, axes = plt.subplots(2, 2, figsize=(20, 15))
    ax_image, ax_mask, ax_roofs, ax_stats = axes.ravel()

    # Original image
    ax_image.imshow(original_image)
    ax_image.set_title('Original Image')
    ax_image.axis('off')

    # Segmentation mask
    ax_mask.imshow(mask, cmap='gray')
    ax_mask.set_title('Rooftop Segmentation')
    ax_mask.axis('off')

    # Labeled roofs.
    # `plt.get_cmap` is used because `matplotlib.cm.get_cmap` was removed in
    # Matplotlib 3.9. The palette is indexed with an *integer* position: a
    # colormap called with a float treats it as a 0..1 fraction, so the float
    # ids produced by `iterrows()` all mapped to the same (last) colour.
    palette = plt.get_cmap('tab20')
    colored_mask = np.zeros((*labeled_mask.shape, 3))

    # Create legend patches
    patches = []

    # Color each roof and add to legend; colours repeat after palette.N roofs
    for position, row in enumerate(roof_data.itertuples(index=False)):
        roof_id = int(row.Roof_ID)
        color = palette(position % palette.N)[:3]  # Get RGB values
        colored_mask[labeled_mask == roof_id] = color

        # Add to legend
        area = row.Real_Area
        potential = row.Solar_potential_per_year
        patches.append(mpatches.Patch(color=color,
                                      label=f"ID {roof_id}: {area:.1f}m² ({potential:.1f} kWh/yr)"))

    ax_roofs.imshow(colored_mask)
    ax_roofs.set_title('Individual Roofs with IDs')
    ax_roofs.axis('off')

    # Legend goes outside the axes; a smaller font is used when there are many roofs
    if patches:
        legend_options = dict(handles=patches, bbox_to_anchor=(1.05, 1),
                              loc='upper left', borderaxespad=0.)
        if len(roof_data) > 10:
            legend_options['fontsize'] = 'small'
        ax_roofs.legend(**legend_options)

    # Summary statistics
    ax_stats.axis('off')

    # Create a table with statistics
    total_area = roof_data['Real_Area'].sum()
    total_panels = roof_data['Panels'].sum()
    total_energy = roof_data['Solar_potential_per_year'].sum()

    stats_text = f"""
    # Solar Potential Summary: {base_name}
    
    ## Overall Statistics:
    - Total number of roofs: {len(roof_data)}
    - Total roof area: {total_area:.2f} m²
    - Estimated solar panels: {total_panels}
    - Annual solar energy production: {total_energy:.2f} kWh
    
    ## Top 5 Roofs by Potential:
    """

    # Add top 5 roofs by potential
    top_roofs = roof_data.sort_values('Solar_potential_per_year', ascending=False).head(5)
    for i, row in top_roofs.iterrows():
        stats_text += f"- Roof ID {int(row['Roof_ID'])}: {row['Solar_potential_per_year']:.2f} kWh/year ({row['Real_Area']:.2f} m²)\n"

    ax_stats.text(0, 0.5, stats_text, fontsize=12, va='center', ha='left')

    # Adjust layout and save; close the figure to free memory between images
    fig.tight_layout()
    fig.savefig(f"Solar_Potential_Estimation/test_crowd/original/{base_name}_summary.png",
                bbox_inches='tight', dpi=300)
    plt.close(fig)

    print(f"✅ Created visualization for {base_name}")

print("Visualization complete!")


In [None]:
# Step 13: Save Results to Google Drive (Optional)

# Save results to Google Drive.
# The copy is done with shutil instead of `!cp`: a failing shell command does
# not raise a Python exception, so with `!cp` the `except` branch could never
# run and the success message was printed even when nothing had been copied.
import shutil

drive_dir = "/content/drive/MyDrive/Solar_Potential_Results"
result_dirs = [
    'Solar_Potential_Estimation/test_crowd/original',
    'Solar_Potential_Estimation/test_crowd/pred',
    'solar_power_json',
]

try:
    # If Drive is not mounted, os.makedirs would quietly create the folder on
    # the local disk instead, so check for the mount first.
    if not os.path.isdir('/content/drive/MyDrive'):
        raise FileNotFoundError("Google Drive is not mounted at /content/drive")

    # Create directory in Google Drive
    os.makedirs(drive_dir, exist_ok=True)

    # Copy the contents of every result folder into the Drive folder
    for source_dir in result_dirs:
        if os.path.isdir(source_dir):
            shutil.copytree(source_dir, drive_dir, dirs_exist_ok=True)
        else:
            print(f"⚠️ Skipping missing directory: {source_dir}")

    print(f"✅ Results saved to Google Drive: {drive_dir}")
except OSError as e:
    # shutil.Error is a subclass of OSError, so copy failures land here too
    print(f"❌ Error saving to Google Drive: {e}")
    print("You can manually copy the results from the following directories:")
    print("- Solar_Potential_Estimation/test_crowd/original/")
    print("- Solar_Potential_Estimation/test_crowd/pred/")
    print("- solar_power_json/")


In [None]:
# Step 14: Summary and Conclusion

banner = "=" * 80
print(banner)
print("🌞 SOLAR POTENTIAL ESTIMATION COMPLETE 🌞")
print(banner)

# Every processed image has exactly one CSV in the roof_data folder
roof_data_dir = 'Solar_Potential_Estimation/test_crowd/original/roof_data'
csv_files = [name for name in os.listdir(roof_data_dir) if name.endswith('.csv')]
processed_count = len(csv_files)

print(f"Processed {processed_count} images")

# Accumulate the totals across all images
total_energy = 0
total_area = 0
total_roofs = 0
total_panels = 0

for csv_file in csv_files:
    df = pd.read_csv(os.path.join(roof_data_dir, csv_file))
    total_energy += df['Solar_potential_per_year'].sum()
    total_area += df['Real_Area'].sum()
    total_roofs += len(df)
    total_panels += df['Panels'].sum()

print("\nOverall Results:")
print(f"- Total number of identified roofs: {total_roofs}")
print(f"- Total roof area: {total_area:.2f} m²")
print(f"- Estimated solar panels: {total_panels}")
print(f"- Annual solar energy production: {total_energy:.2f} kWh")

# Per-panel average only makes sense when at least one panel was placed
if total_panels > 0:
    print(f"- Average energy per panel: {total_energy/total_panels:.2f} kWh/year")

print("\nResults are saved in the following directories:")
for location_line in (
    "- Solar_Potential_Estimation/test_crowd/original/components/ - Individual roof visualizations",
    "- Solar_Potential_Estimation/test_crowd/original/roof_data/ - CSV files with detailed roof data",
    "- Solar_Potential_Estimation/test_crowd/original/total_pon/ - Total potential for each image",
    "- Solar_Potential_Estimation/test_crowd/original/ - Summary visualizations",
):
    print(location_line)

print("\nThank you for using the Solar Potential Estimation tool!")
print(banner)


In [None]:
# Step 15: Optional - Training the Model (Advanced)

# This cell is optional and for advanced users who want to train the model on their own data
# Training requires significant computational resources and time

"""
# Uncomment and run this cell if you want to train the model

# Step 15.1: Setup Training Environment
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
# RooftopDataset.__getitem__ below calls Image.open, so PIL must be imported here
from PIL import Image
import time
from tqdm import tqdm

# Add project directories to path
sys.path.append('Solar_Potential_Estimation')
from model.seg_hrnet_ocr import get_seg_model
from lib.config import config

# Step 15.2: Define Dataset Class for Training
class RooftopDataset(torch.utils.data.Dataset):
    def __init__(self, root, split='train'):
        self.root = root
        self.split = split
        self.images = sorted(os.listdir(os.path.join(root, split, 'images')))
        self.masks = sorted(os.listdir(os.path.join(root, split, 'masks')))
        
        print(f"Found {len(self.images)} images and {len(self.masks)} masks in {split} set")
        
        # Define transformations
        if split == 'train':
            self.transform = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomRotate90(p=0.5),  
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.3),
                A.RandomGamma(p=0.2),
                A.ElasticTransform(p=0.1, alpha=120, sigma=6),
                A.GaussianBlur(blur_limit=(3, 7), p=0.1), 
                A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.1),
                A.Cutout(p=0.1, max_h_size=32, max_w_size=32, num_holes=5),
                A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2()
            ], additional_targets={'mask': 'mask'})
        else:
            self.transform = A.Compose([
                A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2()
            ], additional_targets={'mask': 'mask'})
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        # Load images and masks
        img_path = os.path.join(self.root, self.split, 'images', self.images[idx])
        mask_path = os.path.join(self.root, self.split, 'masks', self.masks[idx])
        
        # Loading image and mask
        image = np.array(Image.open(img_path).convert('RGB')).astype(np.uint8)
        mask = np.load(mask_path)
        
        # Ensure mask has correct shape
        if len(mask.shape) == 2:
            mask = np.expand_dims(mask, axis=-1)
        
        # Apply transformations
        transformed = self.transform(image=image, mask=mask)
        image = transformed['image']
        mask = transformed['mask']
        
        # Ensure mask has correct shape (1, H, W)
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        elif mask.ndim == 3 and mask.shape[0] != 1:
            mask = mask.permute(2, 0, 1)
        
        mask = mask.float()
        
        return {'image': image, 'mask': mask}

# Step 15.3: Define Training Configuration
class TrainingConfig:
    def __init__(self):
        # Data paths
        self.data_root = 'Solar_Potential_Estimation/india_dataset/twoChannels_in'
        
        # Training parameters
        self.batch_size = 4
        self.num_workers = 2
        self.learning_rate = 1e-4
        self.weight_decay = 1e-4
        self.num_epochs = 100
        
        # Model parameters
        self.config_path = 'Solar_Potential_Estimation/configs/inria_hrnet_ocr.yaml'
        self.pretrained_path = 'Solar_Potential_Estimation/pretrained/f_epoch_49.pth'  # Optional
        
        # Output paths
        self.output_dir = 'Solar_Potential_Estimation/outputs'
        self.log_dir = os.path.join(self.output_dir, 'logs')
        self.checkpoint_dir = os.path.join(self.output_dir, 'checkpoints')
        
        # Create output directories
        os.makedirs(self.log_dir, exist_ok=True)
        os.makedirs(self.checkpoint_dir, exist_ok=True)
        
        # Device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")

# Step 15.4: Define Training Functions
def train_one_epoch(model, dataloader, criterion, optimizer, device, epoch):
    model.train()
    running_loss = 0.0
    
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}")
    for batch in pbar:
        images = batch['image'].to(device)
        masks = batch['mask'].to(device)
        
        # Forward pass
        outputs = model(images)
        
        # Calculate loss
        loss = criterion(outputs, masks)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Update statistics
        running_loss += loss.item()
        pbar.set_postfix({"loss": loss.item()})
    
    return running_loss / len(dataloader)

def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validation"):
            images = batch['image'].to(device)
            masks = batch['mask'].to(device)
            
            # Forward pass
            outputs = model(images)
            
            # Calculate loss
            loss = criterion(outputs, masks)
            
            # Update statistics
            running_loss += loss.item()
    
    return running_loss / len(dataloader)

def dice_coefficient(y_pred, y_true, smooth=1e-6):
    # Apply sigmoid to predictions
    y_pred = torch.sigmoid(y_pred)
    
    # Flatten tensors
    y_pred = y_pred.view(-1)
    y_true = y_true.view(-1)
    
    # Calculate Dice coefficient
    intersection = (y_pred * y_true).sum()
    dice = (2. * intersection + smooth) / (y_pred.sum() + y_true.sum() + smooth)
    
    return dice.item()

# Step 15.5: Main Training Loop
def train_model():
    # Initialize configuration
    cfg = TrainingConfig()
    
    # Create datasets and dataloaders
    train_dataset = RooftopDataset(cfg.data_root, split='train')
    val_dataset = RooftopDataset(cfg.data_root, split='val')
    
    train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.num_workers)
    val_loader = DataLoader(val_dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=cfg.num_workers)
    
    # Initialize model
    model_cfg = config.config
    model_cfg.defrost()
    model_cfg.merge_from_file(cfg.config_path)
    model_cfg.freeze()
    
    model = get_seg_model(model_cfg)
    model = model.to(cfg.device)
    
    # Load pretrained weights if available
    if os.path.exists(cfg.pretrained_path):
        print(f"Loading pretrained weights from {cfg.pretrained_path}")
        checkpoint = torch.load(cfg.pretrained_path, map_location=cfg.device)
        if 'state_dict' in checkpoint:
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint)
    
    # Define loss function and optimizer
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=cfg.learning_rate, weight_decay=cfg.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
    
    # Initialize TensorBoard writer
    writer = SummaryWriter(cfg.log_dir)
    
    # Training loop
    best_val_loss = float('inf')
    
    for epoch in range(cfg.num_epochs):
        # Train
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer, cfg.device, epoch)
        
        # Validate
        val_loss = validate(model, val_loader, criterion, cfg.device)
        
        # Update learning rate
        scheduler.step(val_loss)
        
        # Log metrics
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)
        
        # Print progress
        print(f"Epoch {epoch+1}/{cfg.num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        
        # Save checkpoint
        checkpoint_path = os.path.join(cfg.checkpoint_dir, f"epoch_{epoch}.pth")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
        }, checkpoint_path)
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_path = os.path.join(cfg.checkpoint_dir, "best_model.pth")
            torch.save(model.state_dict(), best_model_path)
            print(f"Saved best model with validation loss: {best_val_loss:.4f}")
    
    writer.close()
    print("Training complete!")

# Execute training
# train_model()
"""

# Tell the reader that the training code above is disabled by default
for notice_line in (
    "Optional training cell is available but commented out.",
    "To train the model, uncomment the code in this cell and run it.",
    "Note: Training requires significant computational resources and time.",
    "For most users, using the pre-trained model is recommended.",
):
    print(notice_line)
