# Set Project Environment, Device, and Display

In [1]:
#!pip install ultralytics==8.0.20

# Libraries and dependencies to install and import
import torchvision.transforms as transforms
from IPython.display import display, Image
from PIL import UnidentifiedImageError
import matplotlib.pyplot as plt
from torchvision import models
from ultralytics import YOLO
from tqdm.notebook import tqdm
import torch.nn as nn
from PIL import Image
import warnings
import shutil
import torch
import math
import csv
import os
import os.path as osp


# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device for the project:', device)

# Silence every Python warning for the rest of the session.
# (The plotting backend is selected by the magic on the following line.)
warnings.filterwarnings("ignore")
%matplotlib inline 


cuda


# Set Model Versions and Path Directories

In [2]:
# Define solar panel classification and segmentation model versions
classi_model        = "model9"
seg_model           = "model1"

# Set the EPSCoR state of interest and associated model task name
state               = "Arkansas"
project_name        = f"{classi_model}inference"

# Set the base path directory (replace the placeholder with your own user name)
path = "C:\\Users\\{your username}\\"

# Path directories for the task of solar panel classification.
# The trailing "" makes osp.join end the two directory paths with a separator.
classi_output_path  = osp.join(path, project_name, "")
classi_dir_path     = osp.join(path, "counties", "")
classi_model_path   = osp.join(path, "models", "classi", classi_model + ".pt")

# Path directories for the task of solar panel segmentation
seg_model_path      = osp.join(path, "models", "seg", seg_model + ".pt")
seg_result_file     = f"{seg_model}_predictions.csv"
# Joined rather than concatenated: classi_output_path already ends with a separator
pred_file_path      = osp.join(classi_output_path, seg_model)

# Create the output directories (no error if they already exist)
os.makedirs(classi_output_path, exist_ok=True)
os.makedirs(pred_file_path, exist_ok=True)

In [3]:
# Echo the resolved output locations so they can be checked before running inference
for _label, _location in (
    ('Directory for storing the solar panel classification result:', classi_output_path),
    ('Directory for storing the solar panel segmentation result:', pred_file_path),
):
    print(_label, _location)
print(f"Results file: {seg_result_file}")

C:\Users\pjrio\model9inference
C:\Users\pjrio\model9inference\model1
results file: model1_predictions.csv


# Utility Functions

In [4]:
# Freeze a model's parameters for feature extraction; leave them trainable for fine-tuning.
def set_parameter_requires_grad(model, feature_extracting):
    """Turn off gradient tracking on every parameter when feature extracting.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        feature_extracting: When truthy, ``requires_grad`` is set to ``False``
            on every parameter; when falsy, the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

# Initialize the chosen pre-trained model parameters
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a classifier whose final layer outputs ``num_classes`` values.

    Args:
        model_name: Architecture key. Only ``"densenet"`` (torchvision
            DenseNet-121) is implemented.
        num_classes: Output size of the replacement classification layer.
        feature_extract: Forwarded to ``set_parameter_requires_grad``; when
            truthy the existing parameters are frozen before the new layer
            is attached.
        use_pretrained: Forwarded to torchvision as ``pretrained``.

    Returns:
        Tuple ``(model_ft, input_size)``: the model and the input size
        associated with it (224 for DenseNet).

    Raises:
        ValueError: If ``model_name`` is not a supported architecture.
    """
    # Fail loudly instead of handing back (None, 0), which would only surface
    # later as an AttributeError on the first use of the model.
    if model_name != "densenet":
        raise ValueError(
            f"Unsupported model_name {model_name!r}; only 'densenet' is implemented."
        )

    model_ft = models.densenet121(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, feature_extract)
    # Swap the classifier head for one sized to this task; it is created after
    # the optional freeze above.
    num_ftrs = model_ft.classifier.in_features
    model_ft.classifier = nn.Linear(num_ftrs, num_classes)
    input_size = 224

    return model_ft, input_size

# Architecture key and number of output classes handed to initialize_model.
# Only "densenet" has a branch in initialize_model as defined in this notebook.
model_name, num_classes = "densenet", 2

# False = fine-tuning (parameters stay trainable); True = feature extraction (parameters frozen)
feature_extract = False

# Instantiate the pre-trained model and record the input size reported for it
model_ft, input_size = initialize_model(
    model_name=model_name,
    num_classes=num_classes,
    feature_extract=feature_extract,
    use_pretrained=True,
)

In [5]:
# Stores images from the residential building dataset that are detected to have solar panels
def infer(folder_path, destination_path, output_inference):
    """Classify every file in ``folder_path`` and copy class-1 images.

    Relies on the notebook-level globals ``model`` (the classifier) and
    ``device``; both must be defined before this function is called.

    Args:
        folder_path: Directory whose regular files are classified.
        destination_path: Directory that receives a copy of every file
            predicted as class 1. If it already exists the function returns
            immediately without processing anything, so a rerun skips folders
            that already have an output directory.
        output_inference: When truthy, print the predicted class per file.

    Returns:
        None.
    """
    # NOTE: a folder whose earlier run was interrupted also has an output
    # directory and is therefore skipped as well.
    if os.path.exists(destination_path):
        return
    os.makedirs(destination_path)

    # Create a list of image file names in the folder
    image_filenames = [filename for filename in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, filename))]

    # Create a transform to resize and normalize the input images
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    for filename in image_filenames:
        image_path = os.path.join(folder_path, filename)
        try:
            # The context manager closes the file handle. convert("RGB")
            # guarantees three channels, which the 3-value Normalize above
            # needs (RGBA, palette or grayscale files would otherwise fail).
            with Image.open(image_path) as image:
                input_tensor = transform(image.convert("RGB")).unsqueeze(0)
        except UnidentifiedImageError:
            # Not a readable image: skip this file
            continue

        # Perform inference
        with torch.no_grad():
            output = model(input_tensor.to(device))

        # Process the output and print the result
        predicted_class = torch.argmax(output, dim=1).item()
        if output_inference:
            print(f'{filename}: predicted class = {predicted_class}')

        if predicted_class == 1:
            destination_image_path = os.path.join(destination_path, filename)
            if not os.path.exists(destination_image_path):
                shutil.copy(image_path, destination_path)

In [6]:
# Determine the area of the segmentation (solar panels)
def calculate_pixel_area(latitude=34.7465, zoom_level=21, earth_radius=6378137, tile_size=256):
    """Return the ground area, in square meters, covered by one map pixel.

    The per-pixel ground length is
    ``cos(latitude) * 2 * pi * earth_radius / (tile_size * 2**zoom_level)``
    and the returned value is its square.

    Args:
        latitude: Latitude in degrees. Defaults to 34.7465, the value used
            for the center of Arkansas.
        zoom_level: Map zoom level of the imagery (default 21).
        earth_radius: Earth radius in meters (default 6378137).
        tile_size: Tile edge length in pixels (default 256, the regular
            Google Maps tile size).

    Returns:
        Square meters per pixel as a float.
    """
    meters_per_pixel = (math.cos(latitude * math.pi/180) * 2 * math.pi * earth_radius) / (tile_size * 2**zoom_level)
    return meters_per_pixel**2

# Defaults reproduce the original Arkansas / zoom-21 configuration
pixel_area = calculate_pixel_area()
print('Pixel Area:', pixel_area)

0.0037619668420127116


In [7]:
# Load the county name -> code lookup for the selected state.
# The file is located under the configured base `path` rather than a
# hard-coded user directory, so the notebook works for other users too.
county_name_path = osp.join(path, "tl_2020_05_bg", "merge", f"{state}.txt")
county_numbers = {}
with open(county_name_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        # Each record has five comma-separated fields; only three are used
        _county_ab, state_number, county_number, county_name, _unused = line.split(",")
        # Key: lower-cased county name without the " County" suffix;
        # value: state number and county number concatenated
        county_numbers[county_name.replace(" County","").lower()] = state_number + county_number
print('Number of Counties:', len(county_numbers))
print('County codes:', county_numbers)

{'arkansas': '05001', 'ashley': '05003', 'baxter': '05005', 'benton': '05007', 'boone': '05009', 'bradley': '05011', 'calhoun': '05013', 'carroll': '05015', 'chicot': '05017', 'clark': '05019', 'clay': '05021', 'cleburne': '05023', 'cleveland': '05025', 'columbia': '05027', 'conway': '05029', 'craighead': '05031', 'crawford': '05033', 'crittenden': '05035', 'cross': '05037', 'dallas': '05039', 'desha': '05041', 'drew': '05043', 'faulkner': '05045', 'franklin': '05047', 'fulton': '05049', 'garland': '05051', 'grant': '05053', 'greene': '05055', 'hempstead': '05057', 'hot spring': '05059', 'howard': '05061', 'independence': '05063', 'izard': '05065', 'jackson': '05067', 'jefferson': '05069', 'johnson': '05071', 'lafayette': '05073', 'lawrence': '05075', 'lee': '05077', 'lincoln': '05079', 'little river': '05081', 'logan': '05083', 'lonoke': '05085', 'madison': '05087', 'marion': '05089', 'miller': '05091', 'mississippi': '05093', 'monroe': '05095', 'montgomery': '05097', 'nevada': '05099

In [8]:
# Select working directories in case you want to ignore or process specific dirs
to_process = ['Jefferson_County','Woodruff_County']
ignore     = ['Benton_County','Pulaski_County','Saline','Sebastian','Johnson_County', 'Lafayette_County', 'Lawrence_County', 'Lee_County', 'Lincoln_County', 'Little River_County', 'Logan_County', 'Lonoke_County', 'Madison_County', 'Marion_County', 'Miller_County', 'Mississippi_County', 'Monroe_County', 'Montgomery_County', 'Nevada_County', 'Newton_County', 'Ouachita_County', 'Perry_County', 'Phillips_County', 'Pike_County', 'Poinsett_County', 'Polk_County', 'Pope_County', 'Prairie_County', 'Pulaski_County', 'Randolph_County', 'Saline_County', 'Scott_County', 'Searcy_County', 'Sevier_County', 'Sharp_County', 'Stone_County', 'Union_County', 'Van Buren_County', 'Woodruff_County',"Sebastian_County","Faulkner_County","Washington_County", "Garland_County","White_County","Yell_County"]

# Select one of the options:
#   "all"     - every subdirectory of the base dir
#   "only"    - subdirectories of the base dir that are in the to_process list
#   "exclude" - subdirectories of the base dir that are not in the ignore list
selection_mode = "all"

# List the base dir once, then filter according to the chosen mode
county_dirs = [name for name in os.listdir(classi_dir_path) if os.path.isdir(os.path.join(classi_dir_path, name))]
if selection_mode == "all":
    subdirectories = county_dirs
elif selection_mode == "only":
    subdirectories = [name for name in county_dirs if name in to_process]
elif selection_mode == "exclude":
    subdirectories = [name for name in county_dirs if name not in ignore]
else:
    raise ValueError(f"Unknown selection_mode {selection_mode!r}; use 'all', 'only' or 'exclude'.")

print('County Subdirectories:',subdirectories)
print('Number of Counties:', len(subdirectories))

['Arkansas_County', 'Ashley_County', 'Baxter_County', 'Benton_County', 'Boone_County', 'Bradley_County', 'Calhoun_County', 'Carroll_County', 'Chicot_County', 'Clark_County', 'Clay_County', 'Cleburne_County', 'Cleveland_County', 'Columbia_County', 'Conway_County', 'Craighead_County', 'Crawford_County', 'Crittenden_County', 'Cross_County', 'Dallas_County', 'Desha_County', 'Drew_County', 'Faulkner_County', 'Franklin_County', 'Fulton_County', 'Garland_County', 'Grant_County', 'Greene_County', 'Hempstead_County', 'Hot Spring_County', 'Howard_County', 'Independence_County', 'Izard_County', 'Jackson_County', 'Jefferson_County', 'Johnson_County', 'Lafayette_County', 'Lawrence_County', 'Lee_County', 'Lincoln_County', 'Little River_County', 'Logan_County', 'Lonoke_County', 'Madison_County', 'Marion_County', 'Miller_County', 'Mississippi_County', 'Monroe_County', 'Montgomery_County', 'Nevada_County', 'Newton_County', 'Ouachita_County', 'Perry_County', 'Phillips_County', 'Pike_County', 'Poinsett_C

# Solar Panel Classification

In [None]:
# Display image inference
output_inference  = False

# Load the saved model weights. map_location remaps the stored tensors onto
# the active device, so a checkpoint saved on a GPU still loads on a CPU-only
# machine.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
saved_model = torch.load(classi_model_path, map_location=device)

# Create a new model instance and load the saved model's state dictionary
model, _ = initialize_model(model_name, num_classes, feature_extract=False, use_pretrained=False)
model.load_state_dict(saved_model)

# Set the device for the model
model = model.to(device)

# Set the model to evaluation mode
model.eval()

# Perform solar panel classification by subdirectories.
# Directory layout walked here: county / tract / block group / image files.
for subdirectory in tqdm(subdirectories, desc='Processing counties: '):
    tract_dir = os.path.join(classi_dir_path, subdirectory)
    tracts = [name for name in os.listdir(tract_dir) if os.path.isdir(os.path.join(tract_dir, name))]
    for tract in tracts:
        blkgps_dir = os.path.join(tract_dir, tract)
        blkgps = [name for name in os.listdir(blkgps_dir) if os.path.isdir(os.path.join(blkgps_dir, name))]
        for blkgp in blkgps:
            # Create the input and output folder paths; the output tree mirrors
            # the input tree beneath <classi_output_path>/<project_name>
            input_folder_path  = os.path.join(blkgps_dir, blkgp)
            output_folder_path = os.path.join(classi_output_path, project_name, subdirectory, tract, blkgp)
            infer(input_folder_path, output_folder_path, output_inference)

Processing counties:   0%|          | 0/74 [00:00<?, ?it/s]

Images:   0%|          | 0/1 [00:00<?, ?it/s]

Images:   0%|          | 0/1491 [00:00<?, ?it/s]

Images:   0%|          | 0/2 [00:00<?, ?it/s]

Images:   0%|          | 0/1825 [00:00<?, ?it/s]

Images:   0%|          | 0/525 [00:00<?, ?it/s]

Images:   0%|          | 0/863 [00:00<?, ?it/s]

Images:   0%|          | 0/817 [00:00<?, ?it/s]

Images:   0%|          | 0/402 [00:00<?, ?it/s]

Images:   0%|          | 0/621 [00:00<?, ?it/s]

Images:   0%|          | 0/917 [00:00<?, ?it/s]

Images:   0%|          | 0/359 [00:00<?, ?it/s]

Images:   0%|          | 0/1040 [00:00<?, ?it/s]

Images:   0%|          | 0/618 [00:00<?, ?it/s]

Images:   0%|          | 0/1083 [00:00<?, ?it/s]

Images:   0%|          | 0/1139 [00:00<?, ?it/s]

Images:   0%|          | 0/438 [00:00<?, ?it/s]

Images:   0%|          | 0/1 [00:00<?, ?it/s]

Images:   0%|          | 0/20 [00:00<?, ?it/s]

Images:   0%|          | 0/1 [00:00<?, ?it/s]

Images:   0%|          | 0/869 [00:00<?, ?it/s]

Images:   0%|          | 0/531 [00:00<?, ?it/s]

Images:   0%|          | 0/775 [00:00<?, ?it/s]

Images:   0%|          | 0/1123 [00:00<?, ?it/s]

Images:   0%|          | 0/398 [00:00<?, ?it/s]

Images:   0%|          | 0/467 [00:00<?, ?it/s]

Images:   0%|          | 0/1025 [00:00<?, ?it/s]

Images:   0%|          | 0/568 [00:00<?, ?it/s]

Images:   0%|          | 0/819 [00:00<?, ?it/s]

Images:   0%|          | 0/430 [00:00<?, ?it/s]

Images:   0%|          | 0/458 [00:00<?, ?it/s]

Images:   0%|          | 0/277 [00:00<?, ?it/s]

Images:   0%|          | 0/578 [00:00<?, ?it/s]

Images:   0%|          | 0/314 [00:00<?, ?it/s]

Images:   0%|          | 0/836 [00:00<?, ?it/s]

Images:   0%|          | 0/384 [00:00<?, ?it/s]

Images:   0%|          | 0/484 [00:00<?, ?it/s]

Images:   0%|          | 0/363 [00:00<?, ?it/s]

Images:   0%|          | 0/447 [00:00<?, ?it/s]

Images:   0%|          | 0/566 [00:00<?, ?it/s]

Images:   0%|          | 0/635 [00:00<?, ?it/s]

Images:   0%|          | 0/500 [00:00<?, ?it/s]

Images:   0%|          | 0/801 [00:00<?, ?it/s]

Images:   0%|          | 0/682 [00:00<?, ?it/s]

Images:   0%|          | 0/673 [00:00<?, ?it/s]

Images:   0%|          | 0/894 [00:00<?, ?it/s]

Images:   0%|          | 0/1073 [00:00<?, ?it/s]

Images:   0%|          | 0/665 [00:00<?, ?it/s]

Images:   0%|          | 0/914 [00:00<?, ?it/s]

Images:   0%|          | 0/998 [00:00<?, ?it/s]

Images:   0%|          | 0/718 [00:00<?, ?it/s]

Images:   0%|          | 0/1082 [00:00<?, ?it/s]

Images:   0%|          | 0/1095 [00:00<?, ?it/s]

Images:   0%|          | 0/862 [00:00<?, ?it/s]

Images:   0%|          | 0/1078 [00:00<?, ?it/s]

Images:   0%|          | 0/1241 [00:00<?, ?it/s]

Images:   0%|          | 0/1695 [00:00<?, ?it/s]

Images:   0%|          | 0/1330 [00:00<?, ?it/s]

Images:   0%|          | 0/611 [00:00<?, ?it/s]

Images:   0%|          | 0/868 [00:00<?, ?it/s]

Images:   0%|          | 0/782 [00:00<?, ?it/s]

Images:   0%|          | 0/1545 [00:00<?, ?it/s]

Images:   0%|          | 0/971 [00:00<?, ?it/s]

Images:   0%|          | 0/1544 [00:00<?, ?it/s]

Images:   0%|          | 0/793 [00:00<?, ?it/s]

Images:   0%|          | 0/1396 [00:00<?, ?it/s]

Images:   0%|          | 0/1158 [00:00<?, ?it/s]

Images:   0%|          | 0/549 [00:00<?, ?it/s]

Images:   0%|          | 0/923 [00:00<?, ?it/s]

Images:   0%|          | 0/1038 [00:00<?, ?it/s]

Images:   0%|          | 0/301 [00:00<?, ?it/s]

Images:   0%|          | 0/209 [00:00<?, ?it/s]

Images:   0%|          | 0/231 [00:00<?, ?it/s]

Images:   0%|          | 0/155 [00:00<?, ?it/s]

Images:   0%|          | 0/327 [00:00<?, ?it/s]

Images:   0%|          | 0/165 [00:00<?, ?it/s]

Images:   0%|          | 0/158 [00:00<?, ?it/s]

Images:   0%|          | 0/253 [00:00<?, ?it/s]

Images:   0%|          | 0/119 [00:00<?, ?it/s]

Images:   0%|          | 0/227 [00:00<?, ?it/s]

Images:   0%|          | 0/136 [00:00<?, ?it/s]

Images:   0%|          | 0/199 [00:00<?, ?it/s]

Images:   0%|          | 0/133 [00:00<?, ?it/s]

Images:   0%|          | 0/172 [00:00<?, ?it/s]

Images:   0%|          | 0/106 [00:00<?, ?it/s]

Images:   0%|          | 0/201 [00:00<?, ?it/s]

Images:   0%|          | 0/195 [00:00<?, ?it/s]

Images:   0%|          | 0/123 [00:00<?, ?it/s]

Images:   0%|          | 0/128 [00:00<?, ?it/s]

Images:   0%|          | 0/102 [00:00<?, ?it/s]

Images:   0%|          | 0/94 [00:00<?, ?it/s]

Images:   0%|          | 0/91 [00:00<?, ?it/s]

# Solar Panel Segmentation

In [16]:
# No working-directory change is made here: every path used below is built
# from the configured base paths. (The former `!cd {pred_file_path}` ran in a
# throwaway subshell and never affected the kernel's working directory.)

# Initialize subdirectories: the county folders produced by the classification step
seg_input_root = os.path.join(classi_output_path, project_name)
subdirectories   = [name for name in os.listdir(seg_input_root) if os.path.isdir(os.path.join(seg_input_root, name))]
print(subdirectories)

# Debug mode (0 = off, 1 = print and plot every mask)
debug = 0

# Prediction file; any file left over from a previous run is removed first
predictions_file = os.path.join(pred_file_path, seg_result_file)
if os.path.exists(predictions_file):
    os.remove(predictions_file)

# Load a custom model
# NOTE: this rebinds the global `model`, which infer() also reads; rerun the
# classification cell before calling infer() again.
model = YOLO(seg_model_path)

# Variables for tracking the total number per blkgp
blkgp_sp_count    = 0
blkgp_numb_houses = 0
blkgp_sp_area     = 0

# Perform solar panel segmentation by the subdirectory level
for subdirectory in tqdm(subdirectories, desc='Processing counties: '):
    tract_dir = os.path.join(seg_input_root, subdirectory)
    tracts   = [name for name in os.listdir(tract_dir) if os.path.isdir(os.path.join(tract_dir, name))]

    for tract in tracts:
        blkgps_dir = os.path.join(tract_dir, tract)
        blkgps   = [name for name in os.listdir(blkgps_dir) if os.path.isdir(os.path.join(blkgps_dir, name))]
        for blkgp in blkgps:
            # Reset the totals for EVERY block group. A row is written for each
            # one below, so a block group with no classified images must
            # report zeros rather than the previous block group's values.
            blkgp_sp_count    = 0
            blkgp_numb_houses = 0
            blkgp_sp_area     = 0

            # Total houses = number of entries in the original (pre-classification) image folder
            input_img_folder_path = os.path.join(classi_dir_path, subdirectory, tract, blkgp)
            blkgp_total_houses = len(os.listdir(input_img_folder_path))

            # Folder holding the images the classifier copied for this block group
            input_folder_path = os.path.join(blkgps_dir, blkgp)

            # Only run the segmentation model when there is something to segment
            if os.listdir(input_folder_path):
                results = model.predict(source=input_folder_path, verbose=False)
                for result in results:
                    if result.masks is None:
                        continue
                    blkgp_numb_houses += 1
                    for mask in result.masks.masks:
                        blkgp_sp_count += 1
                        # Calculate area of mask: positive pixels times area per pixel.
                        # .item() turns the tensor count into a plain Python number.
                        # NOTE(review): confirm the mask resolution matches the image
                        # resolution that pixel_area was derived for.
                        mask_area = (mask > 0).sum().item() * pixel_area
                        blkgp_sp_area += mask_area
                        if debug == 1:
                            # The NumPy copy is only needed for plotting
                            mask_np = mask.cpu().numpy()
                            print(result.masks.orig_shape)
                            print("(mask_np > 0).sum():",(mask > 0).sum())
                            print(mask)
                            print(mask.size())
                            plt.imshow(mask_np, cmap='gray')
                            plt.show()
                            print(f"Area of the mask: {mask_area:.2f} square meters\n")

            # Row key: <county code>-<tract>-<block group>
            num_tract = (tract.replace("_CensusTract",""))
            num_blkgp = (blkgp.replace("_BlockGroup",""))
            county_code = county_numbers[subdirectory.replace("_County","").lower()]
            # NOTE(review): blkgp_sp_area is accumulated but not written to the
            # file; confirm whether an area column is wanted.
            with open(predictions_file, 'a') as output_file:
                output_file.write(f"{county_code}-{num_tract}-{num_blkgp},{blkgp_total_houses} ,{blkgp_numb_houses}, {blkgp_sp_count}\n")

['Arkansas_County', 'Ashley_County', 'Baxter_County', 'Boone_County', 'Bradley_County', 'Calhoun_County', 'Carroll_County', 'Chicot_County', 'Clark_County', 'Clay_County', 'Cleburne_County', 'Cleveland_County', 'Columbia_County', 'Conway_County', 'Craighead_County', 'Crawford_County', 'Crittenden_County', 'Cross_County', 'Dallas_County', 'Desha_County', 'Drew_County', 'Franklin_County', 'Fulton_County', 'Grant_County', 'Greene_County', 'Hempstead_County', 'Hot Spring_County', 'Howard_County', 'Independence_County', 'Izard_County', 'Jackson_County', 'Jefferson_County', 'Johnson_County', 'Lafayette_County', 'Lawrence_County', 'Lee_County', 'Lincoln_County', 'Little River_County', 'Logan_County', 'Lonoke_County', 'Madison_County', 'Marion_County', 'Miller_County', 'Mississippi_County', 'Monroe_County', 'Montgomery_County', 'Nevada_County', 'Newton_County', 'Ouachita_County', 'Perry_County', 'Phillips_County', 'Pike_County', 'Poinsett_County', 'Polk_County', 'Pope_County', 'Prairie_County'

  0%|          | 0/66 [00:00<?, ?it/s]Ultralytics YOLOv8.0.20  Python-3.10.10 torch-1.12.1 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 4096MiB)
YOLOv8x-seg summary (fused): 295 layers, 71721619 parameters, 0 gradients, 343.7 GFLOPs
100%|██████████| 66/66 [06:49<00:00,  6.20s/it]
