In [1]:
import cv2
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon
import json
import os
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix,graycoprops
from skimage.measure import regionprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

In [2]:
# Load the xBD dataset images
def load_images(image_path,image_filenames):
    images = []
    for filename in image_filenames:
        image = cv2.imread(image_path + filename)  # Load image using OpenCV
        images.append(image)
    return images

In [3]:
# Load the corresponding GeoJSON files and extract the building polygons
def load_gis_data(geojson_path):
        geojson_filenames = os.listdir(geojson_path)  # Load GeoJSON filenames
    polygons = []
    for filename in geojson_filenames:
        with open(geojson_path + filename) as file:
            data = json.load(file)
            features = data['features']['lng_lat']  # Access the 'lng_lat' array
            for feature in features:
                if 'wkt' in feature:
                    wkt_string = feature['wkt']
                    polygon = wkt.loads(wkt_string)
                    polygons.append(polygon)
    gis_data = gpd.GeoDataFrame(geometry=polygons)
    #print(gis_data)
    return gis_data

In [4]:
def preprocess_image(image):
    # Convert image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Convert the image to 8-bit unsigned integer format
    normalized_image = cv2.normalize(gray_image, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)

    # Resize the image to a desired size
    resized_image = cv2.resize(normalized_image, (256, 256))

    # Apply Gaussian smoothing to reduce noise
    smoothed_image = cv2.GaussianBlur(resized_image, (5, 5), 0)

    return smoothed_image

In [5]:
def extract_texture_features(image):
    # Extract Local Binary Patterns (LBP) texture features
    lbp_image = local_binary_pattern(image, P=8, R=1, method='uniform')
    hist, _ = np.histogram(lbp_image.ravel(), bins=np.arange(0, 10), density=True)
    return hist

In [6]:
def extract_shape_features(image):
    # Check if the image is already single-channel (grayscale)
    if len(image.shape) == 2 or (len(image.shape) == 3 and image.shape[2] == 1):
        gray_image = image
    else:
        # Convert the image to grayscale
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Convert the image to 8-bit unsigned integer format
    binary_image = cv2.normalize(gray_image, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Extract the number of contours or other shape-related features of interest
    shape_features = len(contours)

    return shape_features

In [7]:
def extract_color_features(image):
    # Convert the image to the BGR format if it's not already
    if len(image.shape) < 3 or image.shape[2] < 3:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] > 3:
        image = image[:, :, :3]

    # Extract color histogram features
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()

    return hist

In [8]:
def image_overlaps_with_damaged_area(image_filename, gis_data):
    # Load the corresponding GeoJSON file for the image
    geojson_filename = image_filename.replace('.png', '.json')
    geojson_filepath = os.path.join(geojson_path, geojson_filename)
    
    # Read the GeoJSON file and extract the polygons
    with open(geojson_filepath) as file:
        data = json.load(file)
        features = data['features']
        
        for feature in features:
            if 'geometry' in feature and 'coordinates' in feature['geometry']:
                geometry = feature['geometry']
                
                if geometry and geometry['type'] == 'Polygon':
                    coordinates = geometry['coordinates'][0]
                    polygon = Polygon(coordinates)
                    
                    # Check if the image overlaps with the polygon
                    if polygon.intersects(gis_data.geometry):
                        return True
    
    return False

In [8]:
# Specify the paths to the xBD dataset images and corresponding GeoJSON files
image_path = 'train/images/'
geojson_path = 'train/labels/'

In [9]:
# Example usage
image_filenames = os.listdir(image_path)
images = load_images(image_path,image_filenames)
gis_data = load_gis_data(geojson_path )

In [10]:
# Assuming you have a list of image paths in 'image_paths' variable

features = []

for filename in image_filenames:
    image = cv2.imread(image_path + filename)
    preprocessed_image = preprocess_image(image)

    texture_features = extract_texture_features(preprocessed_image)
    shape_features = extract_shape_features(preprocessed_image)
    color_features = extract_color_features(preprocessed_image)

    # Convert shape_features to a one-dimensional array
    shape_features = np.array([shape_features])

    # Combine all the features into a single feature vector
    feature_vector = np.concatenate((texture_features, shape_features, color_features))

    features.append(feature_vector)

# Now 'features' list contains the extracted features for all the images in the dataset


In [21]:
# Prepare the training data
labels = []  # List to store the labels for damaged areas
# Assign labels based on overlap between images and polygons
for image_filename in image_filenames:
    # Check if the image overlaps with any polygons
    # Assign a label based on the overlap
    if image_overlaps_with_damaged_area(image_filename, gis_data):
        labels.append(1)  # Damaged area
    else:
        labels.append(0)  # Not damaged area

# Convert the features and labels to NumPy arrays
X = np.array(features)
y = np.array(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose and initialize the machine learning algorithm
model = RandomForestClassifier(n_estimators=100)

# Train the model
model.fit(X_train, y_train)

# Predict labels for the testing set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
