This is a script for deploying the pre-trained Object Detection model to detect communal water points in your downloaded dataset of Google Street View imagery. You will need the following:

*  Roboflow API key: ([Tutorial](https://docs.roboflow.com/api-reference/authentication))
* Downloaded Google Street View images with metadata file from desired geographic area ([Tutorial](https://github.com/neildpatel/waterpointCNN/blob/main/ImageDownload_GSV.ipynb))

This script was partially adapted from a tutorial developed by researchers at MIT's Senseable City Lab (Director: Professor Carlo Ratti) for the 11.320 Digital City Design Lab course in Spring 2023. See https://senseable.mit.edu/ to learn more about the lab's work. Special gratitude to Prof. Carlo Ratti, [Claire Gorman](https://www.linkedin.com/in/claire-gorman-5465b7167/), [Rohit Sanatani](https://www.linkedin.com/in/rohit-priyadarshi-sanatani-14311595/), and [Nikita Klimenko](https://www.linkedin.com/in/nikita-klimenko/) for their support in developing this project.

## Setup


In [None]:
# Mount Google Drive at /content/drive so that files stored in Drive
# (e.g. the downloaded Street View images) can be reached from this runtime
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#@title Imports and function definitions

# For running inference on the TF-Hub module.
import tensorflow as tf
import tarfile
import os
from os import listdir
import json

# For running inference using Roboflow
!pip install roboflow
from roboflow import Roboflow

# For downloading the image.
import matplotlib.pyplot as plt
import tempfile
from six.moves.urllib.request import urlopen
from six import BytesIO

# For drawing onto the image.
import numpy as np
import pandas as pd
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

# For data visualization
import geopandas
import folium
import io
import branca.colormap as cm

# Report the installed TensorFlow version
tf_version = tf.__version__
print(tf_version)

# Report the GPU device name that TensorFlow returns for this runtime
gpu_device = tf.test.gpu_device_name()
print("The following GPU devices are available: %s" % gpu_device)

In [None]:
# Connect to Roboflow and fetch the water point detection project
rf = Roboflow(api_key='xxx') # Replace 'xxx' with your Roboflow API key

# Resolve the workspace first, then the project inside it
workspace = rf.workspace("neil-patel")
project = workspace.project("lagos-water-point-detections")

# Download version 3 of the project in the "yolov5" export format
project_version = project.version(3)
dataset = project_version.download("yolov5")

In [None]:
# Modules used for changing the working directory in the next cell
import os
import sys

# Mount Google Drive again, passing force_remount=True
# (Drive was already mounted once in the Setup cell above)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# Set a path to the Google Drive folder with your downloaded Google Street View images
# Replace 'xxx' with that folder's path; the same value is reused later as the image directory
YOUR_PATH = 'xxx'
# Make that folder the current working directory so relative paths
# (such as the detections.json output file) resolve inside it
os.chdir(YOUR_PATH)

## Apply module

RUN ON DOWNLOADED STREET VIEW DATA

In [None]:
# Load the panorama metadata and set the input/output locations
image_directory = YOUR_PATH      # folder holding the downloaded images
savename = 'detections.json'     # file the detections are written to later
data_file = "filepath" # Change 'filepath' to the file path for the metadata.csv file from your Google Street View downloads

# The first CSV column is used as the index and the first row as the header
pano_df = pd.read_csv(data_file, header=0, index_col=0)
pano_df.head()
pano_df.info()

In [None]:
# Get the model attached to version 3 of the Roboflow project loaded above;
# it is used by the cells below to run predictions on each image
model = project.version("3").model

In [None]:
#Print sample detections for the first 100 images
import json

# Cap the sample at the number of rows actually present, so a metadata file
# with fewer than 100 panoramas does not raise an IndexError
sample_size = min(100, len(pano_df))
for i in range(sample_size):
    panoid = pano_df.iloc[i]['panoID']
    print(i,'processing panoID:', panoid)

    # One image per heading is looked up, named "<panoID>_<heading>.jpg"
    for heading in [0,90,180,270]:
        filename = panoid+"_"+str(heading)+".jpg"
        # os.path.join works whether or not image_directory ends with a slash
        img_path = os.path.join(image_directory, filename)
        try:
            prediction = model.predict(img_path, confidence=80, overlap=30)
            prediction.plot()
        except Exception as ex:
            # Best effort: report the problem (e.g. a missing image) and move on
            print('Error', ex)

In [None]:
#Deploy object detection model on the entire GSV image dataset

# Each entry is [panoID, lat, lon, filename, class, confidence]
detections = []
for i in range(len(pano_df)):
    pano_row = pano_df.iloc[i]
    panoid = pano_row['panoID']
    lat = pano_row['lat']
    lon = pano_row['lon']
    print(i,'processing panoID:', panoid)

    # One image per heading is looked up, named "<panoID>_<heading>.jpg"
    for heading in [0,90,180,270]:
        filename = panoid+"_"+str(heading)+".jpg"
        # os.path.join works whether or not image_directory ends with a slash
        img_path = os.path.join(image_directory, filename)
        try:
            prediction = model.predict(img_path, confidence=80, overlap=30) # Establishing a minimum confidence threshold of 80 percent. Change as needed.
            predictions = prediction.json()['predictions']
            if not predictions:
                # Nothing met the confidence threshold: this is a normal
                # outcome, not an error, so report it as such and move on
                print("No detection for image:", filename)
                continue
            # As before, only the first prediction for the image is recorded
            first_prediction = predictions[0]
            classes = first_prediction['class']
            confidence = first_prediction['confidence']
            detections.append([panoid, lat, lon, filename, classes, confidence])
            print("Detection added for image:", filename)
        except Exception as ex:
            # Best effort: report the problem (e.g. a missing image) and move on
            print('Error', ex)

In [None]:
# Turn each detection record into a dictionary keyed by field name
# (panoID, latitude, longitude, filename, classes, confidence level),
# then write the whole list to the JSON file named by `savename`

detection_fields = ['panoID', 'lat', 'lon', 'filename', 'classes', 'confidence']
detection_dicts = [dict(zip(detection_fields, detection)) for detection in detections]
print("Detection dict created:", detection_dicts)
with open(savename, "w") as json_file:
    json.dump(detection_dicts, json_file)

# Display the result as the cell output
detection_dicts

VISUALIZE DETECTIONS

In [None]:
# Read the saved detections JSON file back in as a pandas dataframe
detections_df = pd.read_json(savename)

# Write the same table out as a .csv file
# Replace "filepath" below as appropriate to establish where to save the detections
detections_csv_path = "filepath/detections.csv"
detections_df.to_csv(detections_csv_path)

In [None]:
# Visual settings for the map of the detected public water points
theme = 'CartoDB positron'   # basemap tile style
color = 'green'              # marker colour
zoom_level = 15              # initial zoom

# Centre the map on the mean latitude/longitude of all panoramas
mean_lat = pano_df['lat'].mean()
mean_lon = pano_df['lon'].mean()
center = [mean_lat, mean_lon]

In [None]:
# Map the newly-detected public water point sites
# Use the zoom_level setting defined with the other map settings instead of
# repeating the number here, so changing that setting actually takes effect
map1 = folium.Map(location=center, tiles=theme, zoom_start=zoom_level)

# Add one filled circle marker per detection at its panorama's coordinates
for _, row in detections_df.iterrows():
    folium.CircleMarker([row['lat'], row['lon']],radius=.01,fill=True, color=color,opacity=1, fill_color=color,fill_opacity=1).add_to(map1)

# Display the map as the cell output
map1

MANUAL VALIDATION OF MODEL PERFORMANCE

In [None]:
# Read the saved 'detections.csv' file for manual validation
# Replace the "filepath" placeholder below with the actual path to that file
manval_path = "filepath to detections.csv"
manval_df = pd.read_csv(manval_path, header=0, index_col=0)

# Show the first rows as the cell output
manval_df.head()

In [None]:
# Print out all detections to manually validate for false positives or false negatives
# Iterate over every row of the detections table: a fixed count of 100 would
# raise an IndexError for shorter tables and skip rows in longer ones
for i in range(len(manval_df)):
    panoid = manval_df.iloc[i]['panoID']
    print(i,'processing panoID:', panoid)

    # All four headings are re-run for the panorama, not only the heading
    # that produced the detection
    for heading in [0,90,180,270]:
        filename = panoid+"_"+str(heading)+".jpg"
        # os.path.join works whether or not image_directory ends with a slash
        img_path = os.path.join(image_directory, filename)
        try:
            prediction = model.predict(img_path, confidence=80, overlap=30)
            prediction.plot()
        except Exception as ex:
            # Best effort: report the problem (e.g. a missing image) and move on
            print('Error', ex)