In [37]:
import numpy as np
import pandas as pd
import os
import json
from pathlib import Path
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial import ConvexHull
from scipy import interpolate
from sklearn.cluster import DBSCAN
from sklearn import metrics
import math

def getCentroid(xcoor, ycoor):
    """Return the mean of ``xcoor`` and the mean of ``ycoor`` as a pair of floats."""
    mean_x = sum(xcoor) / len(xcoor)
    mean_y = sum(ycoor) / len(ycoor)
    return float(mean_x), float(mean_y)

def reproject(latitude, longitude):
    """Convert latitude/longitude sequences to x & y coordinates in meters (sinusoidal projection).

    Args:
        latitude: Iterable of latitudes in degrees.
        longitude: Iterable of longitudes in degrees, paired with ``latitude``.

    Returns:
        ``(x, y)`` lists: ``y`` scales each latitude by the meters-per-degree
        factor; ``x`` scales each longitude by the same factor times the cosine
        of the paired latitude.
    """
    earth_radius = 6371009  # in meters
    meters_per_degree = math.pi * earth_radius / 180.0

    y = []
    for lat in latitude:
        y.append(lat * meters_per_degree)

    x = []
    for lat, lon in zip(latitude, longitude):
        # East-west degrees shrink with the cosine of the latitude.
        x.append(lon * meters_per_degree * math.cos(math.radians(lat)))
    return x, y

def area_of_polygon(x, y):
    """Calculate the area of an arbitrary polygon given its vertices.

    Args:
        x: Sequence of vertex x coordinates (ring not repeated at the end).
        y: Sequence of vertex y coordinates, same length as ``x``.

    Returns:
        The unsigned polygon area as a float. Inputs with fewer than three
        vertices enclose no area and yield ``0.0``.
    """
    # A point or a segment has no area; without this guard a single vertex
    # would raise IndexError on the y[i-1] lookup below.
    if len(x) < 3:
        return 0.0

    area = 0.0
    # Starting at -1 lets negative indexing wrap around to the last vertex,
    # so every vertex is visited with both of its neighbours available.
    for i in range(-1, len(x) - 1):
        area += x[i] * (y[i + 1] - y[i - 1])
    return abs(area) / 2.0

def getData(path):
    """Parse one label JSON file into per-building centroid, damage and area lists.

    Reads ``data['features']['lng_lat']`` from the file. For every entry the
    polygon ring is taken from its ``'wkt'`` string, the mean of the vertices
    is used as the centroid, and the footprint area is computed after
    projecting the vertices to meters with ``reproject``.

    Args:
        path: Path of the label JSON file.

    Returns:
        A 6-tuple of equal-length lists: x centroids, y centroids,
        ``properties['subtype']`` values, ``properties['feature_type']``
        values, the file's base name repeated once per building, and the
        polygon areas.
    """
    # Context manager so the file handle is released even if parsing fails.
    with open(path) as f:
        data = json.load(f)

    shelters = data['features']['lng_lat']
    shelterCount = len(shelters)
    xcentroids = []
    ycentroids = []
    damageStatus = []
    featureTypes = []
    shelterArea = []
    for shelter in shelters:
        # Strip the 10-character prefix and 2-character suffix around the
        # point list (presumably 'POLYGON ((' and '))' -- verify against data).
        coordinates = shelter['wkt'][10:-2].split(sep=',')
        # Drop the final point (presumably the repeat of the first vertex
        # that closes the ring).
        coordinates = coordinates[:-1]
        xcoor = []
        ycoor = []
        for point in coordinates:
            x, y = point.split()
            xcoor.append(float(x))
            ycoor.append(float(y))
        x, y = getCentroid(xcoor, ycoor)
        # reproject() takes (latitude, longitude). The features are keyed
        # 'lng_lat', so x is read as longitude and y as latitude; passing them
        # the other way round applied the cosine correction to the wrong axis.
        x1, y1 = reproject(ycoor, xcoor)
        shelterArea.append(area_of_polygon(x1, y1))
        xcentroids.append(x)
        ycentroids.append(y)
        damageStatus.append(shelter['properties']['subtype'])
        featureTypes.append(shelter['properties']['feature_type'])
    # basename() yields the same file name as the former path[41:] slice for
    # the label directory used by the caller, without depending on its length.
    file_name = os.path.basename(path)
    return xcentroids, ycentroids, damageStatus, featureTypes, [file_name]*shelterCount, shelterArea

# Accumulators holding one entry per building across all post-disaster label files.
dictX = {}
final_xcentroids = []
final_ycentroids = []
final_damageStatus = []
final_featureTypes = []
final_shelterArea = []
final_pathList = []

files = os.listdir('train_images_labels_targets/train/labels')
for i, file_name in enumerate(files):
    path = 'train_images_labels_targets/train/labels/' + str(file_name)
    # Only the post-disaster label files are loaded.
    if not file_name.endswith("post_disaster.json"):
        continue
    xcentroids, ycentroids, damageStatus, featureTypes, pathList, shelterArea = getData(path)
    final_xcentroids += xcentroids
    final_ycentroids += ycentroids
    final_damageStatus += damageStatus
    final_featureTypes += featureTypes
    final_pathList += pathList
    final_shelterArea += shelterArea

# The disaster name is the file name minus its last 28 characters
# (presumably the '_<8-digit id>_post_disaster.json' suffix -- verify).
disasterName = [label_file[:-28] for label_file in final_pathList]

dictX.update({
    'xCentroid': final_xcentroids,
    'yCentroid': final_ycentroids,
    'damageStatus': final_damageStatus,
    'featureTypes': final_featureTypes,
    'File Name': final_pathList,
    'Shelter Area': final_shelterArea,
    'Disaster Name': disasterName,
})
df = pd.DataFrame(dictX)
df

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'train_images_labels_targets/train/labels'

In [18]:
# Distinct values of the 'Disaster Name' column derived from the label file names.
df['Disaster Name'].unique()

NameError: name 'df' is not defined

In [19]:
def getPlots(disaster_name):
    """Scatter-plot the building centroids of one disaster, styled by damage level, and save the figure.

    Reads the module-level ``df``. 'un-classified' buildings are not drawn.
    The image is written with ``plt.savefig(disaster_name)``.
    """
    df1 = df[df['Disaster Name'] == disaster_name]

    # (damage level, color, marker, marker size), listed in drawing order.
    layer_styles = [
        ('no-damage', 'gray', '.', 2),
        ('minor-damage', 'blue', '^', 3),
        ('major-damage', 'orange', 'd', 4),
        ('destroyed', 'red', 'o', 5),
    ]
    layers = [
        (df1[df1['damageStatus'] == level], level, color, marker, size)
        for level, color, marker, size in layer_styles
    ]

    plt.figure(figsize=(15,15))
    for subset, level, color, marker, size in layers:
        plt.plot(subset['xCentroid'], subset['yCentroid'], color = color, ls = '',
                 marker = marker, markersize = size, label = level)
    plt.title(disaster_name, fontsize = 25)
    plt.xlabel('Longitude', fontsize = 15)
    plt.ylabel('Latitude',fontsize = 15)
    plt.xticks(fontsize = 15, rotation = 30)
    plt.yticks(fontsize = 15, rotation = 30)
    plt.legend()
    plt.savefig(disaster_name)

In [20]:
# Save one centroid scatter plot per disaster present in the data.
for disaster in df['Disaster Name'].unique():
    getPlots(disaster)

NameError: name 'df' is not defined

# KMeans Clustering

In [21]:
# One frame per disaster; reset_index() renumbers the rows and keeps the
# previous row labels in an 'index' column.
(df_GV, df_HF, df_HH, df_HMa, df_HMi,
 df_ME, df_MF, df_PT, df_SRW, df_SF) = (
    df[df['Disaster Name'] == selected_disaster].reset_index()
    for selected_disaster in (
        'guatemala-volcano',
        'hurricane-florence',
        'hurricane-harvey',
        'hurricane-matthew',
        'hurricane-michael',
        'mexico-earthquake',
        'midwest-flooding',
        'palu-tsunami',
        'santa-rosa-wildfire',
        'socal-fire',
    )
)

NameError: name 'df' is not defined

In [22]:
# Damage levels in the order their rows are stacked into df2, paired with the
# label used in the printed area statistics.
damage_levels = [
    ('destroyed', 'Destroyed'),
    ('major-damage', 'Major Damage'),
    ('minor-damage', 'Minor Damage'),
    ('no-damage', 'No Damage'),
]
# (damage level, color, marker, marker size), listed in drawing order.
marker_styles = [
    ('no-damage', 'gray', '.', 2),
    ('minor-damage', 'blue', '^', 3),
    ('major-damage', 'orange', 'd', 4),
    ('destroyed', 'red', 'o', 5),
]

# For every disaster: run KMeans (5 clusters) on the building centroids, print
# disaster-level and per-cluster statistics, and save a plot with a smoothed
# convex-hull outline per cluster.
for disaster_name in df['Disaster Name'].unique():
    df1 = df[df['Disaster Name'] == disaster_name]

    # Stack the rows level by level; 'un-classified' buildings are left out.
    x = []
    y = []
    status = []
    shelterArea = []
    by_status = {}
    for level, _ in damage_levels:
        level_rows = df1[df1['damageStatus'] == level]
        by_status[level] = level_rows
        x.extend(list(level_rows['xCentroid']))
        y.extend(list(level_rows['yCentroid']))
        status.extend(list(level_rows['damageStatus']))
        shelterArea.extend(list(level_rows['Shelter Area']))

    dictX = {'xCentroid' : x, 'yCentroid' : y, 'damageStatus' : status, 'Shelter Area' : shelterArea}
    df2 = pd.DataFrame(dictX)

    kmeans = KMeans(n_clusters = 5, random_state = 0)
    df2['cluster'] = kmeans.fit_predict(df2[['xCentroid', 'yCentroid']])
    # cen_x and cen_y hold the coordinates of each cluster center
    centroids = kmeans.cluster_centers_
    cen_x = [center[0] for center in centroids]
    cen_y = [center[1] for center in centroids]
    # Map every cluster label to its center. The previous mapping listed only
    # labels 0-2, which left NaN for the rows of clusters 3 and 4.
    df2['cen_x'] = df2.cluster.map(dict(enumerate(cen_x)))
    df2['cen_y'] = df2.cluster.map(dict(enumerate(cen_y)))
    # define and map colors
    colors = ['red', 'orange', 'blue', 'green', 'black']
    df2['c'] = df2.damageStatus.map({"destroyed":colors[0], "major-damage":colors[1], "minor-damage":colors[2], "no-damage":colors[3], "un-classified":colors[4]})

    fig, ax = plt.subplots(1, figsize=(8,8))
    for level, color, marker, size in marker_styles:
        level_rows = by_status[level]
        plt.plot(level_rows['xCentroid'], level_rows['yCentroid'], color = color, ls = '',
                 marker = marker, markersize = size, label = level)
    plt.title(disaster_name, fontsize = 25)
    plt.xlabel('Longitude', fontsize = 15)
    plt.ylabel('Latitude',fontsize = 15)
    plt.xticks(fontsize = 15, rotation = 30)
    plt.yticks(fontsize = 15, rotation = 30)
    plt.legend()

    # Mark the cluster centers.
    plt.plot(cen_x, cen_y, color = 'purple', lw = 2, ls = '', marker = '+', markersize = 30, markerfacecolor = 'yellow', markeredgewidth = 3)

    # Disaster Statistics
    print('Disaster Name: ', disaster_name)
    print(df2['damageStatus'].value_counts())
    print('Total Shelter Area = ', sum(df2['Shelter Area']))
    df_shelterAreaLessThan150 = df2[df2['Shelter Area'] < 150]
    small_by_level = {
        level: df_shelterAreaLessThan150[df_shelterAreaLessThan150['damageStatus'] == level]
        for level, _ in damage_levels
    }
    print('Total Shelter Area of shelters which are less than 150 square meteres = ', sum(df_shelterAreaLessThan150['Shelter Area']))
    for level, label in damage_levels:
        print(label + ' Shelter Area of shelters which are less than 150 square meteres = ', sum(small_by_level[level]['Shelter Area']))
    df_shelterAreaMoreThan150 = df2[df2['Shelter Area'] >= 150]
    print('Total Shelter Area of shelters which are greater than or equal to 150 square meteres = ', sum(df_shelterAreaMoreThan150['Shelter Area']))
    # 4.8 is the people-per-shelter multiplier used for every estimate below.
    print('Population Estimate = ', int(len(df_shelterAreaLessThan150) * 4.8))
    for level, _ in damage_levels:
        print('Estimated number of people affected because of ' + level + ' shelters = ', int(len(small_by_level[level]) * 4.8))
    print()

    area = []
    perimeter = []
    for i in df2.cluster.unique():
        df_cluster = df2[df2['cluster'] == i]
        df_cluster_small = df_cluster[df_cluster['Shelter Area'] < 150]
        df_cluster_destroyed = df_cluster[df_cluster['damageStatus'] == 'destroyed']
        cluster_small_by_level = {
            level: df_cluster_small[df_cluster_small['damageStatus'] == level]
            for level, _ in damage_levels
        }

        # Cluster Statistics
        print('Disaster Name: ', disaster_name, ' - Cluster Label = ', i)
        print('Number of shelters in cluster ', i, ' = ', len(df_cluster))
        print(df_cluster['damageStatus'].value_counts())
        # Shelter Area
        print('Total Shelter Area of shelters in cluster ', i, ' = ', sum(df_cluster['Shelter Area']))
        print('Total Shelter Area of shelters in cluster ', i, ' which are less than 150 square meteres = ', sum(df_cluster_small['Shelter Area']))
        # '>=' matches the printed label and the disaster-level total above
        # (the comparison used to be a strict '>').
        print('Total Shelter Area of shelters in cluster ', i, ' which are greater than or equal to 150 square meteres = ', sum(df_cluster[df_cluster['Shelter Area'] >= 150]['Shelter Area']))
        print('Destroyed Shelter Area = ',sum(df_cluster_destroyed['Shelter Area']))
        for level, label in damage_levels:
            print(label + ' Shelter Area of shelters which are less than 150 square meteres = ', sum(cluster_small_by_level[level]['Shelter Area']))

        # Population Estimate
        # NOTE(review): this counts every shelter in the cluster, whereas the
        # disaster-level estimate counts only shelters under 150 -- confirm
        # which one is intended.
        print('Population Estimate in cluster', i, ' = ', int(len(df_cluster) * 4.8))
        for level, _ in damage_levels:
            print('Estimated number of people affected because of ' + level + ' shelters = ', int(len(cluster_small_by_level[level]) * 4.8))
        print()

        # get the convex hull
        points = df_cluster[['xCentroid', 'yCentroid']].values
        if len(points) < 3:
            # ConvexHull needs at least 3 points in 2-D; record a zero-size
            # hull and skip the outline instead of raising.
            area.append(0.0)
            perimeter.append(0.0)
            continue
        hull = ConvexHull(points)
        # For 2-D input scipy reports the enclosed area as `volume` and the
        # boundary length as `area`.
        area.append(hull.volume)
        perimeter.append(hull.area)
        # Close the ring by repeating the first hull vertex.
        x_hull = np.append(points[hull.vertices,0],
                           points[hull.vertices,0][0])
        y_hull = np.append(points[hull.vertices,1],
                           points[hull.vertices,1][0])
        # interpolate a periodic spline through the hull, parameterised by
        # cumulative distance along the boundary
        dist = np.sqrt((x_hull[:-1] - x_hull[1:])**2 + (y_hull[:-1] - y_hull[1:])**2)
        dist_along = np.concatenate(([0], dist.cumsum()))
        spline, u = interpolate.splprep([x_hull, y_hull],
                                        u=dist_along, s=0, per=1)
        interp_d = np.linspace(dist_along[0], dist_along[-1], 50)
        interp_x, interp_y = interpolate.splev(interp_d, spline)
        # plot shape
        plt.fill(interp_x, interp_y, '--', c=colors[i], alpha=0.2)
    print('----------------------------------------------------------')
    plt.savefig('KMeans 5 Clusters ' + disaster_name)

NameError: name 'df' is not defined

In [23]:
# db = DBSCAN(eps = 0.1, min_samples = 5).fit(df_HMa[['xCentroid', 'yCentroid']])
# labels = db.labels_
# df_HMa['cluster'] = labels

# # Number of clusters in labels, ignoring noise if present.
# n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# n_noise_ = list(labels).count(-1)

# print("Estimated number of clusters: %d" % n_clusters_)
# print("Estimated number of noise points: %d" % n_noise_)

In [24]:
def getDBSCANclusters(disaster_name, eps, min_samples=10):
    """Cluster one disaster's building centroids with DBSCAN, print statistics and save a plot.

    Reads the module-level ``df``. Buildings of every damage level, including
    'un-classified', are clustered on their ``xCentroid``/``yCentroid``
    values. The figure is written to ``'DBSCAN Clusters ' + disaster_name``
    and shown, and the set of cluster labels is printed last.

    Args:
        disaster_name: Value of the 'Disaster Name' column to select.
        eps: DBSCAN neighbourhood radius, compared against distances between
            the centroid coordinates.
        min_samples: DBSCAN ``min_samples``; defaults to the previously
            hard-coded value of 10.
    """
    # Damage levels reported in the statistics, with the label used in the
    # printed area lines.
    damage_levels = [
        ('destroyed', 'Destroyed'),
        ('major-damage', 'Major Damage'),
        ('minor-damage', 'Minor Damage'),
        ('no-damage', 'No Damage'),
    ]
    # (damage level, color, marker, marker size), listed in drawing order.
    marker_styles = [
        ('un-classified', 'black', '.', 2),
        ('no-damage', 'gray', '.', 2),
        ('minor-damage', 'blue', '^', 3),
        ('major-damage', 'orange', 'd', 4),
        ('destroyed', 'red', 'o', 5),
    ]

    df1 = df[df['Disaster Name'] == disaster_name]

    # Stack the rows level by level, 'un-classified' last.
    x = []
    y = []
    status = []
    shelterArea = []
    by_status = {}
    for level in ('destroyed', 'major-damage', 'minor-damage', 'no-damage', 'un-classified'):
        level_rows = df1[df1['damageStatus'] == level]
        by_status[level] = level_rows
        x.extend(list(level_rows['xCentroid']))
        y.extend(list(level_rows['yCentroid']))
        status.extend(list(level_rows['damageStatus']))
        shelterArea.extend(list(level_rows['Shelter Area']))

    dictX = {'xCentroid' : x, 'yCentroid' : y, 'damageStatus' : status, 'Shelter Area' : shelterArea}
    df2 = pd.DataFrame(dictX)

    clusters = DBSCAN(eps = eps, min_samples = min_samples).fit(df2[['xCentroid', 'yCentroid']])
    df2['cluster'] = clusters.labels_

    fig, ax = plt.subplots(1, figsize=(12,12))
    for level, color, marker, size in marker_styles:
        level_rows = by_status[level]
        plt.plot(level_rows['xCentroid'], level_rows['yCentroid'], color = color, ls = '',
                 marker = marker, markersize = size, label = level)
    plt.title(disaster_name, fontsize = 25)
    plt.xlabel('Longitude', fontsize = 15)
    plt.ylabel('Latitude',fontsize = 15)
    plt.xticks(fontsize = 15, rotation = 30)
    plt.yticks(fontsize = 15, rotation = 30)
    plt.legend()

    # Disaster Statistics
    print('Disaster Name: ', disaster_name)
    print(df2['damageStatus'].value_counts())
    print('Total Shelter Area = ', sum(df2['Shelter Area']))
    df_shelterAreaLessThan150 = df2[df2['Shelter Area'] < 150]
    small_by_level = {
        level: df_shelterAreaLessThan150[df_shelterAreaLessThan150['damageStatus'] == level]
        for level, _ in damage_levels
    }
    print('Total Shelter Area of shelters which are less than 150 square meteres = ', sum(df_shelterAreaLessThan150['Shelter Area']))
    for level, label in damage_levels:
        print(label + ' Shelter Area of shelters which are less than 150 square meteres = ', sum(small_by_level[level]['Shelter Area']))
    df_shelterAreaMoreThan150 = df2[df2['Shelter Area'] >= 150]
    print('Total Shelter Area of shelters which are greater than or equal to 150 square meteres = ', sum(df_shelterAreaMoreThan150['Shelter Area']))
    # 4.8 is the people-per-shelter multiplier used for every estimate below.
    print('Population Estimate = ', int(len(df_shelterAreaLessThan150) * 4.8))
    for level, _ in damage_levels:
        print('Estimated number of people affected because of ' + level + ' shelters = ', int(len(small_by_level[level]) * 4.8))
    print()

    colors = ['red', 'orange', 'blue', 'green', 'black', 'violet', 'purple', 'pink', 'yellow', 'gray']
    area = []
    perimeter = []
    # The labels include -1 for DBSCAN noise points, which are reported and
    # outlined like any other group.
    for i in df2.cluster.unique():
        df_cluster = df2[df2['cluster'] == i]
        df_cluster_small = df_cluster[df_cluster['Shelter Area'] < 150]
        df_cluster_destroyed = df_cluster[df_cluster['damageStatus'] == 'destroyed']
        cluster_small_by_level = {
            level: df_cluster_small[df_cluster_small['damageStatus'] == level]
            for level, _ in damage_levels
        }

        # Cluster Statistics
        print('Disaster Name: ', disaster_name, ' - Cluster Label = ', i)
        print('Number of shelters in cluster ', i, ' = ', len(df_cluster))
        print(df_cluster['damageStatus'].value_counts())
        # Shelter Area
        print('Total Shelter Area of shelters in cluster ', i, ' = ', sum(df_cluster['Shelter Area']))
        print('Total Shelter Area of shelters in cluster ', i, ' which are less than 150 square meteres = ', sum(df_cluster_small['Shelter Area']))
        # '>=' matches the printed label and the disaster-level total above
        # (the comparison used to be a strict '>').
        print('Total Shelter Area of shelters in cluster ', i, ' which are greater than or equal to 150 square meteres = ', sum(df_cluster[df_cluster['Shelter Area'] >= 150]['Shelter Area']))
        print('Destroyed Shelter Area = ',sum(df_cluster_destroyed['Shelter Area']))
        for level, label in damage_levels:
            print(label + ' Shelter Area of shelters which are less than 150 square meteres = ', sum(cluster_small_by_level[level]['Shelter Area']))

        # Population Estimate
        # NOTE(review): this counts every shelter in the cluster, whereas the
        # disaster-level estimate counts only shelters under 150 -- confirm
        # which one is intended.
        print('Population Estimate in cluster', i, ' = ', int(len(df_cluster) * 4.8))
        for level, _ in damage_levels:
            print('Estimated number of people affected because of ' + level + ' shelters = ', int(len(cluster_small_by_level[level]) * 4.8))
        print()

        # get the convex hull
        points = df_cluster[['xCentroid', 'yCentroid']].values
        if len(points) < 3:
            # ConvexHull needs at least 3 points in 2-D (the noise group can be
            # smaller); record a zero-size hull and skip the outline.
            area.append(0.0)
            perimeter.append(0.0)
            continue
        hull = ConvexHull(points)
        # For 2-D input scipy reports the enclosed area as `volume` and the
        # boundary length as `area`.
        area.append(hull.volume)
        perimeter.append(hull.area)
        # Close the ring by repeating the first hull vertex.
        x_hull = np.append(points[hull.vertices,0],
                           points[hull.vertices,0][0])
        y_hull = np.append(points[hull.vertices,1],
                           points[hull.vertices,1][0])
        # interpolate a periodic spline through the hull, parameterised by
        # cumulative distance along the boundary
        dist = np.sqrt((x_hull[:-1] - x_hull[1:])**2 + (y_hull[:-1] - y_hull[1:])**2)
        dist_along = np.concatenate(([0], dist.cumsum()))
        spline, u = interpolate.splprep([x_hull, y_hull],
                                        u=dist_along, s=0, per=1)
        interp_d = np.linspace(dist_along[0], dist_along[-1], 50)
        interp_x, interp_y = interpolate.splev(interp_d, spline)
        # plot shape; the modulo cycles the palette so labels of 10 and above
        # no longer raise IndexError (label -1 still maps to the last color).
        plt.fill(interp_x, interp_y, '--', c=colors[i % len(colors)], alpha=0.2)
    plt.savefig('DBSCAN Clusters ' + disaster_name)
    plt.show()
    print(set(clusters.labels_))

In [25]:
# Disaster names that can be passed to getDBSCANclusters.
df['Disaster Name'].unique()

NameError: name 'df' is not defined

In [26]:
# DBSCAN run for guatemala-volcano; eps=0.015 is in the units of the centroid coordinates.
getDBSCANclusters('guatemala-volcano', 0.015)

NameError: name 'df' is not defined

In [27]:
# DBSCAN run for hurricane-florence; eps=0.5 is in the units of the centroid coordinates.
getDBSCANclusters('hurricane-florence', 0.5)

NameError: name 'df' is not defined

In [28]:
# DBSCAN run for hurricane-harvey; eps=0.05 is in the units of the centroid coordinates.
getDBSCANclusters('hurricane-harvey', 0.05)

NameError: name 'df' is not defined

In [29]:
# DBSCAN run for hurricane-matthew; eps=0.015 is in the units of the centroid coordinates.
getDBSCANclusters('hurricane-matthew', 0.015)

NameError: name 'df' is not defined

In [30]:
# DBSCAN run for hurricane-michael; eps=0.05 is in the units of the centroid coordinates.
getDBSCANclusters('hurricane-michael', 0.05)

NameError: name 'df' is not defined

In [31]:
# DBSCAN run for mexico-earthquake; eps=0.015 is in the units of the centroid coordinates.
getDBSCANclusters('mexico-earthquake', 0.015)

NameError: name 'df' is not defined

In [32]:
# DBSCAN run for midwest-flooding; eps=0.15 is in the units of the centroid coordinates.
getDBSCANclusters('midwest-flooding', 0.15)

NameError: name 'df' is not defined

In [33]:
# DBSCAN run for palu-tsunami; eps=0.009 is in the units of the centroid coordinates.
getDBSCANclusters('palu-tsunami', 0.009)

NameError: name 'df' is not defined

In [34]:
# DBSCAN run for santa-rosa-wildfire; eps=0.015 is in the units of the centroid coordinates.
getDBSCANclusters('santa-rosa-wildfire', 0.015)

NameError: name 'df' is not defined

In [35]:
# DBSCAN run for socal-fire; eps=0.02 is in the units of the centroid coordinates.
getDBSCANclusters('socal-fire', 0.02)

NameError: name 'df' is not defined

In [36]:
# for disaster_name in df['Disaster Name'].unique():
#     getDBSCANclusters(disaster_name, 0.19)