# NSW Liveability Analysis

In [1]:
from sqlalchemy import create_engine
import psycopg2
import psycopg2.extras
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon
from geoalchemy2 import Geometry, WKTElement
from keplergl import KeplerGl
from shapely import wkt
import statsmodels
import warnings
from sklearn.preprocessing import StandardScaler
import math

## Introduction

This notebook's main objective is to process seven datasets containing alphanumeric and geospatial data about Greater Region of Sydney, ingest it into a PostgreSQL database and use that data to calculate and visualise the liveability score for each applicable neighbourhood based on the pre-determined criteria.

## Prerequisites and setup

Check out the ```requirements.txt``` file to see all the library you'll need to run this notebook and their respective version. You will also need to setup your own instance of a PostgreSQL database with POSTGIS extension installed. Alternatively, you can run it with the DB I set up myself, but beware that the connection can drop randomly due to availability issues.

In [2]:
# Connect to the database

def pgconnect():
    try:
        db = create_engine('postgresql://kkuznets:sITtDMUk8ic2@ep-calm-river-296507.us-east-2.aws.neon.tech/neondb',
                           echo=False)
        conn = db.connect()
        print('Connected successfully.')
    except Exception as e:
        print("Unable to connect to the database.")
        print(e)
        db, conn = None, None
    return db, conn
db, conn = pgconnect()

Connected successfully.


In [3]:
# Define function to transform polygons into multipolygons to maintain consistent data types

def create_wkt_element(geom, srid):
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)

# Data processing and ingestion

We'll start by processing our datasets. This step includes cleaning, tranforming whenever required, and ingesting data into our database. We will try to keep our schema normalised, i.e. remove dupliacte columns, maintain our primary and foreign keys, and keep our table sizes to a minimum.

### Statistical areas

This dataset comes from the Australian Bureau of Statistics (ABS). It contains geospatial data defining area names and borders for all main neighbourhoods (suburbs) in the Greater Sydney Region. We'll need this dataset to connect information about each neighbourhood from other alphanumerical datasets to a specifc geographical area.

In [4]:
# Ingest Statistical Areas dataset into area_names table

# Create dataframe
areas = gpd.read_file("data/Statistical Areas/SA2_2016_AUST.shp")

# Clean
areas = areas[['SA2_MAIN16','SA2_NAME16','geometry']]
areas = areas.astype({'SA2_MAIN16': 'int64'})
areas = areas.rename(columns={'SA2_MAIN16': 'area_id', 'SA2_NAME16': 'area_name', 'geometry' : 'Geometry'})
areas = areas[areas['Geometry'] != None]

# Ingest
srid = 4326
areas['Geometry'] = areas['Geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid))  # convert to multipolygons
conn.execute('DROP TABLE IF EXISTS area_names CASCADE;')
areas.to_sql('area_names', con=conn, index=False, if_exists='replace', dtype={'Geometry': Geometry('MultiPolygon', srid)})

# Create primary key
dd = conn.execute('ALTER TABLE IF EXISTS area_names ADD PRIMARY KEY (area_id);')

  conn.execute('DROP TABLE IF EXISTS area_names CASCADE;')


### Businesses

This datasets provides information about the financial and social aspects of each suburb such as the number of retail shops available in the area, food stores, etc. We'll need it to calculate some of the socio-economic performance factors of each suburb.

In [5]:
# Ingest Businesses dataset into business_stats table

# Create dataframe
business = pd.read_csv('data/Businesses.csv')

# Clean
business = business.drop(['area_name','number_of_businesses','agriculture_forestry_and_fishing',
               'public_administration_and_safety'], axis = 1, errors = 'ignore')

# Ingest
conn.execute('DROP TABLE IF EXISTS business_stats CASCADE;')
business.to_sql('business_stats', con=conn, index=False, if_exists='replace')

# Create primary key
dd = conn.execute('ALTER TABLE IF EXISTS business_stats ADD PRIMARY KEY (area_id);')

### Neighbourhoods

This dataset contains more information about the financial and social aspects of the suburbs. We'll use it to derive the average household income, median rent prices, population numbers which are needed for some of our performance metrics calculations.

In [6]:
# Ingest Neighbourhoods into neighbourhoods table

# Create dataframe
neighbours = pd.read_csv('data/Neighbourhoods.csv')

# Clean
cols = list(neighbours.columns)

neighbours = neighbours.drop(['area_name', 'Unnamed: 0', 'land_area', 'number_of_dwellings',
                              'number_of_businesses'], axis = 1, errors = 'ignore')

# Convert population column into integer and remove all rows with null population
neighbours['population'] = pd.to_numeric(neighbours['population'], errors='coerce')
neighbours = neighbours.dropna(subset=['population'])
neighbours['population'] = neighbours['population'].astype(int)

# Ingest, generate primary key
conn.execute('DROP TABLE IF EXISTS neighbourhoods CASCADE;')
neighbours.to_sql('neighbourhoods', con=conn, index=False, if_exists='replace')
dd = conn.execute('ALTER TABLE IF EXISTS neighbourhoods ADD PRIMARY KEY (area_id);')

### School catchments

This dataset contains information about the locations of primary and secondary schools across the Greater Sydney region. We will use it to calculate how many schools are located inside each suburb to understand how friendly it is for families with children. Some entries are duplicated between the two files for primary and secondary schools, so we'll need to fix that. We will also need to convert each entry from polygon shape to GPS location so that we can precisely calculate which suburb each school belongs to.

In [7]:
# Ingest School Cathcments into schools table

# Create dataframe from two files by concatinating them
schools_primary = gpd.read_file("data/School Catchments/catchments_primary.shp")
schools_secondary = gpd.read_file("data/School Catchments/catchments_secondary.shp")
schools = pd.concat([schools_primary, schools_secondary])

# Clean
schools = schools[['USE_ID','geometry', 'CATCH_TYPE']]
schools = schools.rename(columns={'USE_ID': 'use_id', 'CATCH_TYPE': 'type', 'geometry': 'Geometry'}).reset_index()

# Ingest
srid = 4326
schools['Geometry'] = schools['Geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid))
conn.execute('DROP TABLE IF EXISTS schools CASCADE;')
schools.to_sql('schools', con=conn, index=False, if_exists='replace', dtype={'Geometry': Geometry('MultiPolygon', srid)})

# Remove duplicate entries by introducing temporary index
conn.execute("""DELETE FROM schools WHERE "index" IN (
                    SELECT "index"
                    FROM (
                         SELECT "index", row_number() OVER (PARTITION BY use_id ORDER BY "index") rn FROM schools) a
                    WHERE rn = 2);""")

# Drop temporary index and create primary key
conn.execute('ALTER TABLE IF EXISTS schools DROP COLUMN "index"')
dd = conn.execute('ALTER TABLE IF EXISTS schools ADD PRIMARY KEY (use_id)')

### Break and Enter

This dataset contains information about any crimes that occured in NSW from Jan to Dec 2021, and their locations. We will use this data to assess the crime rate in each suburb.

In [8]:
# Ingest break_and_enter into crimes table

# Create dataframe
crime = gpd.read_file("data/Break and Enter/BreakEnterDwelling_JanToDec2021.shp")

# Clean
crime = crime[['OBJECTID', 'Density', 'geometry']]
crime = crime.rename(columns={'OBJECTID': 'crime_id', 'Density': 'density', 'geometry': 'Geometry'}).reset_index()

# Ingest
srid = 4326
crime['Geometry'] = crime['Geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid))
conn.execute('DROP TABLE IF EXISTS crimes CASCADE;')
crime.to_sql('crimes', con=conn, index=False, if_exists='replace', dtype={'Geometry': Geometry('MultiPolygon', srid)})

# Remove duplicate entries by introducing temporary index
conn.execute("""DELETE FROM crimes WHERE "index" IN (
                    SELECT "index"
                    FROM (
                         SELECT "index", row_number() OVER (PARTITION BY crime_id ORDER BY "index") rn FROM crimes) a
                    WHERE rn = 2);""")

# Drop temporary index and create primary key
conn.execute('ALTER TABLE IF EXISTS crimes drop column "index"')
dd = conn.execute('ALTER TABLE IF EXISTS crimes ADD PRIMARY KEY (crime_id)')

### Playgrounds

This dataset contains information about the locations of all playgrounds in Sydney. We will use it to calculate how many playgrounds are located inside each suburb so that we can understand how friendly it is for families with children. We will need to convert each entry from polygon shape to GPS location so that we can precisely calculate which suburb each playground belongs to.

In [9]:
# Ingest Playgrounds into playgrounds table

# Create dataframe
playgrounds = gpd.read_file("data/Playgrounds.geojson")

# Clean
playgrounds = playgrounds[['OBJECTID', 'Type', 'geometry']]
playgrounds = playgrounds.rename(columns={'OBJECTID': 'object_id', 'Type': 'type', 'geometry': 'Geometry'})

# Ingest
srid = 4326
playgrounds['Geometry'] = playgrounds.Geometry.apply(lambda x: wkt.dumps(x))
conn.execute('DROP TABLE IF EXISTS playgrounds CASCADE;')
playgrounds.to_sql('playgrounds', con=conn, index=False, if_exists='replace')

# Add a new column of type geometry, copy entries from column 'geometry' into it and transform them into POINT geodata
conn.execute('ALTER TABLE IF EXISTS playgrounds ADD location geometry;')
conn.execute('UPDATE playgrounds SET location =  st_geomfromtext("Geometry");')
conn.execute('ALTER TABLE IF EXISTS playgrounds DROP COLUMN "Geometry";')

# Add primary key and geospatial index on the new column
conn.execute('ALTER TABLE IF EXISTS playgrounds ADD PRIMARY KEY ("object_id");')
dd = conn.execute('CREATE INDEX idx_playgrounds_location ON playgrounds USING GIST (location);')

### Cameras

This dataset contains information about the locations of public CCTVs in Sydney. We will use it to calculate how many cameras are located inside each suburb to understand how safe it is. We will need to convert each entry from polygon shape to GPS location so that we can precisely calculate which suburb each playground belongs to.

In [10]:
# Ingest Cameras into safety_cameras table

# Create dataframe
safety_cameras = gpd.read_file("data/Safety Cameras/Street_safety_cameras.shp")

# Cleaning
safety_cameras = safety_cameras[['OBJECTID','geometry']]
safety_cameras = safety_cameras.rename(columns={'OBJECTID': 'camera_id', 'geometry': 'Geometry'})

# Ingesting
srid = 4326
safety_cameras['Geometry'] = safety_cameras.Geometry.apply(lambda x: wkt.dumps(x))
conn.execute('DROP TABLE IF EXISTS safety_cameras CASCADE;')
safety_cameras.to_sql('safety_cameras', con=conn, index=False, if_exists='replace')

# Add a new column of type geometry, copy entries from column 'geometry' into it and transform them into POINT geodata
conn.execute('ALTER TABLE IF EXISTS safety_cameras ADD location geometry;')
conn.execute('UPDATE safety_cameras SET location =  st_geomfromtext("Geometry");')
conn.execute('ALTER TABLE IF EXISTS safety_cameras DROP COLUMN "Geometry";')

# Add primary key and geospatial index on the new column
conn.execute('ALTER TABLE IF EXISTS safety_cameras ADD PRIMARY KEY ("camera_id");')
dd = conn.execute('CREATE INDEX idx_security_cameras_location ON safety_cameras USING GIST (location);')

## Additional cleaning

Some of the tables can list more statistical areas that what we have in the area_names table. It means that we cannot plot them on the map since we don't have any geospatial data on them. To avoid processing unnecessary data, we need to remove such entries from all tables where required.

In [11]:
# Delete all area_id from business_stats that aren't present in area_names and create foreign key between them

conn.execute("""DELETE FROM 
                business_stats
                WHERE area_id NOT IN (SELECT area_id FROM area_names);""")

conn.execute('ALTER TABLE IF EXISTS business_stats DROP CONSTRAINT IF EXISTS business_stats_area_names_area_id_fk;')

dd = conn.execute("""ALTER TABLE IF EXISTS business_stats
                    ADD CONSTRAINT business_stats_area_names_area_id_fk
                    FOREIGN KEY (area_id) references area_names;""")

In [12]:
# Delete all area_id from neighbourhoods that aren't present in area_names and create foreign key between them

conn.execute("""DELETE FROM 
                neighbourhoods
                WHERE area_id NOT IN (SELECT area_id FROM area_names);""")

conn.execute('ALTER TABLE IF EXISTS neighbourhoods DROP CONSTRAINT IF EXISTS neighbourhoods_area_names_area_id_fk;')

dd = conn.execute("""ALTER TABLE IF EXISTS neighbourhoods
                    ADD CONSTRAINT neighbourhoods_area_names_area_id_fk
                    FOREIGN KEY (area_id) references area_names;""")

## Schema

We should now have the following schema in our db:

<img src="data/Schema.png" alt="DB Schema" width="600"/>

# Data analysis

Now that our data is stored in the database and its schema is normalised, we can use it to calculate the suburb rankings in different categories. For convenience, let's call them "scores". We will later use these scores to calculate the final score which will indicate the overall performance of each suburb across all categories.

We have 7 different scores:

- School Score. This score indicates how many school are located in each suburb  per thousand people aged 0-15 years.
- Accom score. This score indicates how many accomodation businesses are located in each suburb per thousand people.
- Retail Score. This score indicates how manu retail shops are located in each suburb per thousand people.
- Crime Score. This score indicates how many crimes on average happen in each suburb in comparison to its total area.
- Health Score. This score indicates how many hospitals are located in each suburb per thousand people.
- Entertainment Score. This score indicates how many playgrounds are located in each suburb per thousand of people aged 0-15 years.
- Safety Score. This score indicates how many cameras are located in each suburb, if any.


In [13]:
# Get per-category scores for each suburb

sql = """
SELECT area_name                                    as "Area Name",
       (CASE
           WHEN young_pop = 0 THEN 0
           ELSE (schools / (young_pop / 500))
        END)                                        as "School Score",
       acc_business / (general_pop / 1000)          as "Accom Score",
       retail_shops / (general_pop / 1000)          as "Retail Score",
       crimes / st_area(geom::geography::geometry)  as "Crime Score",
       hospitals / (general_pop / 1000)             as "Health Score",
       COALESCE(playgrounds / (young_pop / 100), 0) as "Entertainment Score",
       COALESCE(cameras, 0)                         as "Safety Score",
       geom

FROM
(SELECT a.area_name,
        a."Geometry" as geom,
        COUNT(c.crimes)                             as crimes,
        COUNT(s.schools)                            as schools,
        SUM(n."0-4") + SUM(n."5-9") + SUM(n."10-14") +
        SUM(n."15-19")                              as young_pop,
        SUM(n.population)                           as general_pop,
        SUM(bs.accommodation_and_food_services)     as acc_business,
        SUM(bs.retail_trade)                        as retail_shops,
        SUM(bs.health_care_and_social_assistance)   as hospitals,
        SUM(p.playgrounds)                          as playgrounds,
        SUM(sc.cameras)                             as cameras

    FROM area_names a
    LEFT JOIN
-- get the crimes numbers
        (SELECT area_id, COUNT(*) AS crimes FROM crimes c
          INNER JOIN area_names  a
          ON st_contains(a."Geometry", c."Geometry")
          GROUP BY area_id) AS c
        ON a.area_id = c.area_id
    LEFT JOIN
-- get the schools locations
        (SELECT area_id, COUNT(*) AS schools FROM schools s
          INNER JOIN area_names  a
          ON st_contains(a."Geometry", s."Geometry")
          GROUP BY area_id) AS s
        ON a.area_id = s.area_id
    LEFT JOIN
-- get the playgrounds locations
        (SELECT area_id, COUNT(*) AS playgrounds FROM playgrounds p INNER JOIN area_names  a
            ON st_contains(a."Geometry", p.location::geography::geometry) WHERE p.type = 'Playground'
            GROUP BY area_id) AS p
        ON a.area_id = p.area_id
    LEFT JOIN
-- get the cameras locations
        (SELECT area_id, COUNT(*) AS cameras FROM safety_cameras sc INNER JOIN area_names  a
            ON st_contains(a."Geometry", sc.location::geography::geometry)
            GROUP BY area_id) AS sc
        ON a.area_id = sc.area_id
    INNER JOIN neighbourhoods n
-- get the population numbers, etc
        ON a.area_id = n.area_id
    INNER JOIN business_stats bs
-- get the business numbers
        ON a.area_id = bs.area_id
    WHERE accommodation_and_food_services IS NOT NULL
        AND n.population IS NOT NULL
        AND bs.retail_trade IS NOT NULL
        AND bs.health_care_and_social_assistance IS NOT NULL
    GROUP BY a.area_id) AS data
"""

scores = gpd.read_postgis(sql, conn)
scores = scores.to_crs("EPSG:4326") # bring it to proper geography
scores.rename(columns = {'geom':'geometry'}, inplace = True)
scores = scores.set_geometry("geometry")

# Dont forget to close the connection :)
conn.close()

In [14]:
scores.head()

Unnamed: 0,Area Name,School Score,Accom Score,Retail Score,Crime Score,Health Score,Entertainment Score,Safety Score,geometry
0,Avoca Beach - Copacabana,0.0,4.347826,4.611331,0.0,7.905138,0.0,0.0,"MULTIPOLYGON (((151.41373 -33.46559, 151.41361..."
1,Box Head - MacMasters Beach,0.0,2.093574,4.096122,321.174679,3.914072,0.0,0.0,"MULTIPOLYGON (((151.35398 -33.49854, 151.35397..."
2,Calga - Kulnura,0.455373,2.891964,8.882462,0.0,2.478827,0.0,0.0,"MULTIPOLYGON (((151.20460 -33.53298, 151.20456..."
3,Erina - Green Point,0.0,4.776287,10.18473,305.243753,12.572873,0.0,0.0,"MULTIPOLYGON (((151.36795 -33.43822, 151.36791..."
4,Gosford - Springfield,0.0,4.84911,8.614908,609.931424,12.690224,0.0,0.0,"MULTIPOLYGON (((151.31006 -33.42699, 151.31020..."


## Scores Normalisation

Now that we calculated scores for each suburb, we'll need to normalise them to get their z-scores so that the difference in the scores' scales does not affect our future calculations. We also need to calculate the final score for each suburb using the Sigmoid function with the formula:

$$ \mathrm{Total \; Score} = 𝒮(z_{school} + z_{accom} + z_{retail} - z_{crime} + z_{health} + z_{entertainment} + z_{safety}) $$

In [15]:
# Normalise the per-category scores and calculate total score using sigmoid function

# Normalise the scores using sklearn
scaler = StandardScaler()
cols = ['School Score','Accom Score', 'Retail Score', 'Crime Score', 'Health Score', 'Entertainment Score',
        'Safety Score']
scores[cols] = scaler.fit_transform(scores[cols])

## Alternatively, normalise the scores manually
# for each in scores.columns:
#     if each == 'Area ID' or each == 'Area Name' or each == 'geometry':
#         continue
#     mean = statistics.mean(scores[each])
#     stdev = statistics.stdev(scores[each])
#     for i in range(len(scores[each])):
#         x = scores[each][i]
#         norm_score = (x - mean) / stdev
#         scores.at[i, each] = norm_score

# Calculate the total score
s_score = []
for i in range(len(scores['School Score'])):
    x = scores['School Score'][i] + scores['Accom Score'][i] + scores['Retail Score'][i] - scores['Crime Score'][i] + scores['Health Score'][i] + 0.5*scores['Entertainment Score'][i] + 0.5*scores['Safety Score'][i]
    sigmoid = 1 / (1 + math.exp(-x))
    s_score.append(sigmoid)
    
scores['Total Score'] = s_score

In [16]:
scores.head()

Unnamed: 0,Area Name,School Score,Accom Score,Retail Score,Crime Score,Health Score,Entertainment Score,Safety Score,geometry,Total Score
0,Avoca Beach - Copacabana,-0.221168,-0.12248,-0.146265,-0.899975,0.058313,-0.163645,-0.081182,"MULTIPOLYGON (((151.41373 -33.46559, 151.41361...",0.585638
1,Box Head - MacMasters Beach,-0.221168,-0.16424,-0.149335,-0.65154,-0.185041,-0.163645,-0.081182,"MULTIPOLYGON (((151.35398 -33.49854, 151.35397...",0.452479
2,Calga - Kulnura,13.515242,-0.14945,-0.120807,-0.899975,-0.272554,-0.163645,-0.081182,"MULTIPOLYGON (((151.20460 -33.53298, 151.20456...",0.999999
3,Erina - Green Point,-0.221168,-0.114543,-0.113045,-0.663863,0.342926,-0.163645,-0.081182,"MULTIPOLYGON (((151.36795 -33.43822, 151.36791...",0.607215
4,Gosford - Springfield,-0.221168,-0.113194,-0.122402,-0.428181,0.350082,-0.163645,-0.081182,"MULTIPOLYGON (((151.31006 -33.42699, 151.31020...",0.549608


# Visualisation

Now the we finally have all the data ready, we can visualise it. I'm using ```Kepler.gl``` library with a preset config to plot out all the regions in our dataset on real-life maps. The resulting interactive map is saved to the ```Map Visualisation.html``` file.

In [17]:
warnings.filterwarnings('ignore') #remove library version warnings

# Create a config to pre-style the visualisation on the go
config = {
'version': 'v1',
 'config': {'visState': {'filters': [],
   'layers': [{'id': 'j1gpzy',
     'type': 'geojson',
     'config': {'dataId': 'Scores',
      'label': 'Scores',
      'color': [77, 193, 156],
      'highlightColor': [252, 242, 26, 255],
      'columns': {'geojson': 'geometry'},
      'isVisible': True,
      'visConfig': {'opacity': 0.29,
       'strokeOpacity': 0.8,
       'thickness': 0.5,
       'strokeColor': [255, 254, 230],
       'colorRange': {'name': 'ColorBrewer RdYlGn-6',
        'type': 'diverging',
        'category': 'ColorBrewer',
        'colors': ['#d73027',
         '#fc8d59',
         '#fee08b',
         '#d9ef8b',
         '#91cf60',
         '#1a9850']},
       'strokeColorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'radius': 10,
       'sizeRange': [0, 10],
       'radiusRange': [0, 50],
       'heightRange': [0, 500],
       'elevationScale': 5,
       'enableElevationZoomFactor': True,
       'stroked': True,
       'filled': True,
       'enable3d': False,
       'wireframe': False},
      'hidden': False,
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center'}]},
     'visualChannels': {'colorField': {'name': 'Total Score', 'type': 'real'},
      'colorScale': 'quantile',
      'strokeColorField': None,
      'strokeColorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear',
      'heightField': None,
      'heightScale': 'linear',
      'radiusField': None,
      'radiusScale': 'linear'}}],
   'interactionConfig': {'tooltip': {'fieldsToShow': {'Scores': [                                                   
       {'name': 'Area Name', 'format': None},
       {'name': 'Total Score', 'format': None},
       {'name': 'School Score', 'format': None},
       {'name': 'Accom Score', 'format': None},
       {'name': 'Retail Score', 'format': None},
       {'name': 'Crime Score', 'format': None},
       {'name': 'Health Score', 'format': None},
       {'name': 'Entertainment Score', 'format': None},
       {'name': 'Safety Score', 'format': None}]},
     'compareMode': False,
     'compareType': 'absolute',
     'enabled': True},
    'brush': {'size': 0.5, 'enabled': False},
    'geocoder': {'enabled': True},
    'coordinate': {'enabled': False}},
   'layerBlending': 'normal',
   'splitMaps': [],
   'animationConfig': {'currentTime': None, 'speed': 1}},
  'mapState': {'bearing': 0,
   'dragRotate': False,
   'latitude': -33.95918960088992,
   'longitude': 150.8342661979497,
   'pitch': 0,
   'zoom': 8.897228743611274,
   'isSplit': False},
  'mapStyle': {'styleType': 'light',
   'topLayerGroups': {},
   'visibleLayerGroups': {'label': True,
    'road': True,
    'border': False,
    'building': True,
    'water': True,
    'land': True,
    '3d building': False},
   'threeDBuildingColor': [218.82023004728686,
    223.47597962276103,
    223.47597962276103],
   'mapStyles': {}}}}

# Configure the visualition's datasets
try:
    map = KeplerGl(height = 600, width = 800)
    map.add_data(data = scores, name = "Scores")
    map.config = config
except:
    print('Map is already configured')

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [18]:
# Plot the visualiation for liveability scores of NSW suburbs

map.save_to_html(file_name='Map Visualisation.html')

Map saved to Map Visualisation.html!
