In [1]:
 !pip install folium

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
from datetime import datetime
from datetime import timedelta
import json
import requests
import time
import folium
from folium.plugins import HeatMap

Retrieving and Filtering Dataset

In [3]:
url = 'https://data.nsw.gov.au/data/dataset/aefcde60-3b0c-4bc0-9af1-6fe652944ec2/resource/21304414-1ff1-4243-a5d2-f52778048b29/download/confirmed_cases_table1_location.csv'  

df = pd.read_csv(url)

df = df[['notification_date', 'postcode', 'lga_name19']]
df

Unnamed: 0,notification_date,postcode,lga_name19
0,2020-01-25,2134,Burwood (A)
1,2020-01-25,2121,Parramatta (C)
2,2020-01-25,2071,Ku-ring-gai (A)
3,2020-01-27,2033,Randwick (C)
4,2020-03-01,2077,Hornsby (A)
...,...,...,...
973407,2022-02-07,2283,Lake Macquarie (C)
973408,2022-02-07,2019,Bayside (A)
973409,2022-02-07,2076,Ku-ring-gai (A)
973410,2022-02-07,2760,Penrith (C)


In [4]:
for i, row in df.iterrows():
  df.at[i, 'notification_date'] = datetime.strptime(df.at[i, 'notification_date'], '%Y-%m-%d')

df

Unnamed: 0,notification_date,postcode,lga_name19
0,2020-01-25 00:00:00,2134,Burwood (A)
1,2020-01-25 00:00:00,2121,Parramatta (C)
2,2020-01-25 00:00:00,2071,Ku-ring-gai (A)
3,2020-01-27 00:00:00,2033,Randwick (C)
4,2020-03-01 00:00:00,2077,Hornsby (A)
...,...,...,...
973407,2022-02-07 00:00:00,2283,Lake Macquarie (C)
973408,2022-02-07 00:00:00,2019,Bayside (A)
973409,2022-02-07 00:00:00,2076,Ku-ring-gai (A)
973410,2022-02-07 00:00:00,2760,Penrith (C)


In [5]:
current_date = datetime.now()
N_days_ago = 365 #CHANGE THIS VARIABLE TO CHANGE THE TIMEFRAME

filter_date = current_date - timedelta(days=N_days_ago)

working_data = df[df.notification_date > filter_date]

working_data

Unnamed: 0,notification_date,postcode,lga_name19
693455,2022-01-19 00:00:00,2443,Port Macquarie-Hastings (A)
693456,2022-01-19 00:00:00,2761,Blacktown (C)
693457,2022-01-19 00:00:00,2176,Fairfield (C)
693458,2022-01-19 00:00:00,2259,Central Coast (C) (NSW)
693459,2022-01-19 00:00:00,2286,Lake Macquarie (C)
...,...,...,...
973407,2022-02-07 00:00:00,2283,Lake Macquarie (C)
973408,2022-02-07 00:00:00,2019,Bayside (A)
973409,2022-02-07 00:00:00,2076,Ku-ring-gai (A)
973410,2022-02-07 00:00:00,2760,Penrith (C)


In [24]:
test = working_data
test = test.groupby("postcode")["postcode"].count()
test.index.name = 'y'
test = test.reset_index()
test = test.rename(columns={"postcode" : "count", "y" : "postcode"})
test.drop(test.tail(1).index, inplace = True)
test

Unnamed: 0,postcode,count
0,2000,703
1,2002,1
2,2004,1
3,2006,3
4,2007,203
...,...,...
696,2927,1
697,2931,1
698,2936,1
699,2957,1


Determining Longitude and Latitude




In [25]:
working_data = test
working_data.reset_index(drop=True, inplace=True)
working_data

working_data['latitude'] = None
working_data['longitude'] = None

working_data

Unnamed: 0,postcode,count,latitude,longitude
0,2000,703,,
1,2002,1,,
2,2004,1,,
3,2006,3,,
4,2007,203,,
...,...,...,...,...
696,2927,1,,
697,2931,1,,
698,2936,1,,
699,2957,1,,


In [27]:
base_url = 'https://nominatim.openstreetmap.org/search'

for i, row in working_data.iterrows():
  location = str(working_data.at[i, 'postcode']) 
  my_params= {'q': location + ', NSW, Australia','format':'json','limit':'1'}
  response = requests.get(base_url, params = my_params)
  match = response.json()

  try:
    longitude = float(match[0]['lon'])
    latitude = float(match[0]['lat'])
  except Exception:
    longitude = None
    latitude = None

  working_data.at[i, 'longitude'] = longitude
  working_data.at[i, 'latitude'] = latitude

  time.sleep(1)



working_data

Unnamed: 0,postcode,count,latitude,longitude
0,2000,703,-33.869576,151.206744
1,2002,1,-31.952603,152.555127
2,2004,1,-31.888061,148.109086
3,2006,3,-33.888129,151.190618
4,2007,203,-33.880021,151.197534
...,...,...,...,...
696,2927,1,-31.952603,152.555127
697,2931,1,-31.952603,152.555127
698,2936,1,-31.952603,152.555127
699,2957,1,-31.952603,152.555127


In [33]:
working_data = working_data.dropna()

Filtering data based on desired radius

In [28]:
desired_suburb = 'Manly' #CHANGE THIS TO CHANGE THE SUBURB

base_url = 'https://nominatim.openstreetmap.org/search'

my_params= {'q': desired_suburb + ', NSW, Australia','format':'json','limit':'1'}
response = requests.get(base_url, params = my_params)
match = response.json()

desired_longitude = float(match[0]['lon'])
desired_latitude = float(match[0]['lat'])


In [29]:
from math import sin, cos, sqrt, atan2, radians

def calculate_distance(desired_latitude, desired_longitude, latitude, longitude):
# approximate radius of earth in km
  R = 6373.0

  lat1 = radians(latitude)
  lon1 = radians(longitude)
  lat2 = radians(desired_latitude)
  lon2 = radians(desired_longitude)

  dlon = lon2 - lon1
  dlat = lat2 - lat1

  a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
  c = 2 * atan2(sqrt(a), sqrt(1 - a))

  distance = R * c

  return distance

In [34]:
for i, row in working_data.iterrows():
  distance = calculate_distance(desired_latitude, desired_longitude, working_data.at[i, 'latitude'], working_data.at[i, 'longitude'])

  if distance > 50: #CHANGE THIS TO CHANGE THE RADIUS
    working_data.drop(i, inplace=True)

working_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,postcode,count,latitude,longitude
0,2000,703,-33.869576,151.206744
3,2006,3,-33.888129,151.190618
4,2007,203,-33.880021,151.197534
5,2008,268,-33.887343,151.197534
6,2009,243,-33.872247,151.194282
...,...,...,...,...
596,2766,750,-33.786053,150.847596
597,2767,863,-33.765952,150.870072
598,2768,977,-33.730171,150.93101
599,2769,552,-33.710225,150.897901


In [35]:
working_data.reset_index(drop=True, inplace=True)
working_data

Unnamed: 0,postcode,count,latitude,longitude
0,2000,703,-33.869576,151.206744
1,2006,3,-33.888129,151.190618
2,2007,203,-33.880021,151.197534
3,2008,268,-33.887343,151.197534
4,2009,243,-33.872247,151.194282
...,...,...,...,...
217,2766,750,-33.786053,150.847596
218,2767,863,-33.765952,150.870072
219,2768,977,-33.730171,150.93101
220,2769,552,-33.710225,150.897901


Visualising Data

In [36]:
m = folium.Map(location = [-33.8, 151.2], tiles ='OpenStreetMap',  zoom_start=11)


for i, row in working_data.iterrows():
    lat = working_data.at[i, 'latitude']
    lng = working_data.at[i, 'longitude']

    folium.Circle(location = [lat, lng], popup = working_data.at[i, "postcode"],  color='crimson', fill = True, fill_color = "crimson", radius=float(working_data.at[i, "count"])).add_to(m)

m

In [37]:
hm = folium.Map(location = [-33.8, 151.2], tiles ='OpenStreetMap',  zoom_start=11)
map_values = working_data[["latitude", "longitude", "count"]].values.tolist()

HeatMap(map_values,
                min_opacity=0.05, 
                max_opacity=0.9, 
                radius=25,
                use_local_extrema=False).add_to(hm)

hm