# Read in LGA list and clean up for use in google api loops

In [1]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import time
from bs4 import BeautifulSoup as bs
# Google API Key
from config import gkey

# Relative path to the raw council (LGA) list CSV that is loaded into a DataFrame below
file = 'Raw_Data/Vic-LGA-List.csv'

In [2]:
# Establishing a connection to postgreSQL
import psycopg2
from sqlalchemy import create_engine
from entrancekey import postgresqlkey
from sqlalchemy import create_engine  
from sqlalchemy import Column, String  
from sqlalchemy.ext.declarative import declarative_base  
from sqlalchemy.orm import sessionmaker

In [3]:
# Create the SQLAlchemy database engine for the local PostgreSQL instance and open a connection
db_url = f'postgresql+psycopg2://postgres:{postgresqlkey}@localhost:5432/victorian_vacation_db'
engine = create_engine(db_url)
connection = engine.connect()
# Declarative base class for any ORM table definitions
base = declarative_base()

In [4]:
# Load the council CSV into a DataFrame; the bare last expression renders it as the cell output.
# NOTE(review): the first header comes through as 'ï»¿COUNCIL' — looks like a UTF-8 BOM decoded
# as ISO-8859-1; the column is renamed in a later cell.
council_city_df = pd.read_csv(filepath_or_buffer=file, encoding="ISO-8859-1")
council_city_df

Unnamed: 0,ï»¿COUNCIL,CEO NAME,DEAR CEO,CEO TITLE,MAYOR NAME,DEAR MAYOR,MAYOR TITLE,POSTAL,POSTAL SUBURB,PCODE,DX,ADDRESS,SUBURB/ TOWN,CODE,TEL,FAX,EMAIL,WWW SITE
0,Alpine Shire Council,Mr Charlie Bird,Mr Bird,Chief Executive Officer,Cr Peter Roper,Cr Roper,Mayor,PO Box 139,BRIGHT,3741,,Great Alpine Road,BRIGHT,3741,5755 0555,5755 1811,info@alpineshire.vic.gov.au,www.alpineshire.vic.gov.au
1,Ararat Rural City Council,Dr Tim Harrison,Dr Harrison,Chief Executive Officer,Cr Jo Armstrong,Cr Armstrong,Mayor,PO Box 246,ARARAT,3377,,59 Vincent Street,ARARAT,3377,5355 0200,5352 1695,council@ararat.vic.gov.au,www.ararat.vic.gov.au
2,Ballarat City Council,Ms Janet Dore,Ms Dore,Chief Executive Officer,Cr Ben Taylor,Cr Taylor,Mayor,PO Box 655,BALLARAT,3353,,25 Armstrong Street South,BALLARAT,3350,5320 5500,5333 4061,info@ballarat.vic.gov.au,www.ballarat.vic.gov.au
3,Banyule City Council,Ms Allison Beckwith,Ms Beckwith,Chief Executive Officer,Cr Alison Champion,Cr Champion,Mayor,PO Box 94,GREENSBOROUGH,3088,DX 97904 Ivanhoe,1 Flintoff Street,GREENSBOROUGH,3088,9490 4222,9499 9475,enquiries@banyule.vic.gov.au,www.banyule.vic.gov.au
4,Bass Coast Shire Council,Ms Ali Wastie,Ms Wastie,Chief Executive Officer,Cr Brett Tessari,Cr Tessari,Mayor,PO Box 118,WONTHAGGI,3995,DX 34903 Wonthaggi,76 McBride Avenue,WONTHAGGI,3995,1300 BCOAST (226278) or 5671 2211,5671 2222,basscoast@basscoast.vic.gov.au,www.basscoast.vic.gov.au
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,Wodonga City Council,Mr Mark Dixon,Mr Dixon,Chief Executive Officer,Cr Anna Speedie,Cr Speedie,Mayor,PO Box 923,WODONGA,3689,,104 Hovell Street,WODONGA,3690,02 6022 9300,02 6022 9322,info@wodonga.vic.gov.au,www.wodonga.vic.gov.au
75,Wyndham City Council,Ms Kelly Grigsby,Ms Grigsby,Chief Executive Officer,Cr Josh Gilligan,Cr Gilligan,Mayor,PO Box 197,WERRIBEE,3030,,45 Princes Highway,WERRIBEE,3030,9742 0777,9741 6237,mail@wyndham.vic.gov.au,www.wyndham.vic.gov.au
76,Yarra City Council,Ms Vijaya Vaidyanath,Ms Vaidyanath,Chief Executive Officer,Cr Misha Coleman,Cr Coleman,Mayor,PO Box 168,RICHMOND,3121,DX 30205 Richmond,333 Bridge Road,RICHMOND,3121,9205 5555,,info@yarracity.vic.gov.au,www.yarracity.vic.gov.au
77,Yarra Ranges Shire Council,Ms Tammi Rose,Ms Rose,Chief Executive Officer,Cr Richard Higgins,Cr Higgins,Mayor,PO Box 105,LILYDALE,3140,DX 34051 Lilydale,Anderson Street,LILYDALE,3140,1300 368 333,9735 4249,mail@yarraranges.vic.gov.au,www.yarraranges.vic.gov.au


In [5]:
print(f'The number of Local Government Areas (councils) in Vic is: {len(council_city_df)}')

The number of Local Government Areas (councils) in Vic is: 79


In [6]:
# List the column headings so the unwanted ones can be dropped in the next cell
council_city_df.columns.unique()

Index(['ï»¿COUNCIL', 'CEO NAME', 'DEAR CEO', 'CEO TITLE', 'MAYOR NAME',
       'DEAR MAYOR', 'MAYOR TITLE', 'POSTAL', 'POSTAL SUBURB', 'PCODE', 'DX',
       'ADDRESS', 'SUBURB/ TOWN', 'CODE', 'TEL', 'FAX', 'EMAIL', 'WWW SITE'],
      dtype='object')

In [7]:
# Keep only the council name, town, postcode and website columns
unwanted_columns = ['CEO NAME', 'DEAR CEO', 'CEO TITLE',
                    'MAYOR NAME', 'DEAR MAYOR', 'MAYOR TITLE',
                    'POSTAL', 'POSTAL SUBURB', 'PCODE', 'DX',
                    'ADDRESS', 'TEL', 'FAX', 'EMAIL']
council_city_df = (
    council_city_df
    .drop(columns=unwanted_columns)
    # The council header carries stray leading characters from the raw file; normalise it
    .rename(columns={"ï»¿COUNCIL": "COUNCIL"})
)
council_city_df

Unnamed: 0,COUNCIL,SUBURB/ TOWN,CODE,WWW SITE
0,Alpine Shire Council,BRIGHT,3741,www.alpineshire.vic.gov.au
1,Ararat Rural City Council,ARARAT,3377,www.ararat.vic.gov.au
2,Ballarat City Council,BALLARAT,3350,www.ballarat.vic.gov.au
3,Banyule City Council,GREENSBOROUGH,3088,www.banyule.vic.gov.au
4,Bass Coast Shire Council,WONTHAGGI,3995,www.basscoast.vic.gov.au
...,...,...,...,...
74,Wodonga City Council,WODONGA,3690,www.wodonga.vic.gov.au
75,Wyndham City Council,WERRIBEE,3030,www.wyndham.vic.gov.au
76,Yarra City Council,RICHMOND,3121,www.yarracity.vic.gov.au
77,Yarra Ranges Shire Council,LILYDALE,3140,www.yarraranges.vic.gov.au


In [8]:
# Plain Python list of the town/suburb for each council (duplicates are kept)
town_column = council_city_df["SUBURB/ TOWN"]
cities = town_column.to_list()
print(cities)

['BRIGHT', 'ARARAT', 'BALLARAT', 'GREENSBOROUGH', 'WONTHAGGI', 'DROUIN', 'SANDRINGHAM', 'BENALLA', 'CAMBERWELL', 'SUNSHINE', 'WYCHEPROOF', 'ECHUCA', 'OFFICER', 'NARRE WARREN', 'MARYBOROUGH', 'COLAC', 'CAMPERDOWN', 'PRESTON', 'BAIRNSDALE', 'FRANKSTON', 'KERANG', 'CAULFIELD SOUTH', 'PORTLAND', 'BANNOCKBURN', 'BENDIGO', 'DANDENONG', 'GEELONG', 'SHEPPARTON', 'DAYLESFORD', 'NHILL', 'ALTONA', 'HORSHAM', 'BROADMEADOWS', 'BEECHWORTH', 'CHELTENHAM', 'WANTIRNA SOUTH', 'MORWELL', 'WEDDERBURN', 'KYNETON', 'DONCASTER', 'MANSFIELD', 'FOOTSCRAY', 'RINGWOOD', 'MELBOURNE', 'MELTON', 'MILDURA', 'BROADFORD', 'COBRAM', 'GLEN WAVERLEY', 'MOONEE PONDS', 'BALLAN', 'COBURG', 'ROSEBUD', 'CASTLEMAINE', 'PORT FAIRY', 'ALEXANDRA', 'GREENSBOROUGH', 'STAWELL', 'ST KILDA', 'BEAUFORT', 'QUEENSCLIFF', 'LEONGATHA', 'HAMILTON', 'MALVERN', 'EUROA', 'TORQUAY', 'SWAN HILL', 'TALLANGATTA', 'WANGARATTA', 'WARRNAMBOOL', 'SALE', 'EDENHOPE', 'NUNAWADING', 'SOUTH MORANG', 'WODONGA', 'WERRIBEE', 'RICHMOND', 'LILYDALE', 'WARRACKNA

In [9]:
# NumPy array version of the city list, iterated by the Google API loops below
cities_list = np.asarray(cities)
print(cities_list)

['BRIGHT' 'ARARAT' 'BALLARAT' 'GREENSBOROUGH' 'WONTHAGGI' 'DROUIN'
 'SANDRINGHAM' 'BENALLA' 'CAMBERWELL' 'SUNSHINE' 'WYCHEPROOF' 'ECHUCA'
 'OFFICER' 'NARRE WARREN' 'MARYBOROUGH' 'COLAC' 'CAMPERDOWN' 'PRESTON'
 'BAIRNSDALE' 'FRANKSTON' 'KERANG' 'CAULFIELD SOUTH' 'PORTLAND'
 'BANNOCKBURN' 'BENDIGO' 'DANDENONG' 'GEELONG' 'SHEPPARTON' 'DAYLESFORD'
 'NHILL' 'ALTONA' 'HORSHAM' 'BROADMEADOWS' 'BEECHWORTH' 'CHELTENHAM'
 'WANTIRNA SOUTH' 'MORWELL' 'WEDDERBURN' 'KYNETON' 'DONCASTER' 'MANSFIELD'
 'FOOTSCRAY' 'RINGWOOD' 'MELBOURNE' 'MELTON' 'MILDURA' 'BROADFORD'
 'COBRAM' 'GLEN WAVERLEY' 'MOONEE PONDS' 'BALLAN' 'COBURG' 'ROSEBUD'
 'CASTLEMAINE' 'PORT FAIRY' 'ALEXANDRA' 'GREENSBOROUGH' 'STAWELL'
 'ST KILDA' 'BEAUFORT' 'QUEENSCLIFF' 'LEONGATHA' 'HAMILTON' 'MALVERN'
 'EUROA' 'TORQUAY' 'SWAN HILL' 'TALLANGATTA' 'WANGARATTA' 'WARRNAMBOOL'
 'SALE' 'EDENHOPE' 'NUNAWADING' 'SOUTH MORANG' 'WODONGA' 'WERRIBEE'
 'RICHMOND' 'LILYDALE' 'WARRACKNABEAL']


# Get Tourist Attractions using loop through cities and calling google api

In [10]:
# Use the Google Places text search to find tourist attractions for each city

# Parallel result lists: entry i of each list describes the same place
name_ta = []
address_ta = []
rating_ta = []
city_ta = []        # every city queried, whether or not it returned results
city_searched = []  # city that produced each recorded place
target_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

for city in cities_list:
    citi = city
    city_ta.append(citi)
    my_phrase = f"tourist attraction establishment in {city} VIC, AU"
    params = {
        "query": my_phrase,
        "key": gkey
    }
    response = requests.get(target_url, params=params)
    to_dos = response.json()
    counter = 0
    # Do not print response.url in a public repo: the API key is part of the request
    print("----------------------")
    for do in to_dos["results"]:
        counter += 1
        print("-------------------")
        print(f'{citi}')
        try:
            called = do["name"]
            locate = do["formatted_address"]
            rated = do["rating"]
        except KeyError:
            # A result missing any of the three fields is skipped entirely
            print('not found')
            continue
        # Record the city only together with the place details. Appending it before the
        # lookups left city_searched longer than the other lists whenever a field was
        # missing, which shifted the city labels in the resulting DataFrame.
        city_searched.append(citi)
        print("-------------------")
        name_ta.append(called)
        print(called)
        address_ta.append(locate)
        print(locate)
        rating_ta.append(rated)
        print(rated)

    # Pause after a city that returned exactly 20 results
    # (presumably the API page size / rate limiting — TODO confirm)
    if counter == 20:
        time.sleep(1.01)
        counter = 0


a
0
-------------------
SALE
-------------------
Sale and Districts Aeromodellers Club
Centre Rd, Sale VIC 3850, Australia
4.9
-------------------
SALE
-------------------
Landmark
23 Cunninghame St, Sale VIC 3850, Australia
0
-------------------
SALE
-------------------
Saint Mary's Cathedral
47-57 Foster St, Sale VIC 3850, Australia
4
-------------------
SALE
-------------------
Sale Garden Supplies & Mini-Mix Concrete
40-42 Princes Hwy, Sale VIC 3850, Australia
4.2
-------------------
SALE
-------------------
Wurruk Community Park
Wurruk VIC 3850, Australia
0
----------------------
-------------------
EDENHOPE
-------------------
Edenhope Information Centre
96 Elizabeth St, Edenhope VIC 3318, Australia
4.8
-------------------
EDENHOPE
-------------------
Kurrayah Swamp W.R
Powers Creek Rd, Edenhope VIC 3318, Australia
0
-------------------
EDENHOPE
-------------------
Parsons (Collins) Lake L.R
Sullivans Rd, Edenhope VIC 3318, Australia
0
-------------------
EDENHOPE
---------------

# Create and clean df from tourist attraction data gathered 

In [11]:
print(f"Total returned Tourist Attractions: {len(name_ta)}")

Total returned Tourist Attractions: 1550


In [12]:
# Create a df from the tourist attractions; column 0 holds the names.
# Each extra column is wrapped in pd.Series so pandas aligns it on the index
# instead of raising when a list's length differs from the frame's.
tourist_site_df = pd.DataFrame(name_ta)
for column_name, values in (
    ("tourist_site_address", address_ta),
    ("tourist_site_rating", rating_ta),
    ("tourist_site_city", city_searched),
):
    tourist_site_df[column_name] = pd.Series(values)


In [13]:
# Give the unnamed first column (label 0) a descriptive name
column_labels = {0: "tourist_site_name"}
tourist_site_df = tourist_site_df.rename(columns=column_labels)
tourist_site_df

Unnamed: 0,tourist_site_name,tourist_site_address,tourist_site_rating,tourist_site_city
0,Huggins Lookout Point,"Huggins Rd, Bright VIC 3741, Australia",4.4,BRIGHT
1,Apex Lookout,"Bright VIC 3741, Australia",4.4,BRIGHT
2,Howitt Park,"7 Riverside Ave, Bright VIC 3741, Australia",4.6,BRIGHT
3,Tower Hill Lookout,"Bright VIC 3741, Australia",4.4,BRIGHT
4,Bright Centenary Park,"Canyon Walk, Bright VIC 3741, Australia",4.7,BRIGHT
...,...,...,...,...
1545,Warracknabeal Primary School,"5 Werrigar St, Warracknabeal VIC 3393, Australia",5.0,WARRACKNABEAL
1546,Werrigar Roadhouse & Motel,"213 Henty Hwy, Warracknabeal VIC 3393, Australia",4.3,WARRACKNABEAL
1547,Warracknabeal Secondary College,"1 Tregear St, Warracknabeal VIC 3393, Australia",0.0,WARRACKNABEAL
1548,Warracknabeal Early Learning Centre,"11 Werrigar St, Warracknabeal VIC 3393, Australia",0.0,WARRACKNABEAL


In [14]:
# Remove repeated (name, city) pairs, keeping the last occurrence of each
dedupe_keys = ['tourist_site_name', 'tourist_site_city']
tourist_site_xdbl_df = tourist_site_df.drop_duplicates(subset=dedupe_keys, keep='last')
tourist_site_xdbl_df

Unnamed: 0,tourist_site_name,tourist_site_address,tourist_site_rating,tourist_site_city
0,Huggins Lookout Point,"Huggins Rd, Bright VIC 3741, Australia",4.4,BRIGHT
1,Apex Lookout,"Bright VIC 3741, Australia",4.4,BRIGHT
2,Howitt Park,"7 Riverside Ave, Bright VIC 3741, Australia",4.6,BRIGHT
3,Tower Hill Lookout,"Bright VIC 3741, Australia",4.4,BRIGHT
4,Bright Centenary Park,"Canyon Walk, Bright VIC 3741, Australia",4.7,BRIGHT
...,...,...,...,...
1545,Warracknabeal Primary School,"5 Werrigar St, Warracknabeal VIC 3393, Australia",5.0,WARRACKNABEAL
1546,Werrigar Roadhouse & Motel,"213 Henty Hwy, Warracknabeal VIC 3393, Australia",4.3,WARRACKNABEAL
1547,Warracknabeal Secondary College,"1 Tregear St, Warracknabeal VIC 3393, Australia",0.0,WARRACKNABEAL
1548,Warracknabeal Early Learning Centre,"11 Werrigar St, Warracknabeal VIC 3393, Australia",0.0,WARRACKNABEAL


In [15]:
# Save the de-duplicated tourist sites as UTF-8 CSV, without the index column
tourist_site_xdbl_df.to_csv("Cleaned_data/tourist_sites.csv", encoding="utf-8", index=False)

# Get restaurants and cafes by looping through the cities and calling the Google API

In [18]:
# Loop through each city to search for restaurants and cafes

# Parallel result lists: entry i of each list describes the same eatery
name_eat = []
address_eat = []
rating_eat = []
city_search_eat = []
target_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

for city in cities_list:
    citi = city
    # NOTE(review): "resturant" is misspelled in the query sent to the API; left unchanged
    # because correcting it may change which places are returned — confirm before fixing
    my_phrase = f"resturant cafe establishment in {city} VIC, AU"
    params = {
        "query": my_phrase,
        "key": gkey
    }
    response = requests.get(target_url, params=params)
    foodies = response.json()
    counter = 0
    # Do not print response.url in a public repo: the API key is part of the request
    print("----------------------")
    for food in foodies["results"]:
        counter += 1
        print("-------------------")
        print(f'{citi}')
        print("-------------------")
        print(f'{counter}')
        print("-------------------")
        try:
            called = food["name"]
            locate = food["formatted_address"]
            rated = food["rating"]
        except KeyError:
            # A result missing any of the three fields is skipped entirely
            print('not found')
            continue
        # Record the city only together with the eatery details. Appending it before the
        # lookups left city_search_eat longer than the other lists whenever a field was
        # missing, which shifted the city labels in the resulting DataFrame.
        city_search_eat.append(citi)
        print("-------------------")
        name_eat.append(called)
        print(called)
        address_eat.append(locate)
        print(locate)
        rating_eat.append(rated)
        print(rated)

    # Pause after a city that returned exactly 20 results
    # (presumably the API page size / rate limiting — TODO confirm)
    if counter == 20:
        time.sleep(1.01)
        counter = 0



UNAWADING
-------------------
8
-------------------
-------------------
Tabla
141 Springvale Rd, Nunawading VIC 3131, Australia
4.3
-------------------
NUNAWADING
-------------------
9
-------------------
-------------------
Mr President Cafe & Eatery
2 Wood St, Nunawading VIC 3131, Australia
4.5
-------------------
NUNAWADING
-------------------
10
-------------------
-------------------
Little Hugh
150 Rooks Rd, Nunawading VIC 3131, Australia
4.5
-------------------
NUNAWADING
-------------------
11
-------------------
-------------------
Toast Nunawading
144 Junction Rd, Nunawading VIC 3131, Australia
4.9
-------------------
NUNAWADING
-------------------
12
-------------------
-------------------
Mayura Thai Restaurant
291A Springfield Rd, Nunawading VIC 3131, Australia
4.5
-------------------
NUNAWADING
-------------------
13
-------------------
-------------------
Flavours of Mekong Vietnamese & Southeast Asian cuisine
156 Springvale Rd, Nunawading VIC 3131, Australia
4.6
-------

# Create and clean df from eatery data gathered 

In [19]:
print(f"Total returned Eateries: {len(name_eat)}")

Total returned Eateries: 1452


In [20]:
# Build the eateries frame: names in the first column, then address, rating and city.
# The extra columns are passed as pd.Series so they are aligned on the index.
eateries_df = (
    pd.DataFrame(name_eat)
    .assign(
        eatery_address=pd.Series(address_eat),
        eatery_rating=pd.Series(rating_eat),
        eatery_city=pd.Series(city_search_eat),
    )
    # The name column is created with the default label 0; rename it
    .rename(columns={0: "eatery_name"})
)
eateries_df

Unnamed: 0,eatery_name,eatery_address,eatery_rating,eatery_city
0,Suganya's Thai Restaurant,"2/11 Ireland St, Bright VIC 3741, Australia",4.2,BRIGHT
1,The Riverdeck Kitchen,"16 Howitt Ln, Bright VIC 3741, Australia",4.2,BRIGHT
2,Bright Chinese Restaurant,"108 Gavan St, Bright VIC 3741, Australia",3.5,BRIGHT
3,Elm Dining,"98 Gavan St, Bright VIC 3741, Australia",4.6,BRIGHT
4,Alpine Hotel Bright,"7-9 Anderson St, Bright VIC 3741, Australia",4.0,BRIGHT
...,...,...,...,...
1447,Cafe Pharmacino,"106-108 Scott St, Warracknabeal VIC 3393, Aust...",0.0,WARRACKNABEAL
1448,Cafe Peppercorn,"122 Scott St, Warracknabeal VIC 3393, Australia",4.4,WARRACKNABEAL
1449,Down Town Takeaway,"77 Scott St, Warracknabeal VIC 3393, Australia",4.0,WARRACKNABEAL
1450,MKS Warracknabeal Kebab & Pizza,"167 Scott St, Warracknabeal VIC 3393, Australia",4.4,WARRACKNABEAL


In [21]:
# Remove repeated (name, city) pairs, keeping the last occurrence of each
dedupe_keys = ['eatery_name', 'eatery_city']
eatery_xdbl_df = eateries_df.drop_duplicates(subset=dedupe_keys, keep='last')
eatery_xdbl_df

Unnamed: 0,eatery_name,eatery_address,eatery_rating,eatery_city
0,Suganya's Thai Restaurant,"2/11 Ireland St, Bright VIC 3741, Australia",4.2,BRIGHT
1,The Riverdeck Kitchen,"16 Howitt Ln, Bright VIC 3741, Australia",4.2,BRIGHT
2,Bright Chinese Restaurant,"108 Gavan St, Bright VIC 3741, Australia",3.5,BRIGHT
3,Elm Dining,"98 Gavan St, Bright VIC 3741, Australia",4.6,BRIGHT
4,Alpine Hotel Bright,"7-9 Anderson St, Bright VIC 3741, Australia",4.0,BRIGHT
...,...,...,...,...
1447,Cafe Pharmacino,"106-108 Scott St, Warracknabeal VIC 3393, Aust...",0.0,WARRACKNABEAL
1448,Cafe Peppercorn,"122 Scott St, Warracknabeal VIC 3393, Australia",4.4,WARRACKNABEAL
1449,Down Town Takeaway,"77 Scott St, Warracknabeal VIC 3393, Australia",4.0,WARRACKNABEAL
1450,MKS Warracknabeal Kebab & Pizza,"167 Scott St, Warracknabeal VIC 3393, Australia",4.4,WARRACKNABEAL


In [22]:
# Save the de-duplicated eateries as UTF-8 CSV, without the index column
eatery_xdbl_df.to_csv("Cleaned_data/eateries.csv", encoding="utf-8", index=False)

# Get accommodation using loop through cities and calling google api

In [24]:
# Loop through each city to search for accommodation

# Parallel result lists: entry i of each list describes the same accommodation option
name_stay = []
address_stay = []
rating_stay = []
city_search_stay = []
target_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

for city in cities_list:
    citi = city
    my_phrase = f"hotel motel accommodation near {city} VIC, AU"
    params = {
        "query": my_phrase,
        "key": gkey
    }
    response = requests.get(target_url, params=params)
    stays = response.json()
    counter = 0
    # Do not print response.url in a public repo: the API key is part of the request
    print("----------------------")
    for stay in stays["results"]:
        counter += 1
        print("-------------------")
        print(f'{citi}')
        print("-------------------")
        print(f'{counter}')
        print("-------------------")
        try:
            called = stay["name"]
            locate = stay["formatted_address"]
            rated = stay["rating"]
        except KeyError:
            # A result missing any of the three fields is skipped entirely
            print('not found')
            continue
        # Record the city only together with the place details. Appending it before the
        # lookups left city_search_stay longer than the other lists whenever a field was
        # missing, which shifted the city labels in the resulting DataFrame.
        city_search_stay.append(citi)
        print("-------------------")
        name_stay.append(called)
        print(called)
        address_stay.append(locate)
        print(locate)
        rating_stay.append(rated)
        print(rated)

    # Pause after a city that returned exactly 20 results
    # (presumably the API page size / rate limiting — TODO confirm)
    if counter == 20:
        time.sleep(1.01)
        counter = 0

l VIC 3131, Australia
4.3
-------------------
NUNAWADING
-------------------
4
-------------------
-------------------
Quality Hotel Manor
669 Maroondah Hwy, Mitcham VIC 3132, Australia
3.8
-------------------
NUNAWADING
-------------------
5
-------------------
-------------------
Best Western Plus The Tudor
1101 Whitehorse Rd, Box Hill VIC 3128, Australia
3.6
-------------------
NUNAWADING
-------------------
6
-------------------
-------------------
Beau Monde International
934 Doncaster Rd, Doncaster East VIC 3109, Australia
3.9
-------------------
NUNAWADING
-------------------
7
-------------------
-------------------
Burwood East Motel
355 Blackburn Road, Melbourne (burwood East) VIC 3151, Australia
4
-------------------
NUNAWADING
-------------------
8
-------------------
-------------------
Motel Maroondah
768 Whitehorse Rd, Mont Albert VIC 3127, Australia
2.5
-------------------
NUNAWADING
-------------------
9
-------------------
-------------------
Nightcap at Manhattan Hot

# Create and clean df from accommodation data gathered 

In [25]:
print(f"Total returned Accommodation Options: {len(name_stay)}")

Total returned Accommodation Options: 1297


In [26]:
# Build the accommodation frame: names in the first column, then address, rating and city.
# The extra columns are passed as pd.Series so they are aligned on the index.
accommodation_df = (
    pd.DataFrame(name_stay)
    .assign(
        accommodation_address=pd.Series(address_stay),
        accommodation_rating=pd.Series(rating_stay),
        accommodation_city=pd.Series(city_search_stay),
    )
    # The name column is created with the default label 0; rename it
    .rename(columns={0: "accommodation_name"})
)
accommodation_df

Unnamed: 0,accommodation_name,accommodation_address,accommodation_rating,accommodation_city
0,Ovens Valley Motor Inn,"Great Alpine Rd, Bright VIC 3741, Australia",4.1,BRIGHT
1,Bright Motor Inn,"1-5 Delany Ave, Bright VIC 3741, Australia",4.4,BRIGHT
2,Bright Colonial Motel,"56 Gavan St, Bright VIC 3741, Australia",4.6,BRIGHT
3,John Bright Motor Inn,"10-12 Wood St, Bright VIC 3741, Australia",4.3,BRIGHT
4,Riverbank Park Motel,"69 Gavan St, Bright VIC 3741, Australia",4.1,BRIGHT
...,...,...,...,...
1292,Wild Cattle Creek Estate,"473 Warburton Hwy, Wandin North VIC 3139, Aust...",4.4,LILYDALE
1293,CountryPlace,"180 Olinda Creek Rd, Kalorama VIC 3766, Australia",4.4,LILYDALE
1294,Mount Evelyn Retreat,"4 Borang Ave, Mount Evelyn VIC 3796, Australia",4.9,LILYDALE
1295,Country Roads Motor Inn,"197 Henty Hwy, Warracknabeal VIC 3393, Australia",4.0,WARRACKNABEAL


In [27]:
# Remove repeated (name, city) pairs, keeping the last occurrence of each
dedupe_keys = ['accommodation_name', 'accommodation_city']
accommodation_xdbl_df = accommodation_df.drop_duplicates(subset=dedupe_keys, keep='last')
accommodation_xdbl_df

Unnamed: 0,accommodation_name,accommodation_address,accommodation_rating,accommodation_city
0,Ovens Valley Motor Inn,"Great Alpine Rd, Bright VIC 3741, Australia",4.1,BRIGHT
1,Bright Motor Inn,"1-5 Delany Ave, Bright VIC 3741, Australia",4.4,BRIGHT
2,Bright Colonial Motel,"56 Gavan St, Bright VIC 3741, Australia",4.6,BRIGHT
3,John Bright Motor Inn,"10-12 Wood St, Bright VIC 3741, Australia",4.3,BRIGHT
4,Riverbank Park Motel,"69 Gavan St, Bright VIC 3741, Australia",4.1,BRIGHT
...,...,...,...,...
1292,Wild Cattle Creek Estate,"473 Warburton Hwy, Wandin North VIC 3139, Aust...",4.4,LILYDALE
1293,CountryPlace,"180 Olinda Creek Rd, Kalorama VIC 3766, Australia",4.4,LILYDALE
1294,Mount Evelyn Retreat,"4 Borang Ave, Mount Evelyn VIC 3796, Australia",4.9,LILYDALE
1295,Country Roads Motor Inn,"197 Henty Hwy, Warracknabeal VIC 3393, Australia",4.0,WARRACKNABEAL


In [28]:
# Save the de-duplicated accommodation options as UTF-8 CSV, without the index column
accommodation_xdbl_df.to_csv("Cleaned_data/lga_accommodation.csv", encoding="utf-8", index=False)

# Get long-term climate averages (max/min temperature, rainfall and rain days) for LGA cities

In [31]:
# Scrape the climate-history table for every city from the Elders Weather site
weather = []       # text of each table cell, in page order
city_weather = []  # city that each entry of `weather` belongs to

for city in cities:
    url = f'https://www.eldersweather.com.au/climate-history/vic/{city}'
    response = requests.get(url)
    soup = bs(response.text, 'html.parser')
    # Only the first <table> on the page is read
    my_table = soup.findChildren('table')[0]
    for row in my_table.findChildren(['th', 'tr']):
        for cell in row.findChildren('td'):
            # Strip embedded newlines from the cell text
            value = cell.string.replace('\n', '')
            weather.append(value)
            city_weather.append(city)
            print(f'City: {city}, Weather {value}\n---------------------------------\n')
    

-----------------

City: SOUTH MORANG, Weather 14.4
---------------------------------

City: SOUTH MORANG, Weather 13.9
---------------------------------

City: SOUTH MORANG, Weather 15.2
---------------------------------

City: SOUTH MORANG, Weather 17.7
---------------------------------

City: SOUTH MORANG, Weather 20.4
---------------------------------

City: SOUTH MORANG, Weather 23.5
---------------------------------

City: SOUTH MORANG, Weather 25.8
---------------------------------

City: SOUTH MORANG, Weather 20.9
---------------------------------

City: SOUTH MORANG, Weather 14.5
---------------------------------

City: SOUTH MORANG, Weather 14.6
---------------------------------

City: SOUTH MORANG, Weather 12.9
---------------------------------

City: SOUTH MORANG, Weather 10.1
---------------------------------

City: SOUTH MORANG, Weather 7.9
---------------------------------

City: SOUTH MORANG, Weather 6.3
---------------------------------

City: SOUTH MORANG, Weather 6.0

In [32]:
# Total number of scraped table values across all cities
print(len(weather))

4108


In [33]:
# Number of observations scraped per city, derived from the data instead of
# hard-coding the totals (4108 values / 79 cities at the time of writing)
obs_needed = len(weather) / len(cities)
obs_needed

52.0

In [34]:
# Labels for the scraped observations: for each measure (max, min, mm, rain_days)
# there is one label per month followed by an annual ('ann') label
periods = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep',
           'oct', 'nov', 'dec', 'ann']
measures = ['max', 'min', 'mm', 'rain_days']
obs = [f'{period}_{measure}' for measure in measures for period in periods]

In [35]:
# Repeat the observation labels once per city so they line up with the scraped values.
# The repeat count comes from the city list rather than a hard-coded 79.
obs_list = obs * len(cities)

In [36]:
# Should equal len(weather) so labels and values pair up one-to-one
print(len(obs_list))

4108


In [37]:
# Build the weather frame: observation label, scraped value and city on each row.
# The value and city columns are passed as pd.Series so they are aligned on the index.
av_weather_df = (
    pd.DataFrame(obs_list)
    .assign(averages=pd.Series(weather), city=pd.Series(city_weather))
    # The label column is created with the default label 0; rename it
    .rename(columns={0: "long-term_avg_obs"})
)
av_weather_df

Unnamed: 0,long-term_avg_obs,averages,city
0,jan_max,29.2,BRIGHT
1,feb_max,29.1,BRIGHT
2,mar_max,25.8,BRIGHT
3,apr_max,20.8,BRIGHT
4,may_max,16.0,BRIGHT
...,...,...,...
4103,sep_rain_days,12.0,WARRACKNABEAL
4104,oct_rain_days,8.5,WARRACKNABEAL
4105,nov_rain_days,7.0,WARRACKNABEAL
4106,dec_rain_days,5.7,WARRACKNABEAL


In [38]:
# Save the weather observations as UTF-8 CSV, without the index column
av_weather_df.to_csv("Cleaned_data/weather.csv", encoding="utf-8", index=False)

# Hotel Pricing extraction

In [15]:
from splinter import Browser
from bs4 import BeautifulSoup
import time
import pandas

In [None]:
# Launch a visible (non-headless) Chrome session through splinter
# NOTE(review): the chromedriver location is hard-coded — adjust for the machine running this
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [None]:
# Open the Holidu search for Victoria; the query string fixes check-in 2020-12-19,
# check-out 2021-01-03 and 2 adults
url = 'https://www.holidu.com.au/s/Victoria--Australia?checkin=2020-12-19&checkout=2021-01-03&adults=2'
browser.visit(url)

In [None]:
# Parse the page currently loaded in the browser and collect price, name, city and
# rating for each hotel listing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all elements that contain hotel information
hotels = soup.find_all('div', class_='css-1nh1zh gallery-list-item-info')

hotel_price = []
hotel_name = []
hotel_city = []
hotel_rating = []

for hotel in hotels:
    try:
        price = hotel.find('div', class_="css-149dwz").text
        # Name and city both live inside the same header div
        header = hotel.find('div', class_='css-w7fdsd')
        name = header.find('h3').find('a')['title']
        city = header.find('span', class_="locationLabel").text
        rating = hotel.find('div', class_='css-7h0wkm has-number').text

        # Skip listings where any of the four values is empty
        if not (price and name and city and rating):
            continue

        hotel_price.append(price)
        hotel_name.append(name)
        hotel_city.append(city)
        hotel_rating.append(rating)

        print(price)
        print(name)
        print(city)
        print(rating)
        print('----------------------')

    except Exception as e:
        # A missing element makes .find() return None; report the error and move on
        print(e)

In [None]:
import pandas as pd
# Assemble the scraped lists into one frame, one row per hotel listing
hotel_columns = dict(
    Hotel_name=hotel_name,
    Hotel_city=hotel_city,
    Hotel_price=hotel_price,
    Hotel_rating=hotel_rating,
)
hotel_price_df = pd.DataFrame(hotel_columns)
hotel_price_df.head()

In [None]:
# Save the raw scraped listings as UTF-8 CSV, without the index column
hotel_price_df.to_csv('Cleaned_data/Holidu_rating.csv', index=False, encoding='utf-8')

In [None]:
# Reload the saved listings from disk
hotel_price_df = pd.read_csv("Cleaned_data/Holidu_rating.csv")

In [None]:
# Strip the 'A' and '$' characters from the price text. regex=False makes both patterns
# literal, so '$' is removed as a character rather than interpreted as the regex
# end-of-string anchor (the default for `regex` differs between pandas versions).
hotel_price_df['Hotel_price'] = (
    hotel_price_df['Hotel_price']
    .str.replace('A', '', regex=False)
    .str.replace('$', '', regex=False)
)
hotel_price_df.head()

In [None]:
# Keep only the text before the first "/". Taking element 0 of the split yields a
# Series, which is what a single-column assignment expects; expand=True returned a
# DataFrame with one column per piece instead.
hotel_price_df['Hotel_price'] = hotel_price_df['Hotel_price'].str.split("/", n=1).str[0]
hotel_price_df.head()

In [None]:
# Non-null counts per column for rows whose city is just "Victoria"
hotel_price_df.loc[hotel_price_df['Hotel_city']=="Victoria"].count()

In [None]:
# Non-null counts per column for the whole frame
hotel_price_df.count()

In [None]:
# Drop listings whose city is only "Victoria". The .copy() makes the result an
# independent frame, so the column assignments in later cells do not write into a
# slice of the original (avoids SettingWithCopyWarning).
hotel_price_df = hotel_price_df[hotel_price_df.Hotel_city != 'Victoria'].copy()
hotel_price_df.count()

In [None]:
# Preview the first rows after filtering
hotel_price_df.head()

In [None]:
# Remove commas from the hotel name and city text
for text_column in ('Hotel_name', 'Hotel_city'):
    hotel_price_df[text_column] = hotel_price_df[text_column].str.replace(',', '')

hotel_price_df.head()

In [None]:
# Drop the first row of the frame
# NOTE(review): the reason for discarding row 0 is not visible here, and re-running this
# cell drops a further row each time — confirm intent
hotel_price_df=hotel_price_df.drop(hotel_price_df.index[0])

In [None]:
# Write into Cleaned_data/ because the following cell reads 'Cleaned_data/hotel_price.csv';
# the previous bare filename saved to the working directory instead.
hotel_price_df.to_csv('Cleaned_data/hotel_price.csv', index=False, encoding='utf-8')

In [17]:
# Reload the saved hotel prices; commas are removed from the price column in the next cell
hotel_price_1 = pd.read_csv('Cleaned_data/hotel_price.csv')

In [18]:
# Remove commas from the price text
hotel_price_1['Hotel_price'] = hotel_price_1['Hotel_price'].str.replace(',','')

In [22]:
# Replace each "~" in the hotel name with a space
hotel_price_1['Hotel_name'] = hotel_price_1['Hotel_name'].str.replace('~',' ')

In [26]:
# function to remove non-ASCII
def remove_non_ascii(Hotel_name):
    """Return ``Hotel_name`` with every character whose code point is 128 or above removed."""
    return ''.join(filter(lambda ch: ord(ch) < 128, Hotel_name))
 
# Strip non-ASCII characters from every hotel name
ascii_names = hotel_price_1['Hotel_name'].map(remove_non_ascii)
hotel_price_1['Hotel_name'] = ascii_names

In [27]:
# Display the cleaned hotel price frame
hotel_price_1

Unnamed: 0,Hotel_name,Hotel_city,Hotel_price,Hotel_rating
0,Roses of Daylesford - Rosemary Cottage Beaut...,Daylesford,235,3.0
1,New Modern luxurious studio appartment 5mins w...,Cape Woolamai,279,5.0
2,"Maple Cottage ""The Studio"" Trentham - $99 mid ...",Trentham,172,4.8
3,Bella Loft: luxury studio apartment in the hea...,Woodend,270,4.9
4,The Gatehouse - Light filled cottage in the ...,Melbourne,200,4.9
...,...,...,...,...
631,The Sebel Residences Melbourne Docklands,Melbourne,194,4.0
632,Summer Inn Holiday Apartments,Melbourne,200,3.8
633,CBD Victoria Harbour Service Apartments,Melbourne,421,3.4
634,Clunes Guest Home,Hepburn Shire,140,3.3


In [29]:
# Exporting back to a CSV (overwrites the file this frame was read from), without the index column
hotel_price_1.to_csv('Cleaned_data/hotel_price.csv', index=False)