# Crime Statistics

In [None]:
# Imports: Selenium for browser automation, pandas/NumPy for data handling (the Chrome WebDriver itself is started in a later cell).
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import numpy as np

### Suburb List
`suburb.xlsx` was derived from `all_perth_310121.csv` by removing duplicates in Excel.

In [None]:
# Read the suburb workbook into a DataFrame
file_path = 'Resources/suburb.xlsx'
df = pd.read_excel(file_path)

# Take the SUBURB column as a plain Python list; the same list object is
# bound to both column_list and suburbs (the names to search for)
suburbs = column_list = df['SUBURB'].tolist()

In [None]:
# Address of the WA Police crime statistics page
url = 'https://www.police.wa.gov.au/Crime/CrimeStatistics#/start'

# Start a Chrome session (make sure you have the correct path to your web
# driver) and navigate to the page
driver = webdriver.Chrome()
driver.get(url)

### Crime Data
- The crime data is selected by calendar year rather than financial year, so that each annual period runs from January to December instead of from July to June of the following year.

- The crime data is also filtered by each suburb.


In [None]:
# Locator for the "Calendar Year" toggle button
calendar_year_locator = (
    By.CSS_SELECTOR,
    'button.btn.btn-primary.btn-xs.btn-primary-crimestats[data-ng-click="selectCalendarYear()"]',
)

# Give the page up to 20 seconds to make the button clickable, then click it
calendar_year_button = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable(calendar_year_locator)
)
calendar_year_button.click()

In [None]:
# Click on Suburb
# Wait (up to 20 seconds) for the Suburb radio button to become clickable
# instead of looking it up immediately: the page may still be re-rendering,
# in which case a bare find_element would raise NoSuchElementException.
suburb_button = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-locality-group[ng-model="location"][uib-btn-radio="\'Suburb\'"]'))
)
suburb_button.click()

# Scrape data from WA Police Website
Data for 2024 is excluded because the year is still incomplete and could distort any mean calculated across years.

In [None]:
# Helper: quote arbitrary text as an XPath 1.0 string literal
def _xpath_literal(text):
    """Return *text* as an XPath 1.0 string literal.

    XPath 1.0 has no escape character inside string literals, so text that
    contains an apostrophe cannot simply be wrapped in single quotes. Text
    containing both quote styles is assembled with ``concat()``.
    """
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    pieces = text.split("'")
    return "concat(" + ", \"'\", ".join(f"'{piece}'" for piece in pieces) + ")"


# Function to get crime statistics for a given suburb
def get_crime_statistics(suburb):
    """Scrape the offence table for *suburb* and save it as a CSV file.

    Uses the module-level ``driver``, which must already be showing the crime
    statistics page. The suburb name is typed into the locality search box,
    the matching autocomplete entry is clicked, and the offences table is
    parsed from the page source with pandas. The first 18 rows and columns
    0 and 11-19 (by position) are kept; the code assumes those nine columns
    are labelled '2015' to '2023'. Hyphens are stripped from text cells, the
    year columns are cast to int, zeros are replaced with NaN, and the result
    is written to ``Resources/suburb_crime/<suburb>.csv``.

    Args:
        suburb: Suburb name to search for; also used as the CSV file name.

    Returns:
        None. Any exception is caught, reported with ``print`` and swallowed
        so that a loop over many suburbs keeps going.
    """
    # Local imports keep this cell self-contained
    import os
    from io import StringIO

    try:
        # Find the input field by its ID and input the suburb name
        input_field = driver.find_element(By.ID, 'searchLocality_value')
        input_field.clear()  # Clear any previous input
        input_field.send_keys(suburb)

        # Wait for the dropdown list to load
        time.sleep(0.5)

        # Build the XPath with a safely quoted literal so names containing
        # an apostrophe (e.g. O'Connor) do not produce an invalid expression
        title_xpath = (
            "//div[@class='angucomplete-title ng-binding ng-scope' "
            f"and contains(text(), {_xpath_literal(str(suburb))})]"
        )

        # Wait until the entry can actually be clicked, not merely until it
        # exists in the DOM
        dropdown_item = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, title_xpath))
        )
        dropdown_item.click()

        # Wait for the results to load
        # NOTE(review): a fixed pause cannot guarantee the table has been
        # refreshed for this suburb - consider an explicit wait if stale
        # data is ever observed.
        time.sleep(0.5)

        # Parse the offences table; the HTML is wrapped in StringIO because
        # passing a literal HTML string to read_html is deprecated
        tables = pd.read_html(
            StringIO(driver.page_source),
            attrs={'class': 'table table-striped table-condensed table-offences-stats'},
        )

        # Stop here if nothing was parsed instead of using an unbound name
        if not tables:
            print("Table not found.")
            return None
        df = tables[0]

        # Keep the first 18 rows, the first column and columns 11-19
        df_filter = df.iloc[0:18, [0, 11, 12, 13, 14, 15, 16, 17, 18, 19]]

        # Remove '-' from all string columns (literal match, not a regex)
        df_clean = df_filter.apply(
            lambda x: x.str.replace('-', '', regex=False) if x.dtype == "object" else x
        )

        # Convert the year columns to integer. A cell that cannot be parsed
        # as an integer raises here and is reported by the handler below.
        year_types = {str(year): int for year in range(2015, 2024)}
        df_clean = df_clean.astype(year_types)

        # Replace 0 with NaN
        df_final = df_clean.replace(0, np.nan, inplace=False)

        # Write one CSV per suburb, creating the output folder if needed
        output_dir = 'Resources/suburb_crime'
        os.makedirs(output_dir, exist_ok=True)
        filename = f'{output_dir}/{suburb}.csv'
        df_final.to_csv(filename, index=True)

    except Exception as e:
        # Best-effort scraping: report the failure and let the caller move on
        print(f"An error occurred for suburb {suburb}: {e}")
        return None

### Convert to .csv and save
The suburb crime files are saved under Resources/suburb_crime

In [None]:
# Loop through the list of suburbs and get the crime statistics.
# get_crime_statistics catches its own exceptions (it prints the error and
# returns None), so a failure for one suburb does not stop the loop.
for suburb in suburbs:
    print(f"Getting data for {suburb}...")
    get_crime_statistics(suburb)

In [None]:
# Close the driver once scraping has finished
driver.quit()

In [None]:
# # Function to get crime statistics for a given suburb
# def get_crime_statistics(suburb):
#     try:     
#         # Find the input field by its ID and input the suburb name
#         input_field = driver.find_element(By.ID, 'searchLocality_value')
#         input_field.clear()  # Clear any previous input
#         input_field.send_keys(suburb)
        
#         # Wait for the dropdown list to load
#         time.sleep(1)
        
#         # Find and select the correct item from the dropdown
#         dropdown_item = WebDriverWait(driver, 10).until(
#             EC.presence_of_element_located((By.XPATH, f"//div[@class='angucomplete-title ng-binding ng-scope' and contains(text(), '{suburb}')]"))
#         )
#         dropdown_item.click()  

#         # Wait for the results to load
#         time.sleep(1)
        
#         # Extract the crime statistics data
#         crime_table = driver.find_element(By.ID, 'offences-year')
#         data = []
#         for row in crime_table.find_elements(By.TAG_NAME, 'tr'):
#             cols = row.find_elements(By.TAG_NAME, 'td')
#             cols = [ele.text for ele in cols]
#             data.append(cols)
        
#         return data

#     except Exception as e:
#         print(f"An error occurred for suburb {suburb}: {e}")
#         return None

In [None]:
# # Loop through the list of suburbs and get the crime statistics
# all_data = {}
# for suburb in suburbs:
#     print(f"Getting data for {suburb}...")
#     data = get_crime_statistics(suburb)
#     if data:
#         all_data[suburb] = data

In [None]:
# # Close the driver
# driver.quit()

In [None]:
# # Convert the data to a Pandas DataFrame
# df = pd.DataFrame.from_dict(all_data, orient='index')
# df

In [None]:
# # Assuming 'df' is your DataFrame
# df.to_csv('path_to_file.csv', index=True)