In [None]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import requests
import seaborn as sns
from pprint import pprint
import numpy as np
from api_keys import geoapify_key

In [None]:
# Load the cleaned Perth dataset from disk; the bare name on the last line
# makes the notebook render the resulting DataFrame.
city_data_df = pd.read_csv(filepath_or_buffer="cleaned_perth_data.csv")
city_data_df

In [None]:
# 1. How does the age of a property and its distance from the CBD relate to its selling price?

In [None]:

# Two stacked panels sharing one figure: PRICE against build year (top)
# and PRICE against distance to the CBD (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 12))

# (x column, panel title) for each row of the grid, top to bottom.
# NOTE(review): the first title says "Property Age" but the x-axis is the
# raw BUILD_YEAR column, not an age in years - confirm the intended wording.
panels = [
    ('BUILD_YEAR', 'Property Age vs. Price'),
    ('CBD_DIST', 'CBD Distance vs. Price'),
]

for ax, (x_column, panel_title) in zip(axes, panels):
    sns.scatterplot(data=city_data_df, x=x_column, y='PRICE', ax=ax)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Preview the two columns used by the school-rank analysis in the next cell.
# They are selected straight from city_data_df because the name
# `selected_data` is not assigned until that later cell runs, so displaying
# the bare name here raised NameError on a fresh kernel (Restart & Run All).
# NOTE(review): which frame this cell originally showed depended on kernel
# state; confirm this preview is the one wanted, or delete the cell.
city_data_df[['NEAREST_SCH_RANK', 'PRICE']]

In [None]:
# School rank vs. price: for every nearest-school rank, take the mean price of
# the properties that share it, then plot and correlate those per-rank means.
selected_columns = ['NEAREST_SCH_RANK', 'PRICE']

# Drop rows with missing values in 'NEAREST_SCH_RANK' or 'PRICE' columns
selected_data = city_data_df[selected_columns].dropna()

# Grouping data by NEAREST_SCH_RANK and calculating the mean price,
# ordered by rank
average_prices_by_rank = (
    selected_data.groupby('NEAREST_SCH_RANK')['PRICE'].mean().sort_index()
)

# One row per rank: exactly the points that are drawn below.
plotted_data = average_prices_by_rank.reset_index()

# Pearson r / p-value are computed on the plotted per-rank averages so that
# the annotation describes the relationship shown in the figure. Previously r
# came from the individual property rows while the chart showed averages, so
# the number on the plot did not belong to the points or the fitted line.
corr_coeff, p_value = st.pearsonr(plotted_data['NEAREST_SCH_RANK'], plotted_data['PRICE'])

# Create a regression plot with correlation line
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    data=plotted_data,
    x='NEAREST_SCH_RANK',
    y='PRICE',
    scatter_kws={'s': 50},
    line_kws={'color': 'red'},
    ax=ax,
)
ax.set_title('Impact of Nearest School Rank on Average Housing Prices')
ax.set_xlabel('Nearest School Rank')
ax.set_ylabel('Average Housing Price')
ax.annotate(f'Correlation Coefficient (r) = {corr_coeff:.2f}', xy=(0.05, 0.85), xycoords='axes fraction')
plt.tight_layout()
plt.show()


In [None]:
# Display the current value of selected_columns (when the notebook is run top
# to bottom, this is the list assigned by the school-rank cell above).
selected_columns 


In [None]:
# 2. What is the impact of school proximity and transportation availability on housing prices?

In [None]:

# Selecting columns for visualization
selected_columns = ['NEAREST_STN_DIST', 'CBD_DIST', 'PRICE']

# Drop incomplete rows before plotting and correlating, as the other analysis
# cells do. st.pearsonr is called without any NaN handling here, so a missing
# value would otherwise spoil the coefficient (NaN result or an error,
# depending on the SciPy version). On fully populated data this is a no-op.
selected_data = city_data_df[selected_columns].dropna()

# Pearson correlation of each distance measure with price
corr_coeff_stn, p_value_stn = st.pearsonr(selected_data['NEAREST_STN_DIST'], selected_data['PRICE'])
corr_coeff_cbd, p_value_cbd = st.pearsonr(selected_data['CBD_DIST'], selected_data['PRICE'])

# Create a regression plot with correlation lines (both series share one axes)
fig, ax = plt.subplots(figsize=(10, 6))

# Regression plot for NEAREST_STN_DIST (green)
sns.regplot(data=selected_data, x='NEAREST_STN_DIST', y='PRICE', scatter_kws={'s': 50}, line_kws={'color': 'green'}, label='Nearest Train Station Distance', ax=ax)

# Regression plot for CBD_DIST (blue)
sns.regplot(data=selected_data, x='CBD_DIST', y='PRICE', scatter_kws={'s': 50}, line_kws={'color': 'blue'}, label='Distance from CBD', ax=ax)

ax.set_title('Impact of Nearest Train Station Distance and CBD Distance on Housing Prices')
ax.set_xlabel('Distance')
ax.set_ylabel('Housing Price')

# Annotate the plot with correlation information
ax.annotate(f'Correlation (Train Station Dist) = {corr_coeff_stn:.2f}', xy=(0.05, 0.85), xycoords='axes fraction')
ax.annotate(f'Correlation (CBD Dist) = {corr_coeff_cbd:.2f}', xy=(0.05, 0.78), xycoords='axes fraction')

ax.legend()
plt.tight_layout()
plt.show()


In [None]:
# 3. How do housing design features and land size influence property prices?

In [None]:

# Selecting columns for analysis
selected_columns = ['PRICE', 'LAND_AREA', 'FLOOR_AREA', 'BEDROOMS', 'BATHROOMS']

# Removing rows with missing values in selected columns
selected_data = city_data_df[selected_columns].dropna()

# EDA: Scatter plots for housing design features vs. price, one panel per
# feature laid out on a 2x2 grid (left to right, top to bottom).
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, feature in zip(axes.flat, ['BEDROOMS', 'BATHROOMS', 'FLOOR_AREA', 'LAND_AREA']):
    sns.scatterplot(data=selected_data, x=feature, y='PRICE', ax=ax)

plt.tight_layout()
plt.show()

# Correlation analysis
correlation_matrix = selected_data.corr()
print(correlation_matrix)

# Regression analysis: ordinary least squares of PRICE on the four features.
# The fit is solved with NumPy because the `sm` alias this cell used before
# is not provided by any import visible in the notebook, so the cell stopped
# with NameError before producing a model.
X = selected_data[['LAND_AREA', 'FLOOR_AREA', 'BEDROOMS', 'BATHROOMS']].astype(float)
X.insert(0, 'const', 1.0)  # Add constant term for intercept
y = selected_data['PRICE'].astype(float)

design_matrix = X.to_numpy()
beta = np.linalg.lstsq(design_matrix, y.to_numpy(), rcond=None)[0]
coefficients = pd.Series(beta, index=X.columns, name='coefficient')

# Goodness of fit: share of the price variance explained by the fitted plane.
residuals = y.to_numpy() - design_matrix @ beta
ss_residual = float((residuals ** 2).sum())
ss_total = float(((y - y.mean()) ** 2).sum())
r_squared = 1.0 - ss_residual / ss_total

print(coefficients)
print(f'R-squared: {r_squared:.4f}')


In [None]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster

# Columns pulled from city_data_df for the property map built in this cell.
selected_columns = [
    'POSTCODE', 'LATITUDE', 'LONGITUDE', 'PRICE',
    'BEDROOMS', 'BATHROOMS', 'LAND_AREA', 'ADDRESS',
    'NEAREST_SCH_RANK', 'NEAREST_SCH',
]

# Take an independent copy so that columns added to selected_data later in
# this cell never write back into city_data_df.
selected_data = city_data_df.loc[:, selected_columns].copy()

# Define a function to categorize property prices
def categorize_price(price, low_max=500000, medium_max=1000000):
    """Bucket a sale price into 'Low', 'Medium' or 'High'.

    Args:
        price: Numeric sale price to classify.
        low_max: Highest price (inclusive) that is still labelled 'Low'.
        medium_max: Highest price (inclusive) that is still labelled 'Medium'.

    Returns:
        'Low' when price <= low_max, 'Medium' when price <= medium_max,
        otherwise 'High'. A NaN price fails both comparisons and is
        therefore labelled 'High'.
    """
    if price <= low_max:
        return 'Low'
    if price <= medium_max:
        return 'Medium'
    return 'High'

# Label every property with its price bucket; the bucket picks the marker colour.
selected_data['PRICE_CATEGORY'] = selected_data['PRICE'].apply(categorize_price)

# Define color mapping for price categories
color_mapping = {
    'Low': 'yellow',
    'Medium': 'green',
    'High': 'red'
}

# Create a folium map centered on Perth
perth_map = folium.Map(location=[-31.953512, 115.857048], zoom_start=12)

# Add one coloured circle marker per property (not per postcode); the markers
# are attached to a MarkerCluster layer rather than directly to the map.
marker_cluster = MarkerCluster().add_to(perth_map)

for _, row in selected_data.iterrows():
    # Popup text shown when a marker is clicked.
    # The land-area unit reads "sqm": the alternative version of this map kept
    # further down in the notebook labels the same column "sqmeter", so the
    # earlier "sqft" label here was inconsistent with it.
    # NOTE(review): confirm the unit against the dataset's data dictionary.
    popup_html = (
        f"Address: {row['ADDRESS']}<br>"
        f"Price: ${row['PRICE']:,}<br>"
        f"Bedrooms: {row['BEDROOMS']}<br>"
        f"Bathrooms: {row['BATHROOMS']}<br>"
        f"Land Area: {row['LAND_AREA']} sqm<br>"
        f"Nearest School: {row['NEAREST_SCH']}<br>"
        f"School Ranking: {row['NEAREST_SCH_RANK']}"
    )
    folium.CircleMarker(
        location=[row['LATITUDE'], row['LONGITUDE']],
        radius=8,
        color=None,
        fill=True,
        fill_color=color_mapping[row['PRICE_CATEGORY']],
        fill_opacity=0.7,
        popup=popup_html,
    ).add_to(marker_cluster)

# Save the map as an HTML file, then display it inline as the cell output
perth_map.save('perth_property_map.html')
perth_map


In [None]:
# import folium


# # Create a folium map centered on Perth
# perth_map = folium.Map(location=[-31.953512, 115.857048], zoom_start=12)

# # Define color mapping for price categories
# color_mapping = {
#     'Low': 'yellow',
#     'Medium': 'green',
#     'High': 'red'
# }

# for _, row in selected_data.iterrows():
#     # Create a custom CircleMarker for each property
#     folium.CircleMarker(
#         location=[row['LATITUDE'], row['LONGITUDE']],
#         radius=8,
#         color=None,
#         fill=True,
#         fill_color=color_mapping[row['PRICE_CATEGORY']],
#         fill_opacity=0.7,
#         popup=f"Address: {row['ADDRESS']}<br>"
#               f"Price: ${row['PRICE']:,}<br>"
#               f"Bedrooms: {row['BEDROOMS']}<br>"
#               f"Bathrooms: {row['BATHROOMS']}<br>"
#               f"Land Area: {row['LAND_AREA']} sqmeter<br>"
#               f"Nearest School: {row['NEAREST_SCH']}<br>"
#               f"School Ranking: {row['NEAREST_SCH_RANK']}"
#     ).add_to(perth_map)

# # Add the legend to the map
# legend_html = """
# <div style="position: fixed; bottom: 16px; right: 16px; background-color: white; padding: 4px; border: 1px solid black; z-index: 1000;">
#   <h4 style="font-size: 14px;">Price Range</h4>
#   <i style="background: yellow; border-radius: 50%; display: inline-block; width: 9px; height: 9px;"></i> Low (< $500,000)<br>
#   <i style="background: green; border-radius: 50%; display: inline-block; width: 9px; height: 9px;"></i> Medium ($500,001 - $1,000,000)<br>
#   <i style="background: red; border-radius: 50%; display: inline-block; width: 9px; height: 9px;"></i> High (> $1,000,000)<br>
# </div>
# """

# perth_map.get_root().html.add_child(folium.Element(legend_html))

# # Show the map
# # perth_map.save('perth_property_map.html')  # Save as an HTML file
# perth_map
