In [None]:
import os
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# File paths
# All inputs live under one data directory. It defaults to the original
# location but can be redirected with the RENT_AIRBNB_DATA_DIR environment
# variable, so the notebook also runs on machines other than the author's.
DATA_DIR = Path(
    os.environ.get(
        "RENT_AIRBNB_DATA_DIR",
        "/Users/mustafa/Documents/GitHub/assessment-rent-airbnb/data",
    )
)
airbnb_file = DATA_DIR / "airbnb.csv"
rentals_file = DATA_DIR / "rentals.json"
geo_postcodes_file = DATA_DIR / "geo" / "post_codes.geojson"

# Load data
airbnb_data = pd.read_csv(airbnb_file)        # Airbnb listings (CSV)
rental_data = pd.read_json(rentals_file)      # rental listings (JSON)
geojson_data = gpd.read_file(geo_postcodes_file)  # postcode geometries



In [None]:
# Data cleaning
def _to_pc4(codes):
    """Return the first four-digit run of each code as a string (NaN if none).

    Used to bring every postcode column onto the same join key.
    NOTE(review): assumes the Airbnb `zipcode` and rental `postalCode` values
    begin with the same 4-digit code used by the geo layer's `pc4_code`
    (e.g. "1053", "1053 AB", 1053.0) - confirm against the data.
    """
    return codes.astype(str).str.extract(r"(\d{4})", expand=False)


# Strip the euro sign and thousands separators, then convert to float.
# The dtype guards keep this cell re-runnable: once a column is numeric the
# `.str` accessor would raise AttributeError on a second execution.
if not pd.api.types.is_numeric_dtype(airbnb_data["price"]):
    airbnb_data["price"] = (
        airbnb_data["price"]
        .str.replace("€", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )
if not pd.api.types.is_numeric_dtype(rental_data["rent"]):
    rental_data["rent"] = (
        rental_data["rent"]
        .str.replace("€", "", regex=False)
        .str.replace(",", "", regex=False)
        # Raw string avoids the invalid "\d" escape; expand=False yields a
        # Series instead of a one-column DataFrame.
        .str.extract(r"(\d+)", expand=False)
        .astype(float)
    )

# Calculate potential revenue per postcode
# Mean price / rent per normalised 4-digit postcode. `assign` works on a copy,
# so the raw postcode columns in the source frames stay untouched; rows whose
# postcode cannot be parsed get a NaN key and are dropped by groupby.
airbnb_revenue = (
    airbnb_data.assign(zipcode=_to_pc4(airbnb_data["zipcode"]))
    .groupby("zipcode")["price"]
    .mean()
    .reset_index()
)
rental_revenue = (
    rental_data.assign(postalCode=_to_pc4(rental_data["postalCode"]))
    .groupby("postalCode")["rent"]
    .mean()
    .reset_index()
)

# Merge Airbnb and rental revenue with GeoJSON
# The rename is a no-op when the cell is re-run (column already renamed).
geojson_data = geojson_data.rename(columns={"pc4_code": "zipcode"})
geojson_data["zipcode"] = _to_pc4(geojson_data["zipcode"])
# Left joins keep every postcode polygon; postcodes without listings get NaN.
airbnb_geo = geojson_data.merge(airbnb_revenue, on="zipcode", how="left")
rental_geo = geojson_data.merge(rental_revenue, left_on="zipcode", right_on="postalCode", how="left")



In [None]:
def _plot_revenue_map(geo_frame, column, cmap, title):
    """Draw a choropleth of `column` over the polygons in `geo_frame`.

    The axes are created here and passed to `GeoDataFrame.plot` via `ax=`.
    Without `ax`, geopandas opens its own figure, so a preceding
    `plt.figure(figsize=...)` is left empty and the requested size is ignored.

    Parameters
    ----------
    geo_frame : GeoDataFrame holding the geometries and the value column.
    column : name of the numeric column that drives the colour scale.
    cmap : matplotlib colormap name.
    title : figure title.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    geo_frame.plot(column=column, cmap=cmap, legend=True, edgecolor="black", ax=ax)
    ax.set_title(title, fontsize=16)
    ax.set_axis_off()  # hide the axes frame and ticks
    plt.show()


# Plot Airbnb revenue per postcode
_plot_revenue_map(airbnb_geo, "price", "YlGnBu", "Airbnb Revenue per Postcode")

# Plot Rental revenue per postcode
_plot_revenue_map(rental_geo, "rent", "OrRd", "Rental Revenue per Postcode")

# Combined Bar Plot for Comparison
# Align the key name before merging: joining with left_on/right_on on
# differently named keys keeps BOTH key columns (4 columns in total), which
# made the former 3-name `.columns = [...]` assignment raise a length-mismatch
# ValueError. Columns are renamed by name so the mapping cannot drift.
combined_data = (
    airbnb_revenue.merge(
        rental_revenue.rename(columns={"postalCode": "zipcode"}),
        on="zipcode",
        how="inner",  # only postcodes present in both datasets
    )
    .rename(
        columns={
            "zipcode": "Postcode",
            "price": "Airbnb Revenue",
            "rent": "Rental Revenue",
        }
    )
    # Long format: one row per (Postcode, Revenue Type) for seaborn's `hue`.
    .melt(id_vars=["Postcode"], var_name="Revenue Type", value_name="Revenue")
)

fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(data=combined_data, x="Postcode", y="Revenue", hue="Revenue Type", ax=ax)
ax.set_title("Comparison of Airbnb and Rental Revenue per Postcode", fontsize=16)
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()