In [6]:
import json
import os
import zipfile
from collections import defaultdict
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

matplotlib.use('TkAgg')

def create_directory(directory):
    """Ensure that ``directory`` exists, creating missing parents as needed.

    Args:
        directory: Target directory as a ``pathlib.Path`` or a path string.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and avoids the race between a
    # separate exists() check and the mkdir() that follows it.
    Path(directory).mkdir(parents=True, exist_ok=True)

def visualize_data(postcodes, revenues, show_visualization=True, output_dir=None):
    """Draw the average-revenue-per-postcode bar chart and save it as a PNG.

    NOTE: a later cell in this notebook defines a different function under the
    same name ``visualize_data``; once that cell has run, this definition is
    shadowed.

    Args:
        postcodes: Bar positions/labels, one per bar.
        revenues: Bar heights, in the same order as ``postcodes``.
        show_visualization: When True the figure is displayed with
            ``plt.show()`` after it has been saved.
        output_dir: Directory that receives
            ``avg_revenue_per_hse_per_postcode.png``. Defaults to the
            notebook-level ``images_dir`` so existing calls keep working.
    """
    print('Analysis initiated...')

    # Resolve the destination at call time: ``images_dir`` is a notebook-level
    # variable assigned by the analysis code that runs before this is called.
    target_dir = images_dir if output_dir is None else Path(output_dir)

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.bar(postcodes, revenues, color='blue')
    ax.set_xlabel('Postcode')
    ax.set_ylabel('Average Revenue (€)')
    ax.set_title('Average Revenue per House per Postcode')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()

    # Save the plot as an image
    fig.savefig(target_dir / 'avg_revenue_per_hse_per_postcode.png')

    if show_visualization:
        plt.show()
    # Always release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

# Average revenue per postcode: load both cleaned datasets, group listings by
# postcode, average their rent/price, write the result as a zipped JSON file
# and save a bar chart of the averages.
# NOTE(review): Airbnb 'price' and rentals 'rent' are averaged together as-is;
# confirm both are expressed over the same period before comparing postcodes.
try:
    # Load data from files
    rentals_file_path = Path('../src/data/data_cleaned/cleaned_rentals_data.json')
    airbnb_file_path = Path('../src/data/data_cleaned/cleaned_airbnb_data.json')

    # Read with an explicit encoding instead of the platform default.
    with open(rentals_file_path, 'r', encoding='utf-8') as rental_file:
        rental_data = json.load(rental_file)

    with open(airbnb_file_path, 'r', encoding='utf-8') as airbnb_file:
        airbnb_data = json.load(airbnb_file)

    # Initialize dictionaries
    # revenue_per_postcode maps postcode -> list of raw listing entries;
    # entry_count_per_postcode maps postcode -> number of listings seen.
    revenue_per_postcode = defaultdict(list)
    entry_count_per_postcode = defaultdict(int)

    # Process Airbnb data first, then rentals; each dataset stores the
    # postcode under a different key. Entries without a postcode are skipped.
    for dataset, postcode_key in ((airbnb_data, 'zipcode'), (rental_data, 'postalCode')):
        for entry in dataset:
            postcode = entry.get(postcode_key, None)
            if postcode:
                revenue_per_postcode[postcode].append(entry)
                entry_count_per_postcode[postcode] += 1

    # Calculate average revenue per house per postcode
    average_revenue_per_postcode = {}
    for postcode, entries in revenue_per_postcode.items():
        # Only entries carrying an '_id' contribute. 'rent' wins over 'price'.
        # Entries that provide neither yield None and are dropped here;
        # otherwise sum() raises TypeError and the whole run is aborted.
        candidate_revenues = (
            entry.get('rent', entry.get('price')) for entry in entries if '_id' in entry
        )
        revenues = [amount for amount in candidate_revenues if amount is not None]
        if revenues:
            average_revenue_per_postcode[postcode] = sum(revenues) / len(revenues)

    # Prepare the JSON output
    output_list = []
    for postcode, avg_revenue in average_revenue_per_postcode.items():
        formatted_avg_revenue = f"€{avg_revenue:.1f}"
        # Counts every entry grouped under the postcode, including entries
        # that did not contribute to the average.
        entry_count = entry_count_per_postcode[postcode]

        ids = [entry['_id'] for entry in revenue_per_postcode[postcode] if '_id' in entry]
        ids_str = ", ".join(f'"{entry_id}"' for entry_id in ids)

        output_dict = {
            "postalcode": postcode,
            "avg": formatted_avg_revenue,
            # Stored as a pre-formatted string, not a JSON array; kept as-is
            # so existing consumers of the file are unaffected.
            "_id": f"[{ids_str}]",
            "entry_count": entry_count
        }
        output_list.append(output_dict)

    # Define output file paths
    cwd = os.getcwd()
    root_path = Path(cwd).parent
    images_dir = root_path / 'scratch' / 'v_images'
    output_directory = Path('../src/data/calculations/')
    output_file_path = output_directory / 'avg_revenue_per_hse_per_postcode.json'
    zip_file_path = output_directory / 'avg_revenue_per_hse_per_postcode.zip'

    # Create directories if they don't exist
    create_directory(output_directory)
    create_directory(images_dir)

    # Write the JSON output to a file
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(output_list, output_file)

    # Visualize average revenue per postcode
    postcodes = list(average_revenue_per_postcode.keys())
    revenues = list(average_revenue_per_postcode.values())

    # ===================================================
    # ============== Show/Hide Visualisation ===========
    visualize_data(postcodes, revenues, show_visualization=False)
    # ============== Show/Hide Visualisation ===========
    # ===================================================

    # Zip the JSON file
    with zipfile.ZipFile(zip_file_path, 'w') as zip_file:
        zip_file.write(output_file_path, arcname=output_file_path.name)

    # Delete the JSON file; only the zip archive is kept.
    output_file_path.unlink()

    print("Image and zip file created successfully.")

except Exception as e:
    # Best-effort cell: any failure is reported as a single line and the
    # notebook keeps running. Only the message is printed, not the traceback.
    print("An error occurred:", e)

Analysis initiated...
Image and zip file created successfully.


<h2 style="text-align:center;font-weight:bold;color: #FFFFFF;background: #FFFFFF;
text-shadow: 1px 3px 0 #969696, 1px 13px 5px #aba8a8;background:linear-gradient(to bottom, rgba(40, 40, 40, 0.05), rgba(40, 40, 40, 0.1));box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);padding:10px;">Average Revenue per House per Postcode</h2>

![Average Revenue per House per Postcode](v_images/avg_revenue_per_hse_per_postcode.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">The bar chart visualizes the average revenue per house across different postcodes. Each bar represents a postcode, and the height of the bar corresponds to the average revenue. This visualization helps to identify areas with higher and lower average revenues, guiding investment decisions based on potential profitability. <hr> 
Investing in properties located in postcodes with higher average revenue offers promising returns. These postcodes are likely to have a higher demand for housing, indicating a strong market and potential for appreciation. It's crucial for investors to consider these insights when strategizing their property investments. Focusing on postcodes with higher average revenues can lead to better rental yields and capital appreciation, making it a strategic choice for maximizing returns.</blockquote></strong></p>


In [31]:
def visualize_data(is_data_ready=True, show_visualization=True):
    """Build and save the Airbnb and rentals distribution charts.

    Reads both cleaned JSON datasets from the notebook-level
    ``airbnb_file_path`` and ``rentals_file_path`` and writes four PNG charts
    into the notebook-level ``images_dir``, which is created if missing.

    The images are written in both modes; ``show_visualization`` only decides
    whether each figure is additionally displayed.

    Args:
        is_data_ready: Currently unused; kept so existing calls keep working.
        show_visualization: When True every figure is displayed with
            ``plt.show()`` after being saved; when False figures are only saved.
    """
    print('data visualization initiated...')
    # Create directories if they don't exist
    images_dir.mkdir(parents=True, exist_ok=True)

    # Read Airbnb data
    with open(airbnb_file_path, 'r', encoding='utf-8') as f:
        airbnb_data = json.load(f)

    # Read Rentals data
    with open(rentals_file_path, 'r', encoding='utf-8') as f:
        rentals_data = json.load(f)

    if show_visualization:
        print('Visualizing Airbnb data...')
        visualize_airbnb(airbnb_data, images_dir)

        print('Visualizing Rentals data...')
        visualize_rentals(rentals_data, images_dir)
    else:
        print('Deactivating rendering...')
        # Still render to file so the "saved as images" message below is true.
        visualize_airbnb(airbnb_data, images_dir, show=False)
        visualize_rentals(rentals_data, images_dir, show=False)
        print('Visualization plot points saved as images')


def _count_values(values):
    """Return ``{value: occurrences}`` in first-seen order using a single pass."""
    counts = defaultdict(int)
    for value in values:
        counts[value] += 1
    return dict(counts)


def _cycled_colors(palette, count):
    """Return ``count`` colours, repeating ``palette`` when it is too short."""
    return [palette[index % len(palette)] for index in range(count)]


def _save_and_close(fig, image_path, show):
    """Write ``fig`` to ``image_path``, optionally display it, then close it."""
    fig.savefig(image_path)
    if show:
        plt.show()
    plt.close(fig)


def _plot_histogram(values, color, title, xlabel, corner_label, image_path, show):
    """Save a 30-bin histogram of ``values`` with a label in the top-right corner."""
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.hist(values, bins=30, color=color, edgecolor='black')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    fig.tight_layout()
    ax.text(0.95, 0.95, corner_label, transform=ax.transAxes, fontsize=12,
            verticalalignment='top', horizontalalignment='right')
    _save_and_close(fig, image_path, show)


def visualize_airbnb(data, images_dir, show=True):
    """Save the Airbnb room-type pie chart and price histogram.

    Args:
        data: Iterable of listing dicts; each must provide 'room_type' and
            'price' (a missing key raises ``KeyError``).
        images_dir: ``pathlib.Path`` of the directory receiving the PNG files.
        show: When True (the default) each figure is also displayed.
    """
    # Visualizing 'room_type' distribution
    room_type_counts = _count_values(entry['room_type'] for entry in data)
    labels = list(room_type_counts)
    # Cycle the palette so every room type gets a colour and a legend entry,
    # even when there are more room types than palette colours.
    colors = _cycled_colors(['blue', 'green', 'red', 'purple'], len(labels))  # Color palette

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.pie(list(room_type_counts.values()), labels=labels, colors=colors,
           autopct='%1.1f%%', pctdistance=0.85)
    ax.set_title('Distribution of Room Types in Airbnb Data')
    fig.tight_layout()
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
              labels=[f'{label} - {color}' for label, color in zip(labels, colors)])
    _save_and_close(fig, images_dir / 'airbnb_room_type_distribution.png', show)

    # Visualizing 'price' distribution
    _plot_histogram(
        [entry['price'] for entry in data],
        color='blue',
        title='Distribution of Prices in Airbnb Data',
        xlabel='Price (€)',
        corner_label='Price',
        image_path=images_dir / 'airbnb_price_distribution.png',
        show=show,
    )


def visualize_rentals(data, images_dir, show=True):
    """Save the rentals property-type bar chart and rent histogram.

    Args:
        data: Iterable of rental dicts; each must provide 'propertyType' and
            'rent' (a missing key raises ``KeyError``).
        images_dir: ``pathlib.Path`` of the directory receiving the PNG files.
        show: When True (the default) each figure is also displayed.
    """
    # Visualizing 'propertyType' distribution
    property_type_counts = _count_values(entry['propertyType'] for entry in data)
    labels = list(property_type_counts)
    # Cycle the palette so the legend lists every property type, not only the
    # first four.
    colors = _cycled_colors(['green', 'blue', 'red', 'purple'], len(labels))  # Color palette

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(labels, list(property_type_counts.values()), color=colors)
    ax.set_title('Distribution of Property Types in Rentals Data')
    ax.set_xlabel('Property Type')
    ax.set_ylabel('Frequency')
    ax.tick_params(axis='x', labelrotation=45)

    # Custom legend for bar graphs
    ax.legend(bars, [f'{label} - {color}' for label, color in zip(labels, colors)],
              loc='upper right')

    fig.tight_layout()
    _save_and_close(fig, images_dir / 'rentals_property_type_distribution.png', show)

    # Visualizing 'rent' distribution
    _plot_histogram(
        [entry['rent'] for entry in data],
        color='orange',
        title='Distribution of Rents in Rentals Data',
        xlabel='Rent (€)',
        corner_label='Rent',
        image_path=images_dir / 'rentals_rent_distribution.png',
        show=show,
    )


# Invoke the visualisation entry point with on-screen rendering disabled
# (show_visualization=False).
visualize_data(is_data_ready=True, show_visualization=False)


data visualization initiated...
Deactivating rendering...
Visualization plot points saved as images


<!-- <h2 style="text-align:center;font-weight:bold;color: #FFFFFF;background: #FFFFFF;
text-shadow: 1px 3px 0 #969696, 1px 13px 5px #aba8a8;background:linear-gradient(to bottom, rgba(40, 40, 40, 0.05), rgba(40, 40, 40, 0.1));box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);padding:10px;">Distribution of Room Types in Airbnb Data</h2>

![Distribution of Room Types in Airbnb Data](v_images/airbnb_room_type_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">The pie chart visualizes the distribution of different room types in the Airbnb listings dataset. Each slice represents a room type, and the size of the slice corresponds to the percentage of that room type in the dataset. This visualization helps to understand the variety and prevalence of different room types.</blockquote></strong></p> 

<h2 style="text-align:center;font-weight:bold;color: #FFFFFF;background: #FFFFFF;
text-shadow: 1px 3px 0 #969696, 1px 13px 5px #aba8a8;background:linear-gradient(to bottom, rgba(40, 40, 40, 0.05), rgba(40, 40, 40, 0.1));box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);padding:10px;">Distribution of Prices in Airbnb Data</h2>

![Distribution of Prices in Airbnb Data](v_images/airbnb_price_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">This histogram shows the distribution of prices for Airbnb listings. It provides insights into the range and frequency of listing prices in the dataset. The x-axis represents the price range, and the y-axis represents the frequency of listings within each price range.</blockquote></strong></p> 

<h2 style="text-align:center;font-weight:bold;color: #FFFFFF;background: #FFFFFF;
text-shadow: 1px 3px 0 #969696, 1px 13px 5px #aba8a8;background:linear-gradient(to bottom, rgba(40, 40, 40, 0.05), rgba(40, 40, 40, 0.1));box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);padding:10px;">Distribution of Property Types in Rentals Data</h2>

![Distribution of Property Types in Rentals Data](v_images/rentals_property_type_distribution.png)

 -->

<h2 style="text-align:center;font-weight:bold;color: #FFFFFF;background: #FFFFFF;
text-shadow: 1px 3px 0 #969696, 1px 13px 5px #aba8a8;background:linear-gradient(to bottom, rgba(40, 40, 40, 0.05), rgba(40, 40, 40, 0.1));box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);padding:10px;">Business Overview</h2>

The Airbnb and rentals datasets offer valuable insights into the rental property market, covering the mix of room and property types as well as the distribution of prices and rents. Understanding these statistics can help potential investors make informed decisions in the rental property market.

<h3 style="text-decoration:underline">Room Types Distribution</h3>

![Room Types Distribution](v_images/airbnb_room_type_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">The majority of listings are 'Entire home/apt' and 'Private room', indicating a significant preference for more private accommodation types among guests.</blockquote> </strong></p>

<h3 style="text-decoration:underline">Price Analysis for Room Types</h3>

![Price Analysis for Room Types](v_images/airbnb_price_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">Higher prices are observed for 'Entire home/apt' listings, suggesting a correlation between room type and pricing. Additionally, most listings are priced between €50 and €200, indicating a focus on mid-range pricing.</blockquote> </strong></p>


<h3 style="text-decoration:underline">Distribution of Property Types in Rentals Data</h3>

![Distribution of Property Types](v_images/rentals_property_type_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">'Room' and 'Apartment' are the most common property types, suggesting a preference for compact and private rental options.</blockquote> </strong></p> 

<h3 style="text-decoration:underline">Distribution of Rents in Rentals Data</h3>

![Distribution of Rents](v_images/rentals_rent_distribution.png)

<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">Most rental prices range between €200 and €800, with fewer high-end options available.</blockquote> </strong></p> 

<h3 style="text-decoration:underline">Investment Implications</h3>

- **Targeting Strategy**: Focus on promoting private accommodation types and emphasize 'Room' and 'Apartment' listings.
  
- **Pricing Strategy**: Consider pricing strategies based on room type and target mid-range pricing to attract a wider audience.
  
- **Market Gap**: Explore potential opportunities in the luxury or high-end market segment.
  
- **Risk Assessment**: Evaluate the market's volatility and adjust investment strategies accordingly.

<h3 style="text-decoration:underline">Conclusion</h3>
<p style="font-family:italic"><strong><blockquote style="font-style:italic; font-weight:bold;">The insights derived from the Airbnb and rentals datasets provide a comprehensive understanding of the rental property market. By leveraging these insights, businesses can tailor their strategies to meet market demand, optimize pricing, and capitalize on growth opportunities. Whether it's targeting specific accommodation types, adjusting pricing strategies, or exploring new market segments, these insights serve as a valuable guide to inform business decisions.</blockquote> </strong></p>
