# RENTA Quickstart Tutorial

This notebook provides an interactive introduction to RENTA (Real Estate Network and Trend Analyzer). You'll learn how to:

1. Set up and configure RENTA
2. Download and process Airbnb data
3. Scrape property listings from Zonaprop
4. Enrich properties with rental market data
5. Generate AI-powered investment summaries
6. Export and analyze results

## Prerequisites

- Python 3.10+
- RENTA installed (`pip install renta`)
- AWS credentials configured
- AWS Bedrock model access enabled

## 1. Setup and Imports

In [None]:
# Install RENTA if not already installed
# !pip install renta

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')

# pandas display options: no column limit, no fixed width, cell text capped at 50 chars
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_colwidth', 50),
):
    pd.set_option(option_name, option_value)

# matplotlib: default style sheet with a 12x8 default figure size
plt.style.use('default')
plt.rcParams.update({'figure.figsize': (12, 8)})

print("✓ Libraries imported successfully")

## 2. Initialize RENTA Analyzer

In [None]:
from renta import RealEstateAnalyzer
# Import the exception types by name rather than with `import *`, so every
# exception caught in later cells has a visible origin.
from renta.exceptions import (
    AIServiceConfigurationError,
    AirbnbDataError,
    ConfigurationError,
    ExportFormatError,
    MatchingError,
    ScrapingError,
    ZonapropAntiBotError,
)

# Initialize analyzer with default configuration.
# Every later cell uses `analyzer`, so after printing a friendly hint the error
# is re-raised: the notebook then stops here instead of failing further down
# with an unrelated NameError.
try:
    analyzer = RealEstateAnalyzer()
    print("✓ RealEstateAnalyzer initialized successfully")

    # Show configuration summary
    config_summary = {
        'AWS Region': analyzer.config.get('aws.region'),
        'Bedrock Model': analyzer.config.get('aws.bedrock.model_id'),
        'Cache Directory': analyzer.config.get('data.cache_dir'),
        'Matching Radius': f"{analyzer.config.get('airbnb.matching.radius_km')} km"
    }

    print("\nConfiguration Summary:")
    for key, value in config_summary.items():
        print(f"  {key}: {value}")

except ConfigurationError as e:
    print(f"❌ Configuration error: {e}")
    print("Please check your configuration file")
    raise
except Exception as e:
    print(f"❌ Initialization failed: {e}")
    raise

## 3. Download Airbnb Data

First, we'll download and process Airbnb rental data for Buenos Aires from InsideAirbnb.

In [None]:
# Fetch the Airbnb dataset (can take a few minutes the first time it runs).
# On AirbnbDataError the cell reports the failure and leaves `airbnb_data` as
# None, which later cells check before using it.
try:
    print("Downloading Airbnb data...")
    # force=False: use cached data if available
    airbnb_data = analyzer.download_airbnb_data(force=False)

    print(f"✓ Airbnb data loaded: {len(airbnb_data):,} listings")
    memory_mb = airbnb_data.memory_usage(deep=True).sum() / 1024 / 1024
    print(f"  Memory usage: {memory_mb:.1f} MB")

    # High-level overview of what was loaded
    print("\nAirbnb Data Overview:")
    print(f"  Columns: {len(airbnb_data.columns)}")
    review_dates = airbnb_data['last_review']
    print(f"  Date range: {review_dates.min()} to {review_dates.max()}")
    nightly_prices = airbnb_data['price_usd_per_night']
    print(f"  Price range: ${nightly_prices.min():.0f} - ${nightly_prices.max():.0f} per night")

except AirbnbDataError as e:
    airbnb_data = None
    print(f"❌ Airbnb data download failed: {e}")
    print("You can continue with the tutorial using sample data")

### Explore Airbnb Data

In [None]:
if airbnb_data is not None:
    # Preview the first rows, restricted to the columns used in this section
    print("Sample Airbnb Listings:")
    preview_columns = [
        'id', 'room_type', 'price_usd_per_night', 'neighbourhood',
        'review_score_rating', 'estimated_nights_booked'
    ]
    display(airbnb_data[preview_columns].head())

    # 2x2 dashboard; axes.flat yields the panels row by row
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    price_ax, room_ax, occupancy_ax, hood_ax = axes.flat

    # Top-left: histogram of nightly prices
    price_ax.hist(airbnb_data['price_usd_per_night'], bins=50, alpha=0.7, color='skyblue')
    price_ax.set(title='Price Distribution (USD per night)', xlabel='Price (USD)', ylabel='Frequency')

    # Top-right: share of each room type
    room_type_counts = airbnb_data['room_type'].value_counts()
    room_ax.pie(room_type_counts.values, labels=room_type_counts.index, autopct='%1.1f%%')
    room_ax.set_title('Room Type Distribution')

    # Bottom-left: listings per occupancy level.
    # NOTE(review): value_counts() orders levels by frequency, so red/orange/green
    # follow frequency rank rather than a fixed level -> colour mapping; confirm
    # that this is intended.
    occupancy_counts = airbnb_data['estimated_nights_booked'].value_counts()
    occupancy_ax.bar(occupancy_counts.index, occupancy_counts.values, color=['red', 'orange', 'green'])
    occupancy_ax.set(
        title='Occupancy Level Distribution',
        xlabel='Occupancy Level',
        ylabel='Number of Listings',
    )

    # Bottom-right: the ten neighbourhoods with the most listings
    top_neighborhoods = airbnb_data['neighbourhood'].value_counts().head(10)
    bar_positions = range(len(top_neighborhoods))
    hood_ax.barh(bar_positions, top_neighborhoods.values)
    hood_ax.set_yticks(bar_positions)
    hood_ax.set_yticklabels(top_neighborhoods.index)
    hood_ax.set(title='Top 10 Neighborhoods by Listing Count', xlabel='Number of Listings')

    plt.tight_layout()
    plt.show()

    # Headline numbers for the dataset
    print("\nAirbnb Data Summary:")
    nightly_price = airbnb_data['price_usd_per_night']
    room_type = airbnb_data['room_type']
    summary_stats = {
        'Total Listings': len(airbnb_data),
        'Average Price': f"${nightly_price.mean():.2f}",
        'Median Price': f"${nightly_price.median():.2f}",
        'Entire Homes': int((room_type == 'Entire home/apt').sum()),
        'Private Rooms': int((room_type == 'Private room').sum()),
        'High Occupancy': int((airbnb_data['estimated_nights_booked'] == 'high').sum()),
        'Average Rating': f"{airbnb_data['review_score_rating'].mean():.1f}/5.0"
    }

    for stat_label, stat_value in summary_stats.items():
        print(f"  {stat_label}: {stat_value}")

## 4. Scrape Property Listings

Now we'll scrape property listings from Zonaprop. We'll search for 2-bedroom listings for sale in Palermo priced between USD 50,000 and USD 130,000.

In [None]:
# Zonaprop search: 2 bedroom apartments in Palermo, $50k-$130k USD
search_url = "https://www.zonaprop.com.ar/inmuebles-venta-palermo-2-dormitorios-50000-130000-dolar.html"

print(f"Scraping properties from: {search_url}")
print("Note: This may take a few minutes...")

# Outcomes handled below:
#   - success            -> `properties` holds the scraped listings
#   - anti-bot detection -> `properties` holds three hard-coded sample listings
#   - other scrape error -> `properties` is an empty DataFrame
try:
    properties = analyzer.scrape_zonaprop(search_url)

    print(f"✓ Properties scraped: {len(properties):,} listings")

    if len(properties) == 0:
        print("⚠️ No properties found - this may be due to anti-bot protection")
    else:
        print("\nProperty Data Overview:")
        print(f"  Columns: {len(properties.columns)}")
        listing_prices = properties['price_usd']
        print(f"  Price range: ${listing_prices.min():,.0f} - ${listing_prices.max():,.0f}")
        listing_surfaces = properties['surface_m2']
        print(f"  Surface range: {listing_surfaces.min():.0f} - {listing_surfaces.max():.0f} m²")

        # Preview the first rows
        print("\nSample Properties:")
        overview_columns = ['title', 'price_usd', 'surface_m2', 'rooms', 'bathrooms', 'views_per_day']
        display(properties[overview_columns].head())

except ZonapropAntiBotError as e:
    print(f"❌ Anti-bot protection detected: {e}")
    for hint in (
        "\nRecommended solutions:",
        "1. Save search results manually as HTML file",
        "2. Use: analyzer.scrape_zonaprop(url, html_path='saved_file.html')",
        "3. Try again later when protection may be relaxed",
    ):
        print(hint)

    # Fall back to a tiny hand-written dataset so the tutorial can continue
    print("\nCreating sample data for demonstration...")
    sample_records = [
        {
            'id': 'prop_1', 'title': '2 ambientes en Palermo', 'price_usd': 95000,
            'surface_m2': 45, 'rooms': 2, 'bathrooms': 1,
            'latitude': -34.5875, 'longitude': -58.4050,
            'views_per_day': 66, 'address': 'Av. Santa Fe 3500',
        },
        {
            'id': 'prop_2', 'title': 'Depto 2 amb con balcón', 'price_usd': 110000,
            'surface_m2': 52, 'rooms': 2, 'bathrooms': 1,
            'latitude': -34.5901, 'longitude': -58.4123,
            'views_per_day': 45, 'address': 'Thames 1200',
        },
        {
            'id': 'prop_3', 'title': 'Palermo Hollywood 2 amb', 'price_usd': 85000,
            'surface_m2': 40, 'rooms': 2, 'bathrooms': 1,
            'latitude': -34.5823, 'longitude': -58.4089,
            'views_per_day': 78, 'address': 'Av. Córdoba 5800',
        },
    ]
    properties = pd.DataFrame(sample_records)
    print(f"✓ Sample data created: {len(properties)} properties")

except ScrapingError as e:
    print(f"❌ Scraping failed: {e}")
    # Empty frame: downstream cells check `properties.empty` and skip their work
    properties = pd.DataFrame()

### Analyze Property Data

In [None]:
if not properties.empty:
    # Price per m². Non-positive surfaces are turned into NaN first: dividing by
    # zero would produce inf, which makes the histogram below raise (its range
    # must be finite) and turns the reported average into inf.
    surface_m2 = properties['surface_m2']
    properties['price_per_m2'] = properties['price_usd'] / surface_m2.where(surface_m2 > 0)

    has_views = 'views_per_day' in properties.columns

    # Visualize property data
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Price distribution
    axes[0, 0].hist(properties['price_usd'], bins=20, alpha=0.7, color='lightcoral')
    axes[0, 0].set_title('Property Price Distribution')
    axes[0, 0].set_xlabel('Price (USD)')
    axes[0, 0].set_ylabel('Frequency')

    # Price per m² distribution (rows without a usable surface are left out)
    axes[0, 1].hist(properties['price_per_m2'].dropna(), bins=20, alpha=0.7, color='lightgreen')
    axes[0, 1].set_title('Price per m² Distribution')
    axes[0, 1].set_xlabel('Price per m² (USD)')
    axes[0, 1].set_ylabel('Frequency')

    # Surface area vs price
    axes[1, 0].scatter(properties['surface_m2'], properties['price_usd'], alpha=0.6, color='purple')
    axes[1, 0].set_title('Surface Area vs Price')
    axes[1, 0].set_xlabel('Surface Area (m²)')
    axes[1, 0].set_ylabel('Price (USD)')

    # Views per day distribution
    if has_views:
        axes[1, 1].hist(properties['views_per_day'], bins=20, alpha=0.7, color='orange')
        axes[1, 1].set_title('Market Interest (Views per Day)')
        axes[1, 1].set_xlabel('Views per Day')
        axes[1, 1].set_ylabel('Frequency')
    else:
        # Hide the unused panel instead of showing an empty, unlabeled frame
        axes[1, 1].set_visible(False)

    plt.tight_layout()
    plt.show()

    # Property summary statistics (pandas skips NaN in mean/median)
    print("\nProperty Summary Statistics:")
    summary_stats = {
        'Total Properties': len(properties),
        'Average Price': f"${properties['price_usd'].mean():,.0f}",
        'Median Price': f"${properties['price_usd'].median():,.0f}",
        'Average Surface': f"{properties['surface_m2'].mean():.0f} m²",
        'Average Price/m²': f"${properties['price_per_m2'].mean():,.0f}",
        'High Interest (>60 views/day)': len(properties[properties['views_per_day'] > 60]) if has_views else 'N/A'
    }

    for key, value in summary_stats.items():
        print(f"  {key}: {value}")

## 5. Enrich Properties with Airbnb Data

Now we'll match properties with nearby Airbnb listings to estimate rental potential.

In [None]:
# Assumptions behind the back-of-the-envelope yield estimate
ASSUMED_NIGHTS_PER_MONTH = 20  # nights booked per month
TARGET_YIELD_PCT = 8           # annual yield (%) used as the "good investment" cut-off

# `enriched_properties` is always defined after this cell: it is the enriched
# frame on success, and the unmodified `properties` frame otherwise.
if properties.empty:
    print("⚠️ No properties to enrich")
    enriched_properties = properties

elif airbnb_data is None:
    print("⚠️ No Airbnb data available for enrichment")
    enriched_properties = properties

else:
    print("Enriching properties with Airbnb data...")
    print("This process matches properties with nearby Airbnb listings...")

    try:
        enriched_properties = analyzer.enrich_with_airbnb(properties)

        # Calculate match statistics (guard against an empty result)
        is_matched = enriched_properties['match_status'] == 'matched'
        matched_count = int(is_matched.sum())
        total_count = len(enriched_properties)
        match_rate = matched_count / total_count * 100 if total_count else 0.0

        print(f"✓ Properties enriched: {total_count} total")
        print(f"  Matched with Airbnb: {matched_count} ({match_rate:.1f}%)")

        if matched_count > 0:
            # Show enriched data sample
            print("\nSample Enriched Properties:")
            enriched_sample = enriched_properties[is_matched][[
                'title', 'price_usd', 'airbnb_avg_price_entire_home',
                'airbnb_occupancy_probability', 'airbnb_avg_review_score'
            ]].head()
            display(enriched_sample)

            # Calculate potential rental yields:
            #   monthly income = average nightly rate * assumed nights per month
            #   annual yield % = 12 * monthly income / purchase price * 100
            enriched_properties['estimated_monthly_income'] = (
                enriched_properties['airbnb_avg_price_entire_home'] * ASSUMED_NIGHTS_PER_MONTH
            )
            # A non-positive price would give an infinite/meaningless yield; use NaN instead
            purchase_price = enriched_properties['price_usd']
            enriched_properties['estimated_annual_yield'] = (
                (enriched_properties['estimated_monthly_income'] * 12)
                / purchase_price.where(purchase_price > 0) * 100
            )

            print("\nRental Yield Analysis:")
            matched_props = enriched_properties[is_matched]
            matched_yield = matched_props['estimated_annual_yield']
            print(f"  Average estimated yield: {matched_yield.mean():.1f}%")
            print(f"  Best yield: {matched_yield.max():.1f}%")
            print(f"  Properties with >{TARGET_YIELD_PCT}% yield: {int((matched_yield > TARGET_YIELD_PCT).sum())}")

    except MatchingError as e:
        print(f"❌ Enrichment failed: {e}")
        enriched_properties = properties

### Visualize Enrichment Results

In [None]:
if not enriched_properties.empty and 'airbnb_avg_price_entire_home' in enriched_properties.columns:
    # Work on a copy of the rows that were matched to Airbnb listings
    matched_props = enriched_properties[enriched_properties['match_status'] == 'matched'].copy()

    if len(matched_props) > 0:
        # The yield column is only present when the enrichment cell computed it
        has_yield = 'estimated_annual_yield' in matched_props.columns
        nightly_rate = matched_props['airbnb_avg_price_entire_home']

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        price_ax, yield_ax, occupancy_ax, review_ax = axes.flat

        # Top-left: purchase price against the Airbnb nightly rate
        price_ax.scatter(matched_props['price_usd'], nightly_rate, alpha=0.6)
        price_ax.set(
            title='Property Price vs Airbnb Nightly Rate',
            xlabel='Property Price (USD)',
            ylabel='Airbnb Rate (USD/night)',
        )

        # Top-right: distribution of the estimated annual yield, with the 8% target marked
        if has_yield:
            yield_ax.hist(matched_props['estimated_annual_yield'], bins=15, alpha=0.7, color='gold')
            yield_ax.set(
                title='Estimated Annual Yield Distribution',
                xlabel='Annual Yield (%)',
                ylabel='Frequency',
            )
            yield_ax.axvline(x=8, color='red', linestyle='--', label='8% Target')
            yield_ax.legend()

        # Bottom-left: share of each occupancy-probability value
        occupancy_counts = matched_props['airbnb_occupancy_probability'].value_counts()
        occupancy_ax.pie(occupancy_counts.values, labels=occupancy_counts.index, autopct='%1.1f%%')
        occupancy_ax.set_title('Occupancy Probability Distribution')

        # Bottom-right: average review score against the nightly rate
        review_ax.scatter(matched_props['airbnb_avg_review_score'], nightly_rate, alpha=0.6, color='teal')
        review_ax.set(
            title='Review Score vs Rental Rate',
            xlabel='Average Review Score',
            ylabel='Airbnb Rate (USD/night)',
        )

        plt.tight_layout()
        plt.show()

        # Table of the five matched properties with the highest estimated yield
        if has_yield:
            ranking_columns = ['title', 'price_usd', 'airbnb_avg_price_entire_home', 'estimated_annual_yield']
            top_yields = matched_props.nlargest(5, 'estimated_annual_yield')[ranking_columns]

            print("\nTop 5 Investment Opportunities by Yield:")
            display(top_yields)

## 6. Generate AI Investment Summaries

Now we'll use AWS Bedrock to generate AI investment summaries for a small set of properties: the three with the most views per day (or simply the first three listings if view data is unavailable).

In [None]:
# Defined up front so later cells can rely on `summaries` existing even if
# this cell stops with an unexpected error.
summaries = []

if not enriched_properties.empty:
    # Select top properties for AI analysis (limit to 3 for demo)
    if 'views_per_day' in enriched_properties.columns:
        top_properties = enriched_properties.nlargest(3, 'views_per_day')
    else:
        top_properties = enriched_properties.head(3)

    print(f"Generating AI summaries for top {len(top_properties)} properties...")
    print("This may take a minute or two...")

    try:
        summaries = analyzer.generate_summaries(top_properties)

        print(f"✓ AI summaries generated: {len(summaries)} summaries")

        # Display summaries
        for i, summary in enumerate(summaries, 1):
            # Look up the listing this summary refers to; skip it instead of
            # crashing with IndexError when no row carries that id.
            matching_rows = top_properties[top_properties['id'] == summary['property_id']]
            if matching_rows.empty:
                print(f"\n⚠️ Summary {i}: no property found with id {summary['property_id']!r}, skipping")
                continue
            property_data = matching_rows.iloc[0]

            # Only add an ellipsis when the title was actually truncated
            title = str(property_data['title'])
            short_title = title if len(title) <= 50 else f"{title[:50]}..."

            print(f"\n{'='*60}")
            print(f"PROPERTY {i}: {short_title}")
            print(f"{'='*60}")
            print(f"Price: ${property_data['price_usd']:,.0f}")
            print(f"Surface: {property_data['surface_m2']:.0f} m²")
            # The rate is missing (column absent) or NaN for properties without
            # an Airbnb match; print it only when there is a real value.
            airbnb_rate = property_data.get('airbnb_avg_price_entire_home')
            if pd.notna(airbnb_rate):
                print(f"Airbnb Rate: ${airbnb_rate:.0f}/night")
            print(f"Confidence: {summary['confidence']:.2f}")
            print("\nAI ANALYSIS:")
            print(summary['summary'])

        # Summary statistics (skipped when nothing came back, to avoid dividing by zero)
        if len(summaries) > 0:
            confidences = [s['confidence'] for s in summaries]
            avg_confidence = sum(confidences) / len(confidences)
            print("\n\nSummary Statistics:")
            print(f"  Average confidence: {avg_confidence:.2f}")
            print(f"  High confidence summaries (>0.8): {len([c for c in confidences if c > 0.8])}")
        else:
            print("⚠️ No summaries were returned")

    except AIServiceConfigurationError as e:
        print(f"❌ AI analysis failed: {e}")
        print("\nCommon solutions:")
        print("1. Check AWS credentials: aws sts get-caller-identity")
        print("2. Verify Bedrock model access in AWS Console")
        print("3. Check AWS region configuration")
        summaries = []

else:
    print("⚠️ No properties available for AI analysis")

## 7. Export Results

Finally, we'll export our analysis results in multiple formats for further use.

In [None]:
if enriched_properties.empty:
    print("⚠️ No data to export")

else:
    print("Exporting analysis results...")

    try:
        # Write the enriched listings to disk as CSV, then as JSON
        csv_path = analyzer.export(enriched_properties, format="csv", path="tutorial_results.csv")
        print(f"✓ CSV export: {csv_path}")

        json_path = analyzer.export(enriched_properties, format="json", path="tutorial_results.json")
        print(f"✓ JSON export: {json_path}")

        # What was written
        exported_rows, exported_columns = enriched_properties.shape
        print("\nExport Summary:")
        print(f"  Properties exported: {exported_rows}")
        print(f"  Columns included: {exported_columns}")

        # Size of each exported file in KB
        csv_size, json_size = (
            Path(exported_file).stat().st_size / 1024
            for exported_file in (csv_path, json_path)
        )
        print(f"  CSV file size: {csv_size:.1f} KB")
        print(f"  JSON file size: {json_size:.1f} KB")

    except ExportFormatError as e:
        print(f"❌ Export failed: {e}")

        # Fall back to the "dataframe" export format, which takes no path
        print("Falling back to in-memory export...")
        df_result = analyzer.export(enriched_properties, format="dataframe")
        print(f"✓ In-memory DataFrame: {len(df_result)} rows, {len(df_result.columns)} columns")

## 8. Analysis Summary and Next Steps

In [None]:
# Counters kept by the analyzer for each kind of operation run in this session
stats = analyzer.get_operation_stats()

divider = "=" * 60

print("\n" + divider)
print("RENTA TUTORIAL COMPLETED SUCCESSFULLY!")
print(divider)

print("\nOperation Summary:")
for stat_label, stat_key in (
    ("Airbnb downloads", 'downloads'),
    ("Property scrapes", 'scrapes'),
    ("Enrichment operations", 'enrichments'),
    ("AI analyses", 'ai_analyses'),
    ("Export operations", 'exports'),
):
    print(f"  {stat_label}: {stats[stat_key]}")

if not enriched_properties.empty:
    print("\nData Summary:")
    print(f"  Properties analyzed: {len(enriched_properties)}")

    # Match counts exist only if the enrichment step added `match_status`
    if 'match_status' in enriched_properties.columns:
        matched_count = int((enriched_properties['match_status'] == 'matched').sum())
        print(f"  Properties with Airbnb matches: {matched_count}")

    # Report AI results only when at least one summary was produced
    if summaries:
        print(f"  AI summaries generated: {len(summaries)}")
        avg_confidence = sum(s['confidence'] for s in summaries) / len(summaries)
        print(f"  Average AI confidence: {avg_confidence:.2f}")

print("\nNext Steps:")
next_steps = (
    "Explore the exported CSV/JSON files",
    "Try different search URLs and neighborhoods",
    "Customize configuration for your specific needs",
    "Implement batch processing for multiple searches",
    "Create custom matching strategies or export formats",
)
for step_number, step_text in enumerate(next_steps, start=1):
    print(f"  {step_number}. {step_text}")

print("\nResources:")
for resource_line in (
    "  📚 Documentation: https://renta.readthedocs.io",
    "  💻 GitHub: https://github.com/renta-dev/renta",
    "  📝 Examples: Check the examples/ directory",
    "  🐛 Issues: https://github.com/renta-dev/renta/issues",
):
    print(resource_line)

print("\n" + divider)

## Cleanup

In [None]:
# Close the analyzer, but only if one was actually created earlier in the
# notebook (the initialization cell may have failed before assigning it).
analyzer_was_created = 'analyzer' in locals()
if analyzer_was_created:
    analyzer.close()
    print("✓ RENTA resources cleaned up")

print("\nTutorial completed! Thank you for trying RENTA.")