In [1]:
!pip install geopandas shapely openpyxl folium




In [2]:
# Import all required libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely import wkb
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import folium
from folium import plugins
from google.colab import files
import warnings
warnings.filterwarnings('ignore')

print("✓ All libraries imported successfully!")

✓ All libraries imported successfully!


In [3]:
# Upload the shapefile components
print("Please upload ALL 6 shapefile files for statistical areas:")
print("- statistical_areas_2022.shp")
print("- statistical_areas_2022.shx")
print("- statistical_areas_2022.dbf")
print("- statistical_areas_2022.prj")
print("- statistical_areas_2022.cpg")
print("- statistical_areas_2022.xml")
print()

uploaded = files.upload()

Please upload ALL 6 shapefile files for statistical areas:
- statistical_areas_2022.shp
- statistical_areas_2022.shx
- statistical_areas_2022.dbf
- statistical_areas_2022.prj
- statistical_areas_2022.cpg
- statistical_areas_2022.xml



Saving statistical_areas_2022.cpg to statistical_areas_2022.cpg
Saving statistical_areas_2022.dbf to statistical_areas_2022.dbf
Saving statistical_areas_2022.prj to statistical_areas_2022.prj
Saving statistical_areas_2022.shp to statistical_areas_2022.shp
Saving statistical_areas_2022.shx to statistical_areas_2022.shx
Saving statistical_areas_2022.xml to statistical_areas_2022.xml


In [4]:
# Load statistical areas shapefile
gdf_areas = gpd.read_file('statistical_areas_2022.shp')

print(f"✓ Loaded {len(gdf_areas)} statistical areas")
print(f"✓ CRS: {gdf_areas.crs}")
print(f"✓ Statistical area codes: {sorted(gdf_areas['STAT_2022'].unique())}")

✓ Loaded 25 statistical areas
✓ CRS: EPSG:2039
✓ Statistical area codes: [np.int32(11), np.int32(12), np.int32(13), np.int32(14), np.int32(15), np.int32(16), np.int32(17), np.int32(18), np.int32(19), np.int32(21), np.int32(22), np.int32(23), np.int32(24), np.int32(25), np.int32(26), np.int32(31), np.int32(32), np.int32(33), np.int32(41), np.int32(42), np.int32(43), np.int32(44), np.int32(45), np.int32(46), np.int32(47)]


In [5]:
# Upload Airbnb CSV file
print("Please upload Eilat_airbnb_data_2025.csv")
uploaded = files.upload()

# Load Airbnb data
airbnb_data = pd.read_csv('Eilat_airbnb_data_2025.csv')

print(f"\n✓ Loaded {len(airbnb_data)} Airbnb records")
print(f"\nColumns: {airbnb_data.columns.tolist()}")
print("\nFirst few rows:")
print(airbnb_data.head())

Please upload Eilat_airbnb_data_2025.csv


Saving Eilat_airbnb_data_2025.csv to Eilat_airbnb_data_2025.csv

✓ Loaded 833 Airbnb records

Columns: ['id', 'url', 'title', 'description', 'price/price', 'price/qualifier', 'rating/value', 'personCapacity', 'locationSubtitle', 'coordinates/latitude', 'coordinates/longitude', 'num_nights', 'price_numeric', 'price_per_night']

First few rows:
             id                                                url  \
0  1.499420e+18  https://www.airbnb.com/rooms/14994217859877177...   
1  1.448320e+18  https://www.airbnb.com/rooms/14483227389476541...   
2  1.296030e+18  https://www.airbnb.com/rooms/12960264087150125...   
3  9.798270e+17  https://www.airbnb.com/rooms/97982658737104870...   
4  1.459360e+18  https://www.airbnb.com/rooms/14593565068694520...   

                                               title  \
0  Sapphire luxury vacation, private pool heated ...   
1                                   The Desert House   
2                                 Red Sea Vacation 1   
3  Family 

In [6]:
print("Processing Airbnb data for spatial matching...\n")
print("="*80)

# Check for missing coordinates
missing_coords = airbnb_data[
    airbnb_data['coordinates/latitude'].isna() |
    airbnb_data['coordinates/longitude'].isna()
]
print(f"Records with missing coordinates: {len(missing_coords)}")
print(f"Records with valid coordinates: {len(airbnb_data) - len(missing_coords)}")

# Create point geometries from lat/lon
airbnb_data['geometry'] = airbnb_data.apply(
    lambda row: Point(row['coordinates/longitude'], row['coordinates/latitude'])
    if pd.notna(row['coordinates/latitude']) and pd.notna(row['coordinates/longitude'])
    else None,
    axis=1
)

# Create GeoDataFrame with WGS84 (lat/lon coordinate system)
gdf_airbnb = gpd.GeoDataFrame(
    airbnb_data,
    geometry='geometry',
    crs='EPSG:4326'  # lat/lon coordinate system
)

# Remove records without coordinates
gdf_airbnb = gdf_airbnb[gdf_airbnb['geometry'].notna()]

print(f"\n✓ Created {len(gdf_airbnb)} point geometries")

# Convert to the same CRS as statistical areas (EPSG:2039)
print(f"\nConverting coordinate systems...")
print(f"  Airbnb CRS: {gdf_airbnb.crs} (WGS84 lat/lon)")
print(f"  Statistical areas CRS: {gdf_areas.crs} (Israeli TM Grid)")

gdf_airbnb_transformed = gdf_airbnb.to_crs(gdf_areas.crs)
print(f"✓ Airbnb data converted to {gdf_airbnb_transformed.crs}")

print("\n" + "="*80)
print("Performing spatial join (matching Airbnb listings to statistical areas)...")

# Spatial join
airbnb_result = gpd.sjoin(
    gdf_airbnb_transformed,
    gdf_areas[['STAT_2022', 'geometry']],
    how='left',
    predicate='within'
)

# Count results
matched = airbnb_result['STAT_2022'].notna().sum()
unmatched = airbnb_result['STAT_2022'].isna().sum()

print(f"\n✓✓✓ Spatial join complete!")
print(f"\n  ✓ Matched: {matched} Airbnb listings")
print(f"  ✗ Unmatched: {unmatched} Airbnb listings")

if unmatched > 0:
    print(f"\n⚠ Warning: {unmatched} listings could not be matched")
    print("   (These might be outside Eilat's statistical area boundaries)")

print("\n" + "="*80)
print("Distribution of Airbnb listings by statistical area:")
distribution = airbnb_result['STAT_2022'].value_counts().sort_index()
print(distribution)

print("\n" + "="*80)
print("Summary statistics:")
print(f"  Total Airbnb listings: {len(airbnb_result)}")
print(f"  Statistical areas with Airbnb: {airbnb_result['STAT_2022'].nunique()}")
print(f"  Average listings per area: {matched / airbnb_result['STAT_2022'].nunique():.1f}")
print(f"  Max listings in one area: {distribution.max()}")
print(f"  Min listings in one area: {distribution.min()}")

# Check which statistical areas have NO Airbnb listings
all_stat_areas = set(gdf_areas['STAT_2022'].values)
areas_with_airbnb = set(airbnb_result['STAT_2022'].dropna().values)
empty_areas = sorted(all_stat_areas - areas_with_airbnb)

if empty_areas:
    print(f"\n⚠ Statistical areas with NO Airbnb listings: {empty_areas}")
else:
    print(f"\n✓ All statistical areas have at least one Airbnb listing")

Processing Airbnb data for spatial matching...

Records with missing coordinates: 0
Records with valid coordinates: 833

✓ Created 833 point geometries

Converting coordinate systems...
  Airbnb CRS: EPSG:4326 (WGS84 lat/lon)
  Statistical areas CRS: EPSG:2039 (Israeli TM Grid)
✓ Airbnb data converted to EPSG:2039

Performing spatial join (matching Airbnb listings to statistical areas)...

✓✓✓ Spatial join complete!

  ✓ Matched: 696 Airbnb listings
  ✗ Unmatched: 137 Airbnb listings

   (These might be outside Eilat's statistical area boundaries)

Distribution of Airbnb listings by statistical area:
STAT_2022
11.0     93
12.0     20
13.0     24
14.0     32
15.0     13
16.0     62
17.0     10
18.0      3
21.0     17
22.0      3
23.0      4
24.0     16
25.0      9
26.0      7
31.0    114
32.0     77
33.0     10
41.0     15
42.0     50
43.0      6
44.0      3
45.0      9
46.0     17
47.0     82
Name: count, dtype: int64

Summary statistics:
  Total Airbnb listings: 833
  Statistical area

In [7]:
print("Creating interactive map with Airbnb listings...\n")

# Convert back to WGS84 for Folium
gdf_areas_wgs84 = gdf_areas.to_crs('EPSG:4326')

# Calculate center
center_lat = airbnb_data['coordinates/latitude'].mean()
center_lon = airbnb_data['coordinates/longitude'].mean()

print(f"Map center: {center_lat:.6f}, {center_lon:.6f}")

# Create base map
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=13,
    tiles='OpenStreetMap'
)

# Color scheme
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred',
          'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'white',
          'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray',
          'yellow', 'lightpink', 'olive', 'teal', 'navy', 'brown']

print("Adding statistical areas...")

# Add statistical areas
for idx, row in gdf_areas_wgs84.iterrows():
    stat_code = int(row['STAT_2022'])
    color = colors[idx % len(colors)]

    # Count Airbnb in this area
    airbnb_count = len(airbnb_result[airbnb_result['STAT_2022'] == stat_code])

    popup_text = f"""
    <b>Statistical Area: {stat_code}</b><br>
    Airbnb Listings: {airbnb_count}<br>
    Area: {row['Shape__Are']:.0f} m²
    """

    folium.GeoJson(
        row.geometry,
        style_function=lambda x, color=color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.3
        },
        popup=folium.Popup(popup_text, max_width=300)
    ).add_to(m)

    # Add label
    centroid = row.geometry.centroid
    folium.Marker(
        location=[centroid.y, centroid.x],
        icon=folium.DivIcon(html=f"""
            <div style="
                font-size: 12px;
                font-weight: bold;
                color: black;
                background-color: white;
                border: 2px solid black;
                border-radius: 5px;
                padding: 2px 5px;
            ">{stat_code}</div>
        """)
    ).add_to(m)

print("Adding Airbnb listings...")

# Add matched Airbnb listings (blue markers)
matched_airbnb = airbnb_result[airbnb_result['STAT_2022'].notna()]
for idx, row in matched_airbnb.iterrows():
    popup_text = f"""
    <b>{row['title'][:50]}...</b><br>
    Price/night: ₪{row['price_per_night']:.0f}<br>
    Rating: {row['rating/value']}<br>
    Capacity: {row['personCapacity']} people<br>
    <b>Statistical Area: {int(row['STAT_2022'])}</b><br>
    <a href="{row['url']}" target="_blank">View Listing</a>
    """

    folium.CircleMarker(
        location=[row['coordinates/latitude'], row['coordinates/longitude']],
        radius=4,
        popup=folium.Popup(popup_text, max_width=300),
        color='blue',
        fill=True,
        fillColor='blue',
        fillOpacity=0.6,
        weight=1
    ).add_to(m)

# Add unmatched Airbnb listings (red markers)
unmatched_airbnb = airbnb_result[airbnb_result['STAT_2022'].isna()]
for idx, row in unmatched_airbnb.iterrows():
    popup_text = f"""
    <b>{row['title'][:50]}...</b><br>
    Price/night: ₪{row['price_per_night']:.0f}<br>
    Rating: {row['rating/value']}<br>
    <b>⚠ Outside statistical areas</b><br>
    <a href="{row['url']}" target="_blank">View Listing</a>
    """

    folium.CircleMarker(
        location=[row['coordinates/latitude'], row['coordinates/longitude']],
        radius=4,
        popup=folium.Popup(popup_text, max_width=300),
        color='red',
        fill=True,
        fillColor='red',
        fillOpacity=0.8,
        weight=1
    ).add_to(m)

# Save map
print("\nSaving map...")
m.save('eilat_airbnb_map.html')
print("✓ Map saved as: eilat_airbnb_map.html")

files.download('eilat_airbnb_map.html')

print("\n✓✓✓ Interactive map created!")
print("\nMap legend:")
print("  - Blue dots = Matched Airbnb listings")
print("  - Red dots = Unmatched Airbnb listings (outside boundaries)")
print("  - Click markers for details")

# Display in notebook
m


Creating interactive map with Airbnb listings...

Map center: 29.549018, 34.954835
Adding statistical areas...
Adding Airbnb listings...

Saving map...
✓ Map saved as: eilat_airbnb_map.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✓✓✓ Interactive map created!

Map legend:
  - Blue dots = Matched Airbnb listings
  - Red dots = Unmatched Airbnb listings (outside boundaries)
  - Click markers for details


In [8]:
print("Filtering Airbnb data to keep only matched listings...\n")
print("="*80)

# Keep only listings that were matched to a statistical area
airbnb_matched_only = airbnb_result[airbnb_result['STAT_2022'].notna()].copy()

print(f"Original Airbnb listings: {len(airbnb_result)}")
print(f"Matched listings (kept): {len(airbnb_matched_only)}")
print(f"Unmatched listings (removed): {len(airbnb_result) - len(airbnb_matched_only)}")

print("\n" + "="*80)
print("Distribution after filtering:")
distribution = airbnb_matched_only['STAT_2022'].value_counts().sort_index()
print(distribution)

print("\n" + "="*80)
print("Updated statistics:")
print(f"  Total Airbnb listings: {len(airbnb_matched_only)}")
print(f"  Statistical areas with Airbnb: {airbnb_matched_only['STAT_2022'].nunique()}")
print(f"  Average listings per area: {len(airbnb_matched_only) / airbnb_matched_only['STAT_2022'].nunique():.1f}")

# Check which statistical area has NO Airbnb
all_stat_areas = set(gdf_areas['STAT_2022'].values)
areas_with_airbnb = set(airbnb_matched_only['STAT_2022'].values)
empty_areas = sorted(all_stat_areas - areas_with_airbnb)

if empty_areas:
    print(f"\n⚠ Statistical area with NO Airbnb listings: {empty_areas}")

print("\n" + "="*80)
print("Sample of filtered data:")
print(airbnb_matched_only[['id', 'title', 'price_per_night', 'rating/value',
                           'coordinates/latitude', 'coordinates/longitude', 'STAT_2022']].head(10))

Filtering Airbnb data to keep only matched listings...

Original Airbnb listings: 833
Matched listings (kept): 696
Unmatched listings (removed): 137

Distribution after filtering:
STAT_2022
11.0     93
12.0     20
13.0     24
14.0     32
15.0     13
16.0     62
17.0     10
18.0      3
21.0     17
22.0      3
23.0      4
24.0     16
25.0      9
26.0      7
31.0    114
32.0     77
33.0     10
41.0     15
42.0     50
43.0      6
44.0      3
45.0      9
46.0     17
47.0     82
Name: count, dtype: int64

Updated statistics:
  Total Airbnb listings: 696
  Statistical areas with Airbnb: 24
  Average listings per area: 29.0

⚠ Statistical area with NO Airbnb listings: [np.int32(19)]

Sample of filtered data:
             id                                              title  \
0  1.499420e+18  Sapphire luxury vacation, private pool heated ...   
1  1.448320e+18                                   The Desert House   
2  1.296030e+18                                 Red Sea Vacation 1   
3  9.79827

In [9]:
print("Creating interactive map with matched Airbnb listings only...\n")

# Convert back to WGS84 for Folium
gdf_areas_wgs84 = gdf_areas.to_crs('EPSG:4326')

# Calculate center from matched listings only
center_lat = airbnb_matched_only['coordinates/latitude'].mean()
center_lon = airbnb_matched_only['coordinates/longitude'].mean()

print(f"Map center: {center_lat:.6f}, {center_lon:.6f}")

# Create base map
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=13,
    tiles='OpenStreetMap'
)

# Color scheme
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred',
          'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'white',
          'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray',
          'yellow', 'lightpink', 'olive', 'teal', 'navy', 'brown']

print("Adding statistical areas...")

# Add statistical areas
for idx, row in gdf_areas_wgs84.iterrows():
    stat_code = int(row['STAT_2022'])
    color = colors[idx % len(colors)]

    # Count Airbnb in this area (from filtered data)
    airbnb_count = len(airbnb_matched_only[airbnb_matched_only['STAT_2022'] == stat_code])

    popup_text = f"""
    <b>Statistical Area: {stat_code}</b><br>
    Airbnb Listings: {airbnb_count}<br>
    Area: {row['Shape__Are']:.0f} m²
    """

    folium.GeoJson(
        row.geometry,
        style_function=lambda x, color=color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.4
        },
        popup=folium.Popup(popup_text, max_width=300)
    ).add_to(m)

    # Add label
    centroid = row.geometry.centroid
    folium.Marker(
        location=[centroid.y, centroid.x],
        icon=folium.DivIcon(html=f"""
            <div style="
                font-size: 12px;
                font-weight: bold;
                color: black;
                background-color: white;
                border: 2px solid black;
                border-radius: 5px;
                padding: 2px 5px;
            ">{stat_code}</div>
        """)
    ).add_to(m)

print(f"Adding {len(airbnb_matched_only)} matched Airbnb listings...")

# Add ONLY matched Airbnb listings (blue markers)
for idx, row in airbnb_matched_only.iterrows():
    popup_text = f"""
    <b>{row['title'][:60]}...</b><br>
    Price/night: ₪{row['price_per_night']:.0f}<br>
    Rating: {row['rating/value']}<br>
    Capacity: {row['personCapacity']} people<br>
    <b>Statistical Area: {int(row['STAT_2022'])}</b><br>
    <a href="{row['url']}" target="_blank">View Listing</a>
    """

    folium.CircleMarker(
        location=[row['coordinates/latitude'], row['coordinates/longitude']],
        radius=5,
        popup=folium.Popup(popup_text, max_width=300),
        color='blue',
        fill=True,
        fillColor='blue',
        fillOpacity=0.7,
        weight=2
    ).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Save map
print("\nSaving map...")
m.save('eilat_airbnb_matched_only_map.html')
print("✓ Map saved as: eilat_airbnb_matched_only_map.html")

files.download('eilat_airbnb_matched_only_map.html')

print("\n✓✓✓ Interactive map created!")
print("\nMap features:")
print(f"  - {len(airbnb_matched_only)} Airbnb listings (blue dots)")
print("  - All listings have valid statistical areas")
print("  - Click on dots for listing details")
print("  - Click on areas for statistics")

# Display in notebook
m

Creating interactive map with matched Airbnb listings only...

Map center: 29.551113, 34.945299
Adding statistical areas...
Adding 696 matched Airbnb listings...

Saving map...
✓ Map saved as: eilat_airbnb_matched_only_map.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✓✓✓ Interactive map created!

Map features:
  - 696 Airbnb listings (blue dots)
  - All listings have valid statistical areas
  - Click on dots for listing details
  - Click on areas for statistics


In [10]:
print("Preparing final Airbnb dataset for export...\n")
print("="*80)

# Create final dataframe from matched data only
final_airbnb = airbnb_data.copy()

# Add stat_2022 column - set to NaN for all first
final_airbnb['stat_2022'] = np.nan

# Fill in stat_2022 for matched listings
# Match by id to ensure correct alignment
for idx, row in airbnb_matched_only.iterrows():
    final_airbnb.loc[final_airbnb['id'] == row['id'], 'stat_2022'] = row['STAT_2022']

# Keep only rows that have stat_2022 (remove unmatched)
final_airbnb = final_airbnb[final_airbnb['stat_2022'].notna()].copy()

# Convert stat_2022 to integer
final_airbnb['stat_2022'] = final_airbnb['stat_2022'].astype(int)

# Reorder columns - put stat_2022 after coordinates
columns_order = [
    'id',
    'url',
    'title',
    'description',
    'price/price',
    'price/qualifier',
    'rating/value',
    'personCapacity',
    'locationSubtitle',
    'coordinates/latitude',
    'coordinates/longitude',
    'stat_2022',  # New foreign key column
    'num_nights',
    'price_numeric',
    'price_per_night'
]

final_airbnb = final_airbnb[columns_order]

print("Final dataset preview:")
print(final_airbnb.head(10))

print("\n" + "="*80)
print("Summary:")
print(f"✓ Total Airbnb listings: {len(final_airbnb)}")
print(f"✓ Columns: {len(final_airbnb.columns)}")
print(f"✓ New 'stat_2022' column added (foreign key to statistical areas)")
print(f"✓ Unmatched listings removed: {len(airbnb_data) - len(final_airbnb)}")

# Show distribution
print("\n" + "="*80)
print("Distribution of Airbnb listings by statistical area:")
print(final_airbnb['stat_2022'].value_counts().sort_index())

# Export to Excel
print("\n" + "="*80)
print("Exporting files...")

excel_filename = 'Eilat_airbnb_with_stat_2022.xlsx'
final_airbnb.to_excel(excel_filename, index=False, engine='openpyxl')
print(f"✓ Excel file created: {excel_filename}")

# Export to CSV (with UTF-8 encoding)
csv_filename = 'Eilat_airbnb_with_stat_2022.csv'
final_airbnb.to_csv(csv_filename, index=False, encoding='utf-8-sig')
print(f"✓ CSV file created: {csv_filename}")

print("\n" + "="*80)
print("Downloading files...")

# Download both files
files.download(excel_filename)

Preparing final Airbnb dataset for export...

Final dataset preview:
             id                                                url  \
0  1.499420e+18  https://www.airbnb.com/rooms/14994217859877177...   
1  1.448320e+18  https://www.airbnb.com/rooms/14483227389476541...   
2  1.296030e+18  https://www.airbnb.com/rooms/12960264087150125...   
3  9.798270e+17  https://www.airbnb.com/rooms/97982658737104870...   
4  1.459360e+18  https://www.airbnb.com/rooms/14593565068694520...   
5  1.430350e+18  https://www.airbnb.com/rooms/14303549037574124...   
6  1.398920e+18  https://www.airbnb.com/rooms/13989204638669426...   
7  9.486550e+17  https://www.airbnb.com/rooms/94865505819569775...   
8  1.496320e+18  https://www.airbnb.com/rooms/14963175024986920...   
9  1.476580e+18  https://www.airbnb.com/rooms/14765758917888972...   

                                               title  \
0  Sapphire luxury vacation, private pool heated ...   
1                                   The Desert H

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

NameError: name 'f' is not defined