# Libraries

In [9]:
import concurrent.futures
import math
import os
import shutil
from io import BytesIO

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import EfficientNetB0

# Code

In [None]:
# API and processing parameters
API_KEY = "API_KEY"  # placeholder value; sent as the `apiKey` query parameter of tile requests
ZOOM_LEVEL = 18  # zoom level used both for tile index computation and in the tile URL
IMAGE_SIZE = (224, 224)  # size tiles are resized to; also the model's spatial input size

# Use the larger batch when TensorFlow reports at least one GPU device
BATCH_SIZE = 128 if tf.config.list_physical_devices('GPU') else 64

EPOCHS = 20  # upper bound on training epochs
PATIENCE = 5  # early-stopping patience, in epochs

# Class folder names; a label's integer value is its index in this list
CLASS_NAMES = ['class0', 'class1', 'class2', 'class3']
NUM_CLASSES = len(CLASS_NAMES)

output_dir = "GuadalaHacks 2025"

# Load the pre-adjusted POI data, keeping only Point features
def load_poi_data(geojson_path):
    """Read *geojson_path* with geopandas and return only its Point rows.

    Args:
        geojson_path: Path (or anything ``gpd.read_file`` accepts) of the
            file holding the POIs.

    Returns:
        The rows of the loaded GeoDataFrame whose geometry type is
        ``'Point'``.
    """
    frame = gpd.read_file(geojson_path)
    is_point = frame.geometry.type == 'Point'
    return frame[is_point]

# Convert geographic coordinates to tile indices
def latlon_to_tile(lat, lon, zoom):
    """Convert a latitude/longitude pair to Web Mercator (slippy-map) tile indices.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        zoom: Zoom level; the map is ``2 ** zoom`` tiles wide and high.

    Returns:
        ``(x, y)`` integer tile indices, each clamped to
        ``[0, 2 ** zoom - 1]``. ``x`` grows eastwards from longitude -180 and
        ``y`` grows southwards, with the equator at ``2 ** zoom / 2``.
    """
    lat_rad = math.radians(lat)
    n = 2 ** zoom
    x = int((lon + 180.0) / 360.0 * n)
    # asinh(tan(lat)) / pi lies in (-1, 1) inside the Mercator range, so
    # (1 - that) / 2 maps north..south onto 0..1 with the equator at 0.5.
    y = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)
    # lon == 180 or latitudes beyond the Mercator limit (~85.05 degrees) would
    # otherwise yield indices outside the tile grid.
    x = min(max(x, 0), n - 1)
    y = min(max(y, 0), n - 1)
    return x, y

# Download satellite tile for given coordinates
def download_tile_image(lon, lat):
    """Fetch the map tile containing (lon, lat) as an RGB PIL image.

    The tile indices are computed with ``latlon_to_tile`` at ``ZOOM_LEVEL``
    and requested over HTTP with a 15 second timeout.

    Args:
        lon: Longitude in degrees.
        lat: Latitude in degrees.

    Returns:
        The decoded tile converted to ``'RGB'``, or ``None`` when anything in
        the computation, request or decoding raised; the failure is printed.
    """
    tile = None
    try:
        x, y = latlon_to_tile(lat, lon, ZOOM_LEVEL)
        url = f"https://maps.hereapi.com/v3/base/mc/{ZOOM_LEVEL}/{x}/{y}/png?apiKey={API_KEY}&style=satellite.day&size=512"
        reply = requests.get(url, timeout=15)
        # Turn HTTP error statuses into exceptions so they are reported below
        reply.raise_for_status()
        payload = BytesIO(reply.content)
        tile = Image.open(payload).convert('RGB')
    except Exception as e:
        print(f"Failed to download image for ({lon:.4f}, {lat:.4f}): {str(e)}")
        tile = None
    return tile

### Dataset generation ###
# Process and save POI data
def process_poi(poi, output_dir, idx):
    """Download the tile for one POI and save it under its class folder.

    The tile is resized to ``IMAGE_SIZE`` and written as
    ``<output_dir>/<class name>/poi_<idx, 6 digits>.png``. A real image format
    is used because the folders are later read with
    ``tf.keras.utils.image_dataset_from_directory``, which only indexes image
    files; the pixels stay 8-bit (0-255), the input range the Keras
    EfficientNet models are documented to expect.

    Args:
        poi: Row with a point ``geometry`` (``.x`` = longitude, ``.y`` =
            latitude) and an integer-convertible ``'LABEL'`` entry.
        output_dir: Root directory of the dataset being generated.
        idx: Integer used to build a unique file name.

    Returns:
        None. Nothing is written when the download fails; any exception is
        caught and printed so one bad POI does not abort a batch.
    """
    try:
        # Extract coordinates from geometry
        lon = poi.geometry.x
        lat = poi.geometry.y

        img = download_tile_image(lon, lat)
        if img is None:
            return

        label = int(poi['LABEL'])
        # A negative label would otherwise silently index from the end
        if not 0 <= label < len(CLASS_NAMES):
            raise ValueError(f"label {label} is outside 0..{len(CLASS_NAMES) - 1}")

        class_dir = os.path.join(output_dir, CLASS_NAMES[label])
        os.makedirs(class_dir, exist_ok=True)

        img.resize(IMAGE_SIZE).save(os.path.join(class_dir, f'poi_{idx:06d}.png'))
    except Exception as e:
        print(f"Error processing POI {idx}: {str(e)}")

# Create dataset using parallel processing
def generate_dataset(pois, output_dir):
    """Run ``process_poi`` for every row of *pois* on a 16-thread pool.

    Rows are submitted in consecutive chunks of 1000; each chunk is fully
    drained before the next one is submitted. ``output_dir`` is created if it
    does not exist.

    Args:
        pois: Sized, ``.iloc``-indexable collection of POI rows.
        output_dir: Root directory passed on to ``process_poi``.

    Returns:
        None. Exceptions escaping a task are printed, not raised.
    """
    os.makedirs(output_dir, exist_ok=True)

    batch_len = 1000
    n_pois = len(pois)

    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
        start = 0
        while start < n_pois:
            stop = min(start + batch_len, n_pois)

            # Queue one task per row of the current chunk
            pending = [
                pool.submit(process_poi, pois.iloc[position], output_dir, position)
                for position in range(start, stop)
            ]

            # Wait for the chunk, surfacing any task failure
            for finished in concurrent.futures.as_completed(pending):
                try:
                    finished.result()
                except Exception as e:
                    print(f"Processing error: {str(e)}")

            start = stop

### Model definition ###
# Build classification model using EfficientNetB0
def build_classification_model():
    """Build the EfficientNetB0-based classifier (not compiled).

    The ImageNet-pretrained backbone (without its top) takes inputs of shape
    ``(*IMAGE_SIZE, 3)``; all but its last 30 layers are frozen. On top sit
    global average pooling, a 512-unit ReLU layer with L2(0.01) kernel
    regularisation, 50% dropout and a ``NUM_CLASSES``-way softmax.

    Returns:
        A ``Sequential`` Keras model.
    """
    base_model = EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE, 3)
    )

    # Fine-tuning configuration: only the last 30 backbone layers stay trainable
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    return models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(512, activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        layers.Dropout(0.5),
        # Keep the output in float32: training switches the global policy to
        # mixed_float16, and a float16 softmax feeding the loss is numerically
        # fragile. Under the default float32 policy this is a no-op.
        layers.Dense(NUM_CLASSES, activation='softmax', dtype='float32')
    ])

### Training Pipeline ###
# Calculate class weights for imbalanced datasets
def calculate_class_weights(data_dir):
    """Compute balanced class weights from the per-class folder sizes.

    Each class present gets ``total / (NUM_CLASSES * count)``, where ``count``
    is the number of entries in ``<data_dir>/<class name>``.

    Args:
        data_dir: Dataset root containing one sub-folder per class name.

    Returns:
        Dict mapping class index (position in ``CLASS_NAMES``) to its weight.
        Classes with no samples, including classes whose folder does not
        exist, are omitted.
    """
    class_counts = []
    for cls in CLASS_NAMES:
        class_path = os.path.join(data_dir, cls)
        # A class folder is only created once a sample is saved into it, so a
        # class without samples may have no folder at all; count it as empty
        # instead of letting os.listdir raise.
        if os.path.isdir(class_path):
            class_counts.append(len(os.listdir(class_path)))
        else:
            class_counts.append(0)

    total_samples = sum(class_counts)
    return {
        i: total_samples / (NUM_CLASSES * count)
        for i, count in enumerate(class_counts)
        if count > 0
    }

# Optimized training workflow
def train_model(data_dir):
    """Train the classifier on the class folders found in *data_dir*.

    Turns on XLA JIT and the global ``mixed_float16`` policy (both are
    process-wide settings), builds an 80/20 training/validation split with
    one-hot labels, and fits the model from ``build_classification_model``
    with early stopping on ``val_loss``, a best-``val_accuracy`` checkpoint
    written to ``best_model.h5``, TensorBoard logs in ``training_logs`` and
    the weights from ``calculate_class_weights``.

    Args:
        data_dir: Directory with one sub-folder of images per class.

    Returns:
        ``(model, history)``: the fitted model and the object returned by
        ``model.fit``.
    """
    # Enable hardware accelerations
    tf.config.optimizer.set_jit(True)
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

    # Create data pipelines
    def preprocess(image, label):
        return image, tf.one_hot(label, NUM_CLASSES)

    # The training/validation cut is taken from the file list AFTER the
    # optional seeded shuffle. Both subsets must therefore use the same
    # shuffle/seed settings; mixing shuffle=True with shuffle=False cuts two
    # differently ordered lists and leaks training images into validation.
    split_options = dict(
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='int',
        validation_split=0.2,
        shuffle=True,
        seed=42,
    )

    train_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        subset='training',
        **split_options
    ).map(preprocess).cache().prefetch(tf.data.AUTOTUNE)

    val_ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        subset='validation',
        **split_options
    ).map(preprocess).cache().prefetch(tf.data.AUTOTUNE)

    # Model configuration
    model = build_classification_model()
    model.compile(
        optimizer=optimizers.Nadam(learning_rate=1e-3),
        loss='categorical_crossentropy',
        metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy')]
    )

    # Training callbacks
    callbacks_list = [
        callbacks.EarlyStopping(
            monitor='val_loss',
            patience=PATIENCE,
            restore_best_weights=True
        ),
        callbacks.ModelCheckpoint(
            'best_model.h5',
            monitor='val_accuracy',
            save_best_only=True
        ),
        callbacks.TensorBoard(
            log_dir='training_logs',
            histogram_freq=1
        )
    ]

    # Calculate class weights
    class_weights = calculate_class_weights(data_dir)

    # Start training
    history = model.fit(
        train_ds,
        epochs=EPOCHS,
        validation_data=val_ds,
        callbacks=callbacks_list,
        class_weight=class_weights,
        verbose=2
    )

    return model, history

### Prediction and evaluation ###
# Load a previously saved model from disk
def load_trained_model(model_path='poi_classifier_final.h5'):
    """Load and return the Keras model stored at *model_path*.

    Args:
        model_path: File to load; defaults to ``'poi_classifier_final.h5'``.

    Returns:
        Whatever ``tf.keras.models.load_model`` returns for that path. No
        ``custom_objects`` are supplied.
    """
    restored = tf.keras.models.load_model(model_path)
    return restored

# Make predictions on new unlabeled POIs
def predict_new_pois(model, geojson_path, output_csv='predictions.csv'):
    """Classify the POIs in *geojson_path* and write the result to CSV.

    Tiles are downloaded into a scratch directory (``temp_predictions``,
    removed again even on failure), read back with
    ``image_dataset_from_directory`` and fed to ``model.predict``.

    Args:
        model: Keras model whose ``predict`` returns per-class probabilities.
        geojson_path: File with the POIs to classify.
        output_csv: Destination CSV path.

    Returns:
        A copy of the loaded POIs with two extra columns: ``prediction``
        (arg-max class index, ``-1`` when the tile could not be downloaded)
        and ``confidence`` (max probability, ``NaN`` when not downloaded).
    """
    # Copy so that adding columns never writes into a filtered view
    new_pois = load_poi_data(geojson_path).copy()

    temp_dir = 'temp_predictions'
    dummy_class_dir = os.path.join(temp_dir, 'dummy_class')
    # Leftovers of an aborted earlier run would be predicted on as well
    shutil.rmtree(temp_dir, ignore_errors=True)
    os.makedirs(dummy_class_dir, exist_ok=True)

    # Defaults mark rows for which no tile could be obtained
    predictions = np.full(len(new_pois), -1, dtype=int)
    confidences = np.full(len(new_pois), np.nan)

    try:
        # Row positions (not index labels) of the POIs that got an image, in
        # the same order as the files sort on disk.
        downloaded = []
        for position, (_, poi) in enumerate(new_pois.iterrows()):
            img = download_tile_image(poi.geometry.x, poi.geometry.y)
            if img is None:
                continue
            # PNG: the dataset loader only indexes image files. Zero-padded
            # names: with shuffle=False files are read in alphanumeric order,
            # so padding keeps that order equal to row order.
            img.resize(IMAGE_SIZE).save(
                os.path.join(dummy_class_dir, f'pred_{position:08d}.png')
            )
            downloaded.append(position)

        if downloaded:
            pred_ds = tf.keras.utils.image_dataset_from_directory(
                temp_dir,
                image_size=IMAGE_SIZE,
                batch_size=BATCH_SIZE,
                label_mode=None,
                shuffle=False
            )
            probabilities = model.predict(pred_ds)
            predictions[downloaded] = np.argmax(probabilities, axis=1)
            confidences[downloaded] = np.max(probabilities, axis=1)
    finally:
        # Cleanup, also when a download or the prediction itself fails
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Add predictions to GeoDataFrame
    new_pois['prediction'] = predictions
    new_pois['confidence'] = confidences

    # Save results
    new_pois.to_csv(output_csv)
    return new_pois

# Evaluate model performance on test data
def evaluate_model(model, test_geojson_path):
    """Evaluate *model* on the labelled POIs in *test_geojson_path*.

    Builds a scratch dataset in ``temp_evaluation`` with ``generate_dataset``
    (removed again even on failure), predicts on it, and saves a confusion
    matrix heatmap to ``confusion_matrix.png``.

    Args:
        model: Keras model whose ``predict`` returns per-class probabilities.
        test_geojson_path: File with the labelled test POIs.

    Returns:
        ``(report, cm)``: the ``classification_report`` dict and the
        ``NUM_CLASSES`` x ``NUM_CLASSES`` confusion matrix, both indexed in
        ``CLASS_NAMES`` order.
    """
    test_pois = load_poi_data(test_geojson_path)
    test_dir = 'temp_evaluation'
    class_ids = list(range(NUM_CLASSES))

    # Leftovers of an aborted earlier run would be evaluated as well
    shutil.rmtree(test_dir, ignore_errors=True)

    try:
        # Create labeled dataset
        generate_dataset(test_pois, test_dir)

        # Make sure every class folder exists so that the label indices are
        # the CLASS_NAMES positions even when a class has no test sample.
        for cls in CLASS_NAMES:
            os.makedirs(os.path.join(test_dir, cls), exist_ok=True)

        # Load evaluation dataset
        test_ds = tf.keras.utils.image_dataset_from_directory(
            test_dir,
            class_names=CLASS_NAMES,
            image_size=IMAGE_SIZE,
            batch_size=BATCH_SIZE,
            label_mode='int',
            shuffle=False
        )

        # shuffle=False makes both passes over test_ds see the same order
        y_true_labels = np.concatenate(
            [labels.numpy() for _, labels in test_ds], axis=0
        )
        y_pred = model.predict(test_ds)
        y_pred_labels = np.argmax(y_pred, axis=1)
    finally:
        # Cleanup
        shutil.rmtree(test_dir, ignore_errors=True)

    # Generate metrics; `labels` pins the rows to all classes so target_names
    # always lines up, even if a class is missing from the test data.
    report = classification_report(
        y_true_labels,
        y_pred_labels,
        labels=class_ids,
        target_names=CLASS_NAMES,
        output_dict=True
    )

    # Confusion matrix with one row/column per class, matching the tick labels
    cm = confusion_matrix(y_true_labels, y_pred_labels, labels=class_ids)

    # Visualization
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=CLASS_NAMES,
                yticklabels=CLASS_NAMES)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.savefig('confusion_matrix.png')
    plt.close()

    return report, cm

### Implementation ###
# Load trained model (default path: 'poi_classifier_final.h5')
model = load_trained_model()

# OPTION 1: Predict new POIs and write them to 'new_predictions.csv'
predictions = predict_new_pois(
    model,
    'new_pois.geojson',
    output_csv='new_predictions.csv'
)

# OPTION 2: Evaluate on test data
test_report, test_cm = evaluate_model(
    model,
    'test_pois.geojson'
)

# Print evaluation results
print("\nClassification Report:")
# `pd` is pandas; it must be imported with the other libraries, otherwise
# this line raises NameError. Transposed so each class/average is one row.
print(pd.DataFrame(test_report).transpose())
print("\nConfusion Matrix:")
print(test_cm)