# Radio Spotting API - Neural Network Feature Extraction

This notebook extracts data from a ham radio spotting API and prepares normalized feature vectors suitable for neural network input.

**API:** http://api.jxqz.org:8080/api/spots  
**Goal:** Create a feature vector for predicting or analyzing radio activity patterns

## 1. Import Required Libraries

Import necessary libraries for API calls, data manipulation, and preprocessing.

In [None]:
import requests
import pandas as pd
import numpy as np
import json
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import warnings

# Install a global "ignore" filter so warnings raised in later cells are not shown.
# NOTE(review): this hides every warning category, including deprecation and
# data-quality warnings from pandas / scikit-learn — consider a narrower filter.
warnings.filterwarnings('ignore')

# Reached only when every import above succeeded.
print("✓ All libraries imported successfully")

## 2. Fetch Data from API

Make a GET request to retrieve the ham radio spotting data.

In [None]:
API_URL = "http://api.jxqz.org:8080/api/spots"

# Define both results up front so later cells always find these names, even
# when the request or the JSON decoding fails part-way through.
spots = []
pagination = {}

try:
    print(f"Fetching data from {API_URL}...")
    response = requests.get(API_URL, timeout=10)
    response.raise_for_status()
    data = response.json()

    # The cell reads "spots" and "pagination" with dict access, so any other
    # top-level JSON type (e.g. a bare list) is reported as an error instead
    # of crashing with AttributeError.
    if not isinstance(data, dict):
        raise ValueError(f"unexpected JSON payload of type {type(data).__name__}")

    # `or` also covers keys that are present but null in the payload.
    spots = data.get("spots") or []
    pagination = data.get("pagination") or {}

    print(f"✓ Successfully fetched {len(spots)} spots")
    print(f"  Total spots available: {pagination.get('total', 'Unknown')}")
    print(f"  Pagination info: Limit={pagination.get('limit')}, Offset={pagination.get('offset')}")

except (requests.RequestException, ValueError) as e:
    # ValueError covers a body that is not valid JSON on requests versions
    # whose decode error is not a RequestException, plus the payload check above.
    print(f"✗ Error fetching data: {e}")
    spots = []
    pagination = {}

## 3. Parse and Explore the Data

Examine the JSON structure and display sample records.

In [None]:
# Peek at the first spot (when any were fetched) to learn the record layout
if spots:
    first_spot = spots[0]
    print("Sample record:")
    print(json.dumps(first_spot, indent=2))
    print(f"\nRecord keys: {list(first_spot.keys())}")

In [None]:
# Tabular view of the fetched spots: one row per record
df_raw = pd.DataFrame(data=spots)

# Quick structural overview before any cleaning
print(f"Dataset shape: {df_raw.shape}")
print(f"\nColumn data types:\n{df_raw.dtypes}")
print("\nFirst few rows:")
df_raw.head(5)

In [None]:
# Count nulls per column, then express the same counts as a share of all rows
print("Missing values per column:")
missing_counts = df_raw.isnull().sum()
print(missing_counts)

print("\nPercentage of missing values:")
missing_pct = (missing_counts / len(df_raw) * 100).round(2)
print(missing_pct)

In [None]:
# Inspect the two categorical columns: distinct labels and how often each occurs
print("Unique bands:")
band_values = df_raw['band']
print(band_values.unique())
print("\nBand distribution:")
print(band_values.value_counts())

print("\nUnique modes:")
mode_values = df_raw['mode']
print(mode_values.unique())
print("\nMode distribution:")
# dropna=False keeps missing modes visible in the tally
print(mode_values.value_counts(dropna=False))

## 4. Handle Missing Values and Engineer Features

Create numerical features from raw data and handle missing values.

In [None]:
# Work on a copy so df_raw keeps the data exactly as fetched
df = df_raw.copy()

# Convert frequency to float; values that cannot be parsed become NaN
df['frequency'] = pd.to_numeric(df['frequency'], errors='coerce')

# Encode bands as integer IDs assigned in sorted label order.
# Missing bands are dropped before sorting: sorting a mix of strings and
# NaN/None raises TypeError. Rows without a band then fall through the
# mapping and receive the -1 sentinel from fillna below.
bands_list = sorted(df['band'].dropna().unique())
band_to_id = {band: idx for idx, band in enumerate(bands_list)}
df['band_id'] = df['band'].map(band_to_id).fillna(-1)

print(f"Band encoding: {band_to_id}")
print(f"\nBand ID distribution:")
print(df['band_id'].value_counts().sort_index())

In [None]:
# Parse timestamps
def parse_timestamp(ts_str):
    """Parse a timestamp such as ``"Mon, 01 Jan 2024 12:34:56 GMT"``.

    Args:
        ts_str: Value to parse. It is converted with ``str()`` first and must
            then match the format ``"%a, %d %b %Y %H:%M:%S %Z"``.

    Returns:
        The parsed ``datetime``, or ``None`` when the text does not match the
        format (which includes missing values such as ``None`` or NaN).
    """
    try:
        return datetime.strptime(str(ts_str), "%a, %d %b %Y %H:%M:%S %Z")
    except (ValueError, TypeError):
        # Only parse failures mean "no timestamp". A bare `except:` would also
        # swallow KeyboardInterrupt and SystemExit.
        return None

# Force a datetime64 column (failed parses become NaT). Without the explicit
# conversion, a column in which every timestamp failed to parse stays
# object-dtype and the .dt accessor below raises AttributeError.
df['datetime'] = pd.to_datetime(df['timestamp'].apply(parse_timestamp), errors='coerce')

# Calendar features; rows with NaT yield NaN here
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['day_name'] = df['datetime'].dt.day_name()

print(f"Hour distribution:")
print(df['hour'].value_counts().sort_index())
print(f"\nDay of week distribution:")
print(df['day_name'].value_counts())

In [None]:
# Extract signal report value
def _signal_report_value(report):
    """Return the first two characters of a signal report as a float.

    Missing, empty, or non-numeric reports (e.g. "5NN") yield 0.0 instead of
    raising, so one malformed spot cannot abort the whole cell.
    """
    if not pd.notna(report):
        return 0.0
    try:
        return float(str(report)[:2])
    except ValueError:
        return 0.0


# For signal reports like "569" this keeps the first two characters -> 56.0.
# NOTE(review): the earlier comment described a 5-9 quality range, but two
# characters produce values such as 56 or 59 — confirm which is intended.
df['signal_report_numeric'] = df['signal_report'].apply(_signal_report_value)

print("Signal report statistics:")
print(df['signal_report_numeric'].describe())

In [None]:
# Operating modes receive codes 1..6 in the order listed; any other mode
# (or a missing one) is encoded as 0
known_modes = ['CW', 'USB', 'SSB', 'LSB', 'AM', 'FM']
mode_map = {mode_name: code for code, mode_name in enumerate(known_modes, start=1)}

mode_codes = df['mode'].map(mode_map)
df['mode_id'] = mode_codes.fillna(0)

print(f"Mode encoding: {mode_map}")
print("\nMode ID distribution:")
print(df['mode_id'].value_counts().sort_index())

In [None]:
# Fill any remaining missing values.
# 'frequency' is included because pd.to_numeric(..., errors='coerce') turns
# unparseable entries into NaN, and it is one of the model features.
for column in ['frequency', 'hour', 'day_of_week', 'band_id']:
    column_median = df[column].median()
    # The median of an all-NaN column is itself NaN, which would make fillna
    # a no-op; fall back to 0 so no NaN reaches the scaler.
    fill_value = 0 if pd.isna(column_median) else column_median
    df[column] = df[column].fillna(fill_value)

df['signal_report_numeric'] = df['signal_report_numeric'].fillna(0)

print("✓ Missing values handled")

## 5. Normalize Features

Scale numerical features to the [0, 1] range for neural network input.

In [None]:
# Columns that make up one input vector, in the order the network will see them
feature_columns = ['frequency', 'band_id', 'hour', 'day_of_week', 'signal_report_numeric', 'mode_id']

# Independent copy of just those columns
X_raw = df.loc[:, feature_columns].copy()

print(f"Raw feature matrix shape: {X_raw.shape}")
print("\nRaw feature statistics:")
print(X_raw.describe())

In [None]:
# Min-max scaling: learn each column's range from X_raw, then map it onto (0, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
X_normalized = scaler.fit(X_raw).transform(X_raw)

# Same values with the feature names attached, for readable display
X_normalized_df = pd.DataFrame(data=X_normalized, columns=feature_columns)

print(f"Normalized feature matrix shape: {X_normalized.shape}")
print("\nNormalized feature statistics:")
print(X_normalized_df.describe())

In [None]:
# Per-feature min / max / mean, before and after scaling, side by side
raw_min, raw_max, raw_mean = X_raw.min(), X_raw.max(), X_raw.mean()
norm_min, norm_max, norm_mean = (
    X_normalized_df.min(),
    X_normalized_df.max(),
    X_normalized_df.mean(),
)

comparison_columns = {
    'Feature': feature_columns,
    'Raw_Min': raw_min,
    'Raw_Max': raw_max,
    'Raw_Mean': raw_mean,
    'Norm_Min': norm_min,
    'Norm_Max': norm_max,
    'Norm_Mean': norm_mean,
}
comparison = pd.DataFrame(comparison_columns)

print("Normalization Comparison:")
comparison

## 6. Create Feature Vector

Combine processed features into arrays ready for neural network input.

In [None]:
# X_normalized already is the feature matrix:
# rows are samples, columns are features
n_samples, n_features = X_normalized.shape

print(f"Feature vector shape: {X_normalized.shape}")
print(f"Number of samples: {n_samples}")
print(f"Number of features: {n_features}")

print("\nFirst 5 feature vectors:")
print(X_normalized[:5])

In [None]:
# Show the first ten normalized rows as a table
print("First 10 normalized feature vectors:")
preview_rows = X_normalized_df.head(10)
display(preview_rows)

In [None]:
def _write_csv(frame, path):
    """Write ``frame`` to ``path`` as CSV, leaving out the index column."""
    frame.to_csv(path, index=False)


# Normalized vectors: the file intended for model input
output_file = 'radio_features_normalized.csv'
_write_csv(X_normalized_df, output_file)
print(f"✓ Feature vectors exported to {output_file}")

# Unscaled features, kept alongside for reference
_write_csv(X_raw, 'radio_features_raw.csv')
print("✓ Raw features exported to radio_features_raw.csv")

## 7. Validate Feature Vector

Verify that the feature vector is ready for neural network training.

In [None]:
# Each entry maps a human-readable check to its pass/fail outcome
is_2d = len(X_normalized.shape) == 2
is_float = X_normalized.dtype in (np.float32, np.float64)
has_no_nan = not np.isnan(X_normalized).any()
has_no_inf = not np.isinf(X_normalized).any()
within_unit_range = np.all((X_normalized >= 0) & (X_normalized <= 1))
has_six_features = X_normalized.shape[1] == 6
has_rows = X_normalized.shape[0] > 0

validation_checks = {
    "Shape is 2D": is_2d,
    "Data type is numeric": is_float,
    "No NaN values": has_no_nan,
    "No infinite values": has_no_inf,
    "All values in [0, 1]": within_unit_range,
    "Expected feature count (6)": has_six_features,
    "Non-empty dataset": has_rows,
}

divider = "=" * 50
print("Validation Results:")
print(divider)
for check, result in validation_checks.items():
    if result:
        status = "✓ PASS"
    else:
        status = "✗ FAIL"
    print(f"{status}: {check}")

all_passed = all(validation_checks.values())
print(divider)
if not all_passed:
    print("\n✗ Some validation checks failed!")
else:
    print("\n✓✓✓ All validation checks passed! ✓✓✓")
    print("Feature vector is ready for neural network input.")

In [None]:
# Array-level facts first, then one summary line per feature column
heavy_rule = "=" * 60
light_rule = "-" * 60

print("Detailed Feature Vector Information:")
print(heavy_rule)
print(f"Shape: {X_normalized.shape}")
print(f"Data Type: {X_normalized.dtype}")
print(f"Memory Usage: {X_normalized.nbytes / 1024:.2f} KB")
print("\nValue Range per Feature:")
print(light_rule)

for idx in range(len(feature_columns)):
    col = feature_columns[idx]
    column_values = X_normalized[:, idx]
    lo, hi = column_values.min(), column_values.max()
    avg, spread = column_values.mean(), column_values.std()
    print(f"{col:<25} Min: {lo:.6f}  Max: {hi:.6f}  Mean: {avg:.6f}  Std: {spread:.6f}")

In [None]:
# Closing banner: what went in, what came out, and where it was saved
banner = "=" * 60
sample_count, feature_count = X_normalized.shape[0], X_normalized.shape[1]

print("\n" + banner)
print("FEATURE EXTRACTION PIPELINE COMPLETE")
print(banner)
print("\nInput: Ham radio spotting API")
print(f"Output: Normalized feature vector ({sample_count} samples × {feature_count} features)")
print(f"\nFeatures: {', '.join(feature_columns)}")
print("\nReady for use in:")
for use_case in (
    "  • Neural networks (TensorFlow, PyTorch)",
    "  • Machine learning models (scikit-learn)",
    "  • Deep learning projects",
):
    print(use_case)
print("\nCSV Export: radio_features_normalized.csv")
print(banner)