# UFO Sightings — Altair Visualizations
This notebook loads the public UFO dataset and creates two Vega-Lite/Altair visualizations.
- Plot 1: Annual trend of UFO reports in the U.S.
- Plot 2 (Interactive): Map-like scatter of sightings with a year slider and shape highlight.

**Data Source:** https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv

In [None]:

from pathlib import Path

import altair as alt
import numpy as np
import pandas as pd

# Avoid max rows warning in Altair
alt.data_transformers.disable_max_rows()

# Data URL (must be read from URL per assignment)
DATA_URL = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"

# Column layout of the scrubbed UFO export, in file order.
# NOTE(review): this copy of the CSV appears to ship without a header row, so the
# names are supplied here rather than read from the file — confirm against the data.
UFO_COLUMNS = [
    'datetime', 'city', 'state', 'country', 'shape',
    'duration (seconds)', 'duration (hours/min)', 'comments',
    'date posted', 'latitude', 'longitude',
]
US_COUNTRY_CODES = ['US', 'USA', 'UNITED STATES']
# Keep a reasonable year range (data spans mid-1900s to 2014-ish)
YEAR_MIN, YEAR_MAX = 1940, 2015

# low_memory=False parses each column in one pass (avoids mixed-dtype chunk warnings)
df = pd.read_csv(DATA_URL, header=None, names=UFO_COLUMNS, low_memory=False)

# Should the file carry a header row after all, it arrives as the first data row:
# drop it so it is not counted as a sighting.
if len(df) and str(df.iloc[0]['latitude']).strip().lower().startswith('lat'):
    df = df.iloc[1:].reset_index(drop=True)

# Basic cleaning / transformations
# Parse dates; unparseable stamps become NaT and fall out at the year filter below
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')

# Derive temporal columns
df['year'] = df['datetime'].dt.year
df['month'] = df['datetime'].dt.month
df['hour'] = df['datetime'].dt.hour

# Clean numeric fields (malformed coordinates become NaN)
for col in ['latitude', 'longitude']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Duration seconds — cleaned numeric column
df['duration_seconds'] = pd.to_numeric(df['duration (seconds)'], errors='coerce')

# Cap extreme durations for sizing (robust against outliers)
duration_cap = df['duration_seconds'].quantile(0.99)
df['duration_capped'] = df['duration_seconds'].clip(upper=duration_cap)

# Limit to U.S. records to keep visuals legible; rows with a missing country are excluded
is_us = (
    df['country']
    .fillna('')
    .astype(str)
    .str.strip()
    .str.upper()
    .isin(US_COUNTRY_CODES)
)

# Drop rows missing critical coords; .copy() so the column added below
# never writes into a view of `df`
df_us = df.loc[is_us].dropna(subset=['latitude', 'longitude']).copy()

# For shape coloring — normalize missing / blank shapes to 'unknown'
df_us['shape_clean'] = (
    df_us['shape']
    .fillna('unknown')
    .astype(str)
    .str.strip()
    .str.lower()
    .replace({'': 'unknown'})
)

# between() is inclusive on both ends; rows with a NaN year are dropped here
df_us = df_us[df_us['year'].between(YEAR_MIN, YEAR_MAX)].copy()
len(df), len(df_us)


## Plot 1 — Annual Trend of UFO Reports in the U.S.
**Encodings**: x = year (ordinal), y = count of reports (quantitative).

**Why**: A simple area+line emphasizes long-term changes in reporting volume.

**Transforms**: groupby year; filter to U.S.; parse dates.


In [None]:

# One row per year with the number of U.S. reports filed in that year
year_counts = (
    df_us
    .groupby('year')
    .size()
    .reset_index(name='reports')
)

# Translucent filled area carries the axis titles and the hover tooltip
base1 = (
    alt.Chart(year_counts)
    .mark_area(opacity=0.35)
    .encode(
        alt.X('year:O', title='Year'),
        alt.Y('reports:Q', title='Number of Reports'),
        tooltip=['year:O', 'reports:Q'],
    )
)

# Line drawn over the area, using the same fields
line1 = (
    alt.Chart(year_counts)
    .mark_line()
    .encode(x='year:O', y='reports:Q')
)

# Stack the two marks into a single layered chart
plot1 = alt.layer(base1, line1).properties(
    title='UFO Reports per Year (United States)',
    width=700,
    height=350,
)

plot1


## Plot 2 — **Interactive** Latitude/Longitude Scatter (U.S.)
**Encodings**: x = longitude, y = latitude, size = capped duration, color = shape.

**Interactivity**:
- **Year slider** to filter the visible sightings.
- **Legend click** to highlight specific shapes (Altair selection bound to the color legend).
- **Hover tooltip** for details.

**Why**: The slider lets viewers focus on temporal slices; legend highlight reduces overplotting noise.


In [None]:

# Selection widgets
year_min, year_max = int(df_us['year'].min()), int(df_us['year'].max())

# Start the slider on 2000, clamped so the default always lies inside the slider range
default_year = min(max(2000, year_min), year_max)

year_select = alt.selection_point(
    fields=['year'],
    bind=alt.binding_range(name='Year:', min=year_min, max=year_max, step=1),
    value={'year': default_year}  # default
)

# Clicking a legend entry highlights that shape; everything else turns gray
shape_highlight = alt.selection_point(fields=['shape_clean'], bind='legend')

# Altair embeds the whole frame in the chart spec, so pass only the fields the
# chart actually uses (keeps the notebook and exported HTML small).
plot2_fields = [
    'longitude', 'latitude', 'year', 'datetime', 'city', 'state',
    'shape_clean', 'duration_seconds', 'duration_capped',
]
plot2_data = df_us[[c for c in plot2_fields if c in df_us.columns]]

# Base scatter
plot2 = alt.Chart(plot2_data).mark_circle(opacity=0.5).encode(
    x=alt.X('longitude:Q', title='Longitude'),
    y=alt.Y('latitude:Q', title='Latitude'),
    size=alt.Size('duration_capped:Q', title='Duration (capped, s)', legend=None),
    color=alt.condition(
        shape_highlight,
        alt.Color('shape_clean:N', title='Shape'),
        alt.value('lightgray')
    ),
    # Tooltip channels take no `empty` option; passing one fails schema validation
    tooltip=[
        alt.Tooltip('datetime:T', title='Date/Time'),
        alt.Tooltip('city:N', title='City'),
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('shape_clean:N', title='Shape'),
        alt.Tooltip('duration_seconds:Q', title='Duration (s)', format=',')
    ]
).add_params(
    year_select,
    shape_highlight
).transform_filter(
    year_select  # filter by slider
).properties(
    width=700,
    height=400,
    title='UFO Sightings in the U.S. — Interactive by Year and Shape'
)

plot2


In [None]:

# Save charts as standalone HTML files for embedding.
# NOTE(review): Altair's default HTML output loads the Vega libraries from a CDN;
# if fully offline files are required, check whether the installed Altair version
# supports save(..., inline=True) — confirm before relying on "self-contained".
CHART_DIR = Path("/mnt/data/charts")

# The target folder must exist before saving; create it (and parents) if missing
CHART_DIR.mkdir(parents=True, exist_ok=True)

# Kept as plain strings so the displayed tuple and any later use are unchanged
plot1_path = str(CHART_DIR / "ufo_plot1_annual_trend.html")
plot2_path = str(CHART_DIR / "ufo_plot2_interactive_scatter.html")

plot1.save(plot1_path)
plot2.save(plot2_path)

plot1_path, plot2_path
