# Imports and setup

In [3]:
import pandas as pd
import altair as alt

# allow large datasets
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Load the data from URL

In [4]:
data_url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/bfro_reports_fall2022.csv"

bfro = pd.read_csv(data_url)

bfro.head()

Unnamed: 0,observed,location_details,county,state,season,title,latitude,longitude,date,number,...,precip_intensity,precip_probability,precip_type,pressure,summary,uv_index,visibility,wind_bearing,wind_speed,location
0,Ed L. was salmon fishing with a companion in P...,East side of Prince William Sound,Valdez-Chitina-Whittier County,Alaska,Fall,,,,,1261.0,...,,,,,,,,,,
1,heh i kinda feel a little dumb that im reporti...,"the road is off us rt 80, i dont know the exit...",Warren County,New Jersey,Fall,,,,,438.0,...,,,,,,,,,,
2,I was on my way to Claremont from Lebanon on R...,Close to Claremont down 120 not far from Kings...,Sullivan County,New Hampshire,Summer,Report 55269: Dawn sighting at Stevens Brook o...,43.41549,-72.33093,2016-06-07,55269.0,...,0.001,0.7,rain,998.87,Mostly cloudy throughout the day.,6.0,9.7,262.0,0.49,POINT(-72.33093000000001 43.415490000000005)
3,I was northeast of Macy Nebraska along the Mis...,Latitude & Longitude : 42.158230 -96.344197,Thurston County,Nebraska,Spring,Report 59757: Possible daylight sighting of a ...,42.15685,-96.34203,2018-05-25,59757.0,...,0.0,0.0,,1008.07,Partly cloudy in the morning.,10.0,8.25,193.0,3.33,POINT(-96.34203000000001 42.15685)
4,"While this incident occurred a long time ago, ...","Ward County, Just outside of a the Minuteman T...",Ward County,North Dakota,Spring,Report 751: Hunter describes described being s...,48.25422,-101.3166,2000-04-21,751.0,...,,,rain,1011.47,Partly cloudy until evening.,6.0,10.0,237.0,11.14,POINT(-101.3166 48.254220000000004)


# Light cleaning & transformations

In [5]:
# If a 'year' column already exists, this will just reuse it.
if "date" in bfro.columns and "year" not in bfro.columns:
    # assume 'date' string like '10/15/2003'
    bfro["date_parsed"] = pd.to_datetime(bfro["date"], errors="coerce")
    bfro["year"] = bfro["date_parsed"].dt.year

# keep only rows with meaningful info
needed_cols = ["state", "season", "year"]
bfro_clean = bfro.dropna(subset=[col for col in needed_cols if col in bfro.columns])

# Make sure year is an integer
if "year" in bfro_clean.columns:
    bfro_clean["year"] = bfro_clean["year"].astype("Int64")

bfro_clean.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bfro_clean["year"] = bfro_clean["year"].astype("Int64")


Unnamed: 0,observed,location_details,county,state,season,title,latitude,longitude,date,number,...,precip_type,pressure,summary,uv_index,visibility,wind_bearing,wind_speed,location,date_parsed,year
2,I was on my way to Claremont from Lebanon on R...,Close to Claremont down 120 not far from Kings...,Sullivan County,New Hampshire,Summer,Report 55269: Dawn sighting at Stevens Brook o...,43.41549,-72.33093,2016-06-07,55269.0,...,rain,998.87,Mostly cloudy throughout the day.,6.0,9.7,262.0,0.49,POINT(-72.33093000000001 43.415490000000005),2016-06-07,2016
3,I was northeast of Macy Nebraska along the Mis...,Latitude & Longitude : 42.158230 -96.344197,Thurston County,Nebraska,Spring,Report 59757: Possible daylight sighting of a ...,42.15685,-96.34203,2018-05-25,59757.0,...,,1008.07,Partly cloudy in the morning.,10.0,8.25,193.0,3.33,POINT(-96.34203000000001 42.15685),2018-05-25,2018
4,"While this incident occurred a long time ago, ...","Ward County, Just outside of a the Minuteman T...",Ward County,North Dakota,Spring,Report 751: Hunter describes described being s...,48.25422,-101.3166,2000-04-21,751.0,...,rain,1011.47,Partly cloudy until evening.,6.0,10.0,237.0,11.14,POINT(-101.3166 48.254220000000004),2000-04-21,2000
5,"In early spring 1988, some friends of mine and...","Yancey County, North Carolina, near the summit...",Yancey County,North Carolina,Spring,Report 3339: Deep impressions seen in the snow,35.74875,-82.26195,1988-03-15,3339.0,...,,1014.47,Partly cloudy until evening and breezy through...,7.0,9.5,348.0,16.94,POINT(-82.26195 35.74875),1988-03-15,1988
6,This happened summertime early 70's (I think 7...,To get there take Highway 78 south out of Absa...,Stillwater County,Montana,Summer,Report 47215: Female fly fisherman's lucid rec...,45.31278,-109.6449,1971-12-15,47215.0,...,,,,,,,,POINT(-109.6449 45.31278),1971-12-15,1971


# Plot 1 – Static line chart (Bigfoot reports per year)

### Aggregate per year and build chart

In [6]:
# aggregate counts by year
year_counts = (
    bfro_clean
    .dropna(subset=["year"])
    .groupby("year")
    .size()
    .reset_index(name="count")
    .sort_values("year")
)

chart1 = (
    alt.Chart(year_counts, title="Bigfoot Reports per Year")
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("count:Q", title="Number of reports"),
        tooltip=["year:O", "count:Q"]
    )
    .properties(width=600, height=350)
)

chart1


### Save Plot 1 as JSON and PNG

In [None]:
from pathlib import Path

CURRENT_DIR = Path().resolve().parents[0]

chart1_json_path = CURRENT_DIR / "assets/json/bigfoot_year_counts.json"
chart1_png_path = CURRENT_DIR / "assets/pngs/bigfoot_year_counts.png"

chart1.save(chart1_json_path)
chart1.save(chart1_png_path)

Current directory: /Users/pratyushagarwal/Desktop/UIUC/3. FALL 2025/IS 445 - Data Visualization/Labs/pratyushagarwal22.github.io


# 