In [27]:
import pandas as pd
import altair as alt

# Disable Altair's maximum-row limit for chart data.
alt.data_transformers.disable_max_rows()

# Column names are defined by hand (based on a sample of the data) and are
# listed in file order: date/time string, city, state, country, shape,
# duration in seconds, duration as text, description, date posted,
# latitude, longitude.
columns = (
    'datetime_str city state country ufo_shape '
    'duration_seconds duration_text description '
    'date_posted latitude longitude'
).split()

# Read the CSV with the custom column names, asking pandas to parse the
# date/time column and to treat empty strings as missing values.
url = "https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv"
ufo = pd.read_csv(url, names=columns, parse_dates=['datetime_str'], na_values=[''])

In [28]:
# Build a `datetime` column from the raw date/time column. The format string
# expects month/day/year hour:minute; values that fail to convert become NaT
# (errors='coerce'), and rows without a valid datetime are then dropped.
ufo = ufo.assign(
    datetime=pd.to_datetime(ufo['datetime_str'], format='%m/%d/%Y %H:%M', errors='coerce')
).dropna(subset=['datetime'])

In [29]:
# Convert latitude/longitude to numeric values (unparseable entries become
# NaN) and drop rows that lack either coordinate.
# assign() builds a new frame instead of writing columns into `ufo`, which at
# this point is the result of a row-dropping dropna(); writing columns into
# such a frame can trigger pandas' SettingWithCopyWarning.
ufo = ufo.assign(
    latitude=pd.to_numeric(ufo['latitude'], errors='coerce'),
    longitude=pd.to_numeric(ufo['longitude'], errors='coerce'),
).dropna(subset=['latitude', 'longitude'])

# Keep only coordinates inside the valid ranges (guards against bad data).
# .copy() makes the filtered result an independent frame, so later column
# assignments on `ufo` do not trigger SettingWithCopyWarning.
ufo = ufo[
    ufo['latitude'].between(-90, 90)
    & ufo['longitude'].between(-180, 180)
].copy()

In [30]:
# Extract the year from the parsed `datetime` column.
# assign() returns a new frame rather than writing a column into `ufo`, which
# is the output of the boolean coordinate filter; a direct column write there
# can trigger pandas' SettingWithCopyWarning.
ufo = ufo.assign(year=ufo['datetime'].dt.year)

# Keep only years in the 1900-2023 window (the original note says the sample
# data covers 1949-2014).
ufo = ufo[ufo['year'].between(1900, 2023)]

In [31]:
# Number of sightings per year, as a two-column frame: year, counts.
annual_counts = ufo.groupby('year').size().reset_index(name='counts')

# Line chart of the yearly totals. The y axis encodes a count of sightings,
# so it uses the same 'times' title as the per-shape chart's y axis.
# NOTE(review): the chart title hard-codes 1949-2014, while the year filter
# admits 1900-2023 — confirm the range against annual_counts['year'].
trend_chart = alt.Chart(annual_counts).mark_line().encode(
    x=alt.X('year:O', axis=alt.Axis(title='year', labelAngle=-45)),
    y=alt.Y('counts:Q', title='times'),
    tooltip=['year', 'counts']
).properties(
    width=800,
    title='UFO annual trend（1949-2014）'
)

# Save the chart as trend.html.
trend_chart.save('trend.html')

In [32]:
import pandas as pd
import altair as alt


# Count sightings per (shape, year) pair.
# This groups `ufo`, the cleaned frame built by the preceding cells; the
# previously referenced `ufo_filtered` is not defined anywhere in the
# notebook and raised NameError on a fresh kernel.
grouped = ufo.groupby(['ufo_shape', 'year']).size().reset_index(name='counts')

# Faceted bar chart: one small panel per UFO shape, three panels per row.
facet_chart = alt.Chart(grouped).mark_bar().encode(
    x=alt.X('year:O', title='year'),
    y=alt.Y('counts:Q', title='times'),
    color=alt.Color('ufo_shape:N', legend=None),  # colour by shape; legend hidden
    tooltip=['ufo_shape', 'year', 'counts']
).properties(
    width=200,
    height=150
).facet(
    facet=alt.Facet('ufo_shape:N', title='UFO shape'),  # one facet per shape
    columns=3  # three sub-charts per row
).properties(
    title='UFO annual trend for different shapes'
)

# Save the chart as facet_chart.html.
facet_chart.save('facet_chart.html')

In [24]:
!pip install altair --upgrade



In [2]:
#pip install altair

Collecting altair
  Downloading altair-5.4.1-py3-none-any.whl (658 kB)
Collecting narwhals>=1.5.2
  Downloading narwhals-1.35.0-py3-none-any.whl (325 kB)
Collecting typing-extensions>=4.10.0
  Downloading typing_extensions-4.13.2-py3-none-any.whl (45 kB)
Installing collected packages: typing-extensions, narwhals, altair
  Attempting uninstall: typing-extensions
    Found existing installation: typing-extensions 4.2.0
    Uninstalling typing-extensions-4.2.0:
      Successfully uninstalled typing-extensions-4.2.0
Successfully installed altair-5.4.1 narwhals-1.35.0 typing-extensions-4.13.2
Note: you may need to restart the kernel to use updated packages.
