# GitHub Stats Analysis

Interactive analysis of GitHub traffic statistics for NatLabRockies repositories.

In [None]:
from pathlib import Path

import plotly.express as px
import polars as pl

# Read the collected statistics from the parquet file (path is relative to
# the notebook's working directory) into a polars DataFrame.
data_path = Path("../data/stats.parquet")
df = pl.read_parquet(source=data_path)
print(f"Loaded {len(df)} records")
# Last expression of the cell: preview the first five rows.
df.head(5)

## Summary Statistics

In [None]:
# Per-repository summary, most-viewed repository first.
#
# NOTE(review): the "unique" totals are sums of the per-row unique counts;
# presumably each row is one day, so a visitor seen on several days is
# counted once per day — confirm against the collector.
summary = (
    df.group_by("repo_name")
    .agg(
        [
            pl.col("views_total").sum().alias("total_views"),
            pl.col("views_unique").sum().alias("unique_views"),
            pl.col("clones_total").sum().alias("total_clones"),
            pl.col("clones_unique").sum().alias("unique_clones"),
            # Stars/forks are point-in-time counts, so take the value from the
            # most recent date. Ordering by "date" explicitly avoids depending
            # on the row order of the parquet file.
            pl.col("stars_count").sort_by("date").last().alias("stars"),
            pl.col("forks_count").sort_by("date").last().alias("forks"),
            pl.col("date").min().alias("first_date"),
            pl.col("date").max().alias("last_date"),
        ]
    )
    .sort("total_views", descending=True)
)

summary

## Views Over Time

In [None]:
# Unique views per repository over time.
#
# px.line connects points in the order the rows appear, so sort by repository
# and date first; otherwise an unsorted parquet file produces zigzag lines.
# Sorting by repo_name also makes the legend order deterministic.
fig = px.line(
    df.sort(["repo_name", "date"]).to_pandas(),
    x="date",
    y="views_unique",
    color="repo_name",
    title="Unique Views by Repository",
    markers=True,
)
# Show one combined hover label for all repositories at the same x position.
fig.update_layout(hovermode="x unified")
fig.show()

## Clones Over Time

In [None]:
# Unique clones per repository over time.
#
# px.line connects points in the order the rows appear, so sort by repository
# and date first; otherwise an unsorted parquet file produces zigzag lines.
# Sorting by repo_name also makes the legend order deterministic.
fig = px.line(
    df.sort(["repo_name", "date"]).to_pandas(),
    x="date",
    y="clones_unique",
    color="repo_name",
    title="Unique Clones by Repository",
    markers=True,
)
# Show one combined hover label for all repositories at the same x position.
fig.update_layout(hovermode="x unified")
fig.show()

## Cumulative Views

In [None]:
# Running total of unique views for each repository.
# Rows are ordered by repository and date first so the cumulative sum
# accumulates chronologically within each repo_name window.
cumulative = df.sort(by=["repo_name", "date"]).with_columns(
    cumulative_views=pl.col("views_unique").cum_sum().over("repo_name")
)

fig = px.line(
    data_frame=cumulative.to_pandas(),
    title="Cumulative Unique Views",
    x="date",
    y="cumulative_views",
    color="repo_name",
)
fig.show()

## Repository Comparison

In [None]:
# Grouped bars: total views next to total clones for every repository,
# using the per-repository aggregates held in `summary`.
fig = px.bar(
    data_frame=summary.to_pandas(),
    title="Total Views vs Clones by Repository",
    x="repo_name",
    y=["total_views", "total_clones"],
    barmode="group",
)
fig.show()

## Weekly Trends

In [None]:
# Aggregate unique views/clones into one-week windows per repository.
#
# group_by_dynamic requires the index column ("date") to be sorted within
# each group, so sort explicitly rather than relying on parquet row order.
# The grouping keyword is `group_by` (the older `by` spelling was renamed in
# polars 0.20.14 and is rejected by current releases).
# NOTE(review): assumes "date" is a Date/Datetime column — confirm the schema.
weekly = (
    df.sort(["repo_name", "date"])
    .group_by_dynamic("date", every="1w", group_by="repo_name")
    .agg(
        [
            pl.col("views_unique").sum().alias("views"),
            pl.col("clones_unique").sum().alias("clones"),
        ]
    )
    .sort(["repo_name", "date"])
)

fig = px.line(
    weekly.to_pandas(),
    x="date",
    y="views",
    color="repo_name",
    title="Weekly Unique Views",
    markers=True,
)
fig.show()