# GitHub Productivity Analysis 2025

Comprehensive analysis of coding productivity with AI collaboration detection.

In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Set plotly dark theme.
# px.defaults.template only applies to Plotly Express figures; the charts in
# this notebook are built with go.Figure, which takes its default template
# from plotly.io, so both defaults are set.
px.defaults.template = "plotly_dark"
pio.templates.default = "plotly_dark"

print("Libraries loaded successfully.")

## Load Data

In [None]:
# Read the three processed parquet tables (commits, pull requests, repos)
# from the processed-data directory, relative to the notebook's location.
data_dir = Path("../data/processed")

commits = pd.read_parquet(data_dir.joinpath("commits.parquet"))
prs = pd.read_parquet(data_dir.joinpath("prs.parquet"))
repos = pd.read_parquet(data_dir.joinpath("repos.parquet"))

# Quick sanity check on how much data was loaded
print(f"Loaded {len(commits)} commits from {len(repos)} repos")
print(f"Loaded {len(prs)} pull requests")

# Show the first five commit rows as the cell output
commits.head(5)

## Overview Statistics

In [None]:
# Headline numbers for the summary table printed below.
total_commits = len(commits)
total_additions = commits["additions"].sum()
total_deletions = commits["deletions"].sum()
net_lines = total_additions - total_deletions

# Repos with at least one commit in 2025
active_repos = (repos["commits_2025"] > 0).sum()
# Parse explicitly (as the later cells do) so this works whether "date" was
# stored as a datetime column or as text; distinct calendar dates are counted.
coding_days = pd.to_datetime(commits["date"]).dt.date.nunique()

ai_commits = commits["is_ai_assisted"].sum()
# Guard against an empty commits table so the share is 0% instead of a
# division by zero / NaN.
ai_ratio = ai_commits / total_commits if total_commits else 0.0

print(f"""
## 2025 Productivity Summary

| Metric | Value |
|--------|-------|
| Total Commits | {total_commits:,} |
| Lines Added | {total_additions:,} |
| Lines Deleted | {total_deletions:,} |
| **Net Lines Written** | **{net_lines:,}** |
| Active Repos | {active_repos} |
| Coding Days | {coding_days} |
| AI-Assisted Commits | {ai_commits} ({ai_ratio:.1%}) |
""")

## Temporal Analysis

In [None]:
# Commits over time: count commits per calendar week and plot the series.
commits["week"] = pd.to_datetime(commits["date"]).dt.to_period("W")
weekly_counts = commits.groupby("week").size()

# groupby only yields weeks that contain commits; fill the gaps with 0 so the
# line drops to zero for idle weeks instead of interpolating across them.
if not weekly_counts.empty:
    all_weeks = pd.period_range(
        weekly_counts.index.min(), weekly_counts.index.max(), freq="W"
    )
    weekly_counts = weekly_counts.reindex(all_weeks, fill_value=0)

# Reindexing drops the index name, so restore it before turning it into a column
weekly = weekly_counts.rename_axis("week").reset_index(name="commits")
# Plot each week at its starting timestamp
weekly["week"] = weekly["week"].dt.start_time

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=weekly["week"],
    y=weekly["commits"],
    mode="lines+markers",
    name="Weekly Commits",
    line=dict(color="#00d4aa", width=2)
))

fig.update_layout(
    title="Weekly Commit Activity",
    xaxis_title="Week",
    yaxis_title="Commits",
    height=400
)

fig.show()

## AI Collaboration Analysis

In [None]:
# Isolate the commits flagged as AI-assisted and show which agents they name.
is_ai = commits["is_ai_assisted"].eq(True)
ai_df = commits[is_ai]

if ai_df.empty:
    print("No AI-assisted commits found.")
else:
    # Number of commits per agent (value_counts orders by frequency)
    agent_counts = ai_df["ai_agent"].value_counts()

    # hole=0.4 renders the pie as a donut
    donut = go.Pie(
        labels=agent_counts.index,
        values=agent_counts.values,
        hole=0.4,
    )
    fig = go.Figure(data=donut)
    fig.update_layout(title="AI Agent Distribution")
    fig.show()

    print("\nAI Agent Breakdown:")
    print(agent_counts)

## Top Repos

In [None]:
# Top 15 repos by 2025 commit count, with the share of AI commits per repo.
top_repo_columns = ["full_name", "commits_2025", "net_lines", "ai_commits", "language"]

# assign() builds a new frame, so the ratio column is never written onto a
# slice of `repos` (the original assignment could raise SettingWithCopyWarning).
top_repos = repos.nlargest(15, "commits_2025")[top_repo_columns].assign(
    # A zero commit count is replaced by 1 to avoid dividing by zero
    ai_ratio=lambda df: (
        df["ai_commits"] / df["commits_2025"].replace(0, 1)
    ).round(2)
)

print(top_repos.to_string(index=False))

## Language Breakdown

In [None]:
# Per-language totals of 2025 commits and net lines, largest net lines first.
lang_totals = repos.groupby("language").agg(
    commits_2025=("commits_2025", "sum"),
    net_lines=("net_lines", "sum"),
)
lang_stats = lang_totals.sort_values("net_lines", ascending=False)

# Keep only languages whose net line count is positive
has_net_growth = lang_stats["net_lines"] > 0
lang_stats = lang_stats[has_net_growth]

language_bars = go.Bar(
    x=lang_stats.index,
    y=lang_stats["net_lines"],
    marker={"color": "#00d4aa"},
)
fig = go.Figure(data=language_bars)

fig.update_layout(
    title="Net Lines by Language",
    xaxis_title="Language",
    yaxis_title="Net Lines",
    height=400,
)

fig.show()

print("\nLanguage Stats:")
print(lang_stats)

## Commit Patterns

In [None]:
# Weekday distribution: how many commits fall on each day, Monday to Sunday.
commit_day_names = pd.to_datetime(commits["date"]).dt.day_name()
commits["dow"] = commit_day_names
dow_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
# value_counts orders by frequency; reindex restores calendar order and
# reports 0 for any weekday that has no commits.
dow_counts = commits["dow"].value_counts().reindex(dow_order, fill_value=0)

weekday_bars = go.Bar(
    x=dow_counts.index,
    y=dow_counts.values,
    marker={"color": "#ff6b6b"},
)
fig = go.Figure(data=weekday_bars)

fig.update_layout(
    title="Commits by Day of Week",
    xaxis_title="Day",
    yaxis_title="Commits",
    height=400,
)

fig.show()

# Hour of day pattern: commits per hour (0-23) of the commit timestamp.
commits["hour"] = pd.to_datetime(commits["date"]).dt.hour
# Reindex to all 24 hours so hours without commits plot as 0 rather than being
# skipped, matching how the weekday chart fills missing days.
hour_counts = commits.groupby("hour").size().reindex(range(24), fill_value=0)

fig = go.Figure(data=go.Scatter(
    x=hour_counts.index,
    y=hour_counts.values,
    mode="lines+markers",
    line=dict(color="#00d4aa", width=2)
))

fig.update_layout(
    title="Commits by Hour of Day",
    xaxis_title="Hour",
    yaxis_title="Commits",
    height=400
)

fig.show()