In [1]:
# 1) Install prerequisites (if you haven’t already):
#    pip install scholarly pandas plotly kaleido
#    (kaleido is only needed for the static PNG export done with fig.write_image)

import datetime
import pandas as pd
import plotly.graph_objects as go
from scholarly import scholarly



In [2]:

# ——— CONFIG ———
# Google Scholar author ID handed to scholarly.search_author_id below.
# IDs look like 'mIoTjx0AAAAJ'.
GOOGLE_SCHOLAR_ID = "KYEJ7WkAAAAJ"
# ———————

In [3]:
# 2) Look up the author by Scholar ID, then fill in the profile sections
#    that the cells below read (indices, per-year counts, publication list).
profile_stub = scholarly.search_author_id(GOOGLE_SCHOLAR_ID)
author = scholarly.fill(
    profile_stub,
    sections=["indices", "counts", "publications"],
)

# 3) List the fields present on the filled profile
print("Available keys:", list(author))

Available keys: ['container_type', 'filled', 'scholar_id', 'source', 'name', 'url_picture', 'affiliation', 'organization', 'interests', 'email_domain', 'citedby', 'publications', 'citedby5y', 'hindex', 'hindex5y', 'i10index', 'i10index5y', 'cites_per_year']


In [4]:
# 3) Build a DataFrame of total citations per year
# `cites_per_year` maps year -> citation count; int() normalises the year keys.
cites_dict = author.get("cites_per_year", {})
cites_df = (
    pd.DataFrame(
        [{"year": int(y), "value_cites": v} for y, v in cites_dict.items()],
        # Naming the columns up front keeps "year" present even when the
        # profile has no citation history; without it sort_values("year")
        # raises KeyError on the column-less empty frame.
        columns=["year", "value_cites"],
    )
    # Keep the merge key integer-typed even for an empty frame.
    .astype({"year": "int64"})
    .sort_values("year")
)

# 4) Build a DataFrame of your publications (year + citations)
pub_rows = []
for p in author.get("publications", []):
    bib = p.get("bib", {})
    year = bib.get("pub_year") or bib.get("year")
    if not year:
        # Undated publication: it cannot be placed on the timeline.
        continue
    try:
        year = int(year)
    except (TypeError, ValueError):
        # Non-numeric year text. Only conversion failures are skipped;
        # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
        continue
    cit = p.get("num_citations", 0)
    pub_rows.append({"year": year, "citations": cit})

# Explicit columns so `pubs_df["year"]` exists even with zero dated publications.
pubs_df = pd.DataFrame(pub_rows, columns=["year", "citations"])


def _h_index(citation_counts):
    """Return the largest h such that h of the given counts are each >= h."""
    h = 0
    for rank, count in enumerate(sorted(citation_counts, reverse=True), start=1):
        if count < rank:
            break
        h = rank
    return h


# 5) Compute h-index **at the end of each year**:
# Each publication contributes its single `num_citations` value regardless of
# `yr`, so this is the h-index of the papers published up to each year using
# those counts, not a reconstruction from year-by-year citation history.
years = sorted(set(cites_df["year"]).union(pubs_df["year"]))
h_list = [
    {
        "year": yr,
        "value_hidx": _h_index(pubs_df.loc[pubs_df["year"] <= yr, "citations"]),
    }
    for yr in years
]

h_df = pd.DataFrame(h_list, columns=["year", "value_hidx"])

# 6) Merge the two series
# Outer join: a year present in only one series is kept, with NaN in the other column.
df = pd.merge(h_df, cites_df, on="year", how="outer").sort_values("year")

# 7) Plot with Plotly
today = datetime.date.today().isoformat()

# h-index line
hindex_trace = go.Scatter(
    x=df["year"],
    y=df["value_hidx"],
    mode="lines+markers",
    name="H-index",
)

# citation bars
citations_trace = go.Bar(
    x=df["year"],
    y=df["value_cites"],
    name="Total citations",
    opacity=0.6,
)

# Same trace order as before: line first, bars second.
fig = go.Figure(data=[hindex_trace, citations_trace])

# Horizontal legend placed just above the plot area, right-aligned.
legend_above_plot = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "right",
    "x": 1,
}
fig.update_layout(
    title=f"Google Scholar Metrics – As of {today}",
    xaxis_title="Year",
    yaxis_title="Count",
    legend=legend_above_plot,
    bargap=0.2,
)

fig.show()
# — or —
# fig.write_html("gs_metrics.html")

In [5]:
# Render whatever figure `fig` currently refers to, so its output appears under this cell.
fig.show()


In [6]:
# Save the figure currently bound to `fig` as a fixed-size static PNG.
# NOTE(review): plotly's static export relies on an image engine (kaleido) — confirm it is installed.
png_size = {"width": 800, "height": 500}
fig.write_image("gs_metrics.png", **png_size)


In [10]:
# 2) Citations per year + cumulative
cites = author.get("cites_per_year", {})
cites_df = (
    pd.DataFrame(
        [{"year": int(y), "per_year": v} for y, v in cites.items()],
        # Explicit columns keep "year"/"per_year" present when the profile has
        # no citation history; otherwise sort_values("year") raises KeyError.
        columns=["year", "per_year"],
    )
    .sort_values("year")
)
cites_df["cumulative"] = cites_df["per_year"].cumsum()

# 3) Compute year-end h-index
# Each publication contributes its single `num_citations` value regardless of
# the year being evaluated, so this is not built from per-year citation history.
pubs = []
for p in author.get("publications", []):
    bib = p.get("bib", {})
    y = bib.get("pub_year") or bib.get("year")
    try:
        year = int(y)
    except (TypeError, ValueError):
        # Missing (None) or non-numeric year: skip the publication. Only
        # conversion failures are caught, so other errors still surface.
        continue
    pubs.append({"year": year, "citations": p.get("num_citations", 0)})
# Explicit columns so `pubs_df["year"]` exists even with zero dated publications.
pubs_df = pd.DataFrame(pubs, columns=["year", "citations"])

years = sorted(set(cites_df["year"]).union(pubs_df["year"]))
h_list = []
for yr in years:
    cs = sorted(pubs_df.loc[pubs_df["year"] <= yr, "citations"], reverse=True)
    # `cs` is descending, so `c >= i` holds only for a leading run of papers;
    # counting the matches therefore gives the h-index.
    h = sum(1 for i, c in enumerate(cs, 1) if c >= i)
    h_list.append({"year": yr, "h_index": h})
h_df = pd.DataFrame(h_list, columns=["year", "h_index"])

# current (latest) h-index for the annotation
# Rows are in ascending year order and each year's paper set contains the
# previous one, so the last row is also the maximum. Falls back to 0 when
# there are no years at all (int(NaN) from .max() would raise).
# NOTE(review): the filled profile also exposes a "hindex" key (see the key
# listing printed after the fetch) — consider whether that value should be shown instead.
current_h = int(h_df["h_index"].iloc[-1]) if not h_df.empty else 0

# 4) Build figure
today = datetime.date.today().isoformat()

# Trace order matters: the toggle buttons below address traces by position
# (0 = H-index, 1 = citations per year, 2 = cumulative citations).
hindex_line = go.Scatter(
    x=h_df["year"], y=h_df["h_index"],
    mode="lines+markers", name="H-index",
    visible=False,  # hidden until the "H-index" button is pressed
)
yearly_bars = go.Bar(
    x=cites_df["year"], y=cites_df["per_year"],
    name="Citations per year", opacity=0.7,
    visible=True,
)
cumulative_line = go.Scatter(
    x=cites_df["year"], y=cites_df["cumulative"],
    mode="lines+markers", name="Cumulative citations",
    visible=True,
)
fig = go.Figure(data=[hindex_line, yearly_bars, cumulative_line])

# 5) Toggle buttons (H-index vs Citations)
# Each button swaps trace visibility and relabels the y-axis and figure title.
show_hindex = dict(
    label="H-index",
    method="update",
    args=[
        {"visible": [True, False, False]},
        {"yaxis": {"title": "H-index"}, "title": f"H-index — As of {today}"},
    ],
)
show_citations = dict(
    label="Citations",
    method="update",
    args=[
        {"visible": [False, True, True]},
        {"yaxis": {"title": "Citations"}, "title": f"Citations — As of {today}"},
    ],
)

# Both axes share the same look: no grid, a black axis line, outside ticks.
axis_style = dict(showgrid=False, showline=True, linecolor="black", ticks="outside")

fig.update_layout(
    template="none",  # start from blank
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
    title=f"Google Scholar Metrics — As of {today}",
    xaxis=dict(title="Year", **axis_style),
    yaxis=dict(title="Count", **axis_style),
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            x=0.5,
            y=1.15,
            xanchor="center",
            buttons=[show_hindex, show_citations],
        )
    ],
    margin=dict(l=60, r=20, t=80, b=50),
)

# 6) Add current h-index annotation (top-left)
# Paper coordinates pin the label to the figure rather than to data values.
fig.add_annotation(
    text=f"Current h-index: <b>{current_h}</b>",
    xref="paper",
    yref="paper",
    x=0.01,
    y=0.95,
    showarrow=False,
    font=dict(size=14),
)

# 7) Export standalone HTML
fig.write_html("gs_metrics_toggle.html", include_plotlyjs="cdn")

In [22]:
# 2) Build citations DataFrame
cites = author.get("cites_per_year", {})
cites_df = (
    pd.DataFrame(
        [{"year": int(y), "per_year": v} for y, v in cites.items()],
        # Explicit columns keep "year"/"per_year" present when the profile has
        # no citation history; otherwise sort_values("year") raises KeyError.
        columns=["year", "per_year"],
    )
    .sort_values("year")
    .reset_index(drop=True)
)
# Running total of citations over the sorted years.
cites_df["cumulative"] = cites_df["per_year"].cumsum()

# 3) Make the figure
today = datetime.date.today().isoformat()
fig = go.Figure()

# per-year bars
# NOTE(review): earlier comments described these traces as black, but no
# colour is set here — add marker/line colours explicitly if black is intended.
fig.add_trace(go.Bar(
    x=cites_df["year"],
    y=cites_df["per_year"],
    name="Citations per year",
))

# cumulative line (thick line, large markers)
fig.add_trace(go.Scatter(
    x=cites_df["year"],
    y=cites_df["cumulative"],
    mode="lines+markers",
    name="Cumulative citations",
    line=dict(width=4),
    marker=dict(size=8),
))

# 4) Layout styling: transparent, no grids, mirrored axis lines
# One update_layout call replaces the previous three overlapping ones. No
# figure title is set: the h-index is shown by the annotation below instead of
# a hard-coded title that was immediately cleared again.
fig.update_layout(
    template="none",
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
    xaxis=dict(
        title=dict(text="<b>Year</b>", font=dict(size=22)),
        tickfont=dict(size=16),
        showgrid=False,
        showline=True,
        mirror=True,
        linewidth=4,
        ticks="outside",
    ),
    yaxis=dict(
        title=dict(text="<b>Citations</b>", font=dict(size=22)),
        tickfont=dict(size=16),
        showgrid=False,
        showline=True,
        mirror=True,
        linewidth=4,
        ticks="outside",
    ),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        font=dict(size=16),
    ),
    margin=dict(l=60, r=20, t=80, b=50),
)

# 5) Add a big h-index overlay in the top-left
# `current_h` is not computed in this cell; it must already exist in the
# kernel from the earlier cell that builds the year-end h-index table.
fig.add_annotation(
    text=f"H-index: <b>{current_h}</b>",
    xref="paper", yref="paper",
    x=0.02, y=0.98,           # inset from top-left
    showarrow=False,
    font=dict(size=32),
    align="left",
)

# 6) Export standalone HTML
fig.write_html("gs_citations.html", include_plotlyjs="cdn")