In [4]:
from html import escape as html_escape
from urllib.parse import quote as url_quote

import markdown
import pandas as pd
from bokeh.io import show
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, HoverTool, Div, RangeSlider, CustomJS, Spacer, FactorRange, LinearColorMapper, ColorBar, BasicTicker, PrintfTickFormatter
from bokeh.palettes import viridis, Viridis256
from bokeh.plotting import figure, show

In [6]:
# intro description and stacked bar

# User-facing guide, authored in Markdown. The trailing double spaces are
# Markdown hard line breaks and are intentional.
md_text = """
# General Info

The following visualizations rely on a CSV file with the following columns:

- **Year**
- **Technology**
- **Conference**

CSV files should live in a `CSV_Files` folder within the root directory.  
Example path: `../CSV_Files/NVM_data.csv`

See our tool guide for more on graph manipulation.

---

## Stacked Bar Chart (Year vs. Publications)

- **Hover details**  
  User can hover over bars to see the aggregated data for each technology.
- **Toggle hover**  
  User may disable the hover tooltip via the toolbar if desired.  
- **Filtering**  
  User may click entries in the legend to hide or show individual technologies.
- **Range slider**  
  User may use the slider to adjust the year window visible. Refresh the graph to quickly return to the original view.
- **Dataset summary**  
  A summary with information about the dataset (e.g., total records, years covered) is displayed alongside the graph.  
- **Missing data**  
  Any rows with missing values are excluded from the visualization.
"""

# Render the guide to HTML and wrap it in a Div so it can sit in the layout
# above the chart.
html = markdown.markdown(md_text, extensions=['extra'])
comment = Div(
    text=html,
    width=500,
    styles={
      'font-size': '14px',
    }
)

#————————————————data summary——————————————————
# Builds `summary_div`: record count, year range, and the records that cannot
# be plotted because their Year is missing.
raw = pd.read_csv("../CSV_Files/NVM_data.csv") # CSV filepath

total_records = len(raw)
year_min, year_max = int(raw["Year"].min()), int(raw["Year"].max())

missing_year = raw[raw["Year"].isnull()]

# Only Year/Technology/Conference are documented as required columns, so do
# not assume a DOI column exists.
if "DOI" in missing_year.columns:
    missing_dois = missing_year["DOI"].dropna().astype(str).unique().tolist()
    rows_without_doi = int(missing_year["DOI"].isnull().sum())
else:
    missing_dois = []
    rows_without_doi = len(missing_year)

if missing_year.empty:
    html_missing = """
    <h3>Excluded Records</h3>
    <p><em>None – all rows had a Year.</em></p>
    <hr>
    """
elif missing_dois:
    # DOIs may contain characters such as "<", ">", "&" or "'" that would break
    # the markup: percent-encode them in the URL and HTML-escape the link text.
    doi_list = "".join(
        f"<li><a href='https://doi.org/{url_quote(doi)}' target='_blank'>{html_escape(doi)}</a></li>"
        for doi in missing_dois
    )
    extra_note = (
        f"<p>{rows_without_doi} additional record(s) without a DOI were also dropped.</p>"
        if rows_without_doi else ""
    )
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>The following DOIs were dropped because “Year” was missing:</p>
    <ul>
      {doi_list}
    </ul>
    {extra_note}
    <hr>
    """
else:
    # Rows are missing a Year but none of them carries a DOI to list.
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>{len(missing_year)} record(s) were dropped because “Year” was missing (no DOI available to list).</p>
    <hr>
    """


html_summary = f"""
<h3>Dataset Summary</h3>
<ul>
  <li><strong>Total Records:</strong> {total_records}</li>
  <li><strong>Year Range:</strong> {year_min} – {year_max}</li>
</ul>
<hr>
"""
full_summary = html_summary + html_missing

summary_div = Div(text=full_summary, width=500, styles={'font-size': '14px'})

#————————————————————————————————————————————————————————
# Stacked bar chart: one bar per year, one stacked segment per technology.

# Count publications per (Technology, Year), then widen to one column per
# technology; combinations with no publications become 0.
df = (
    raw.groupby(by=["Technology", 'Year']).size().reset_index(name='Count')
    .pivot(index='Year', columns="Technology", values='Count')
    .fillna(0)
    .reset_index()
)

names = df.columns[1:].tolist()  # technology columns (everything after "Year")

source = ColumnDataSource(df)
colors = viridis(len(names))

first_year, last_year = df["Year"].min(), df["Year"].max()
x_pad = 1  # empty space kept on each side of the visible year window
y_max = df[names].values.sum(axis=1).max()  # tallest stacked bar

p = figure(x_range=(first_year - x_pad, last_year + x_pad), y_range=(0, y_max * 1.1),
           x_axis_label='Year', y_axis_label='Publications',
           title='Publications over Time for Various NVM Technologies',
           tools="pan,wheel_zoom,box_zoom, save, reset, fullscreen, help",
           width=1000, height=600)

# hover tooltips; "@$name" resolves to the column of the stacked segment under the cursor
hover = HoverTool(tooltips=[("Year", "@Year"), ("Publications", "@$name")])
p.add_tools(hover)

p.vbar_stack(stackers=names, x='Year', width=0.8, color=colors, source=source,
             legend_label=names, alpha=0.8)

# slider
slider = RangeSlider(
    start=first_year, end=last_year,
    value=(first_year, last_year),
    step=1, title="Year Range"
)
# Apply the same padding as the initial x_range. Without it the bars of the
# first and last selected year are cut in half (bars are centred on the year),
# and the full slider range would not reproduce the initial view.
slider.js_on_change('value', CustomJS(args=dict(xr=p.x_range, pad=x_pad), code="""
    xr.start = cb_obj.value[0] - pad;
    xr.end   = cb_obj.value[1] + pad;
"""))
slider_row = row(Spacer(width=30), slider)

# legend style
p.legend.orientation = "vertical"
p.legend.location = "top_left"
p.legend.label_text_font_size = "10pt"
p.legend.click_policy = "hide"

# axis and title fonts
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"
p.title.text_font_size = "18pt"
p.xaxis.major_label_text_font_size = "14pt"
p.yaxis.major_label_text_font_size = "14pt"

layout = column(comment, slider_row, p, summary_div)
show(layout)

In [8]:

# grouped bar

# User-facing guide, authored in Markdown. The trailing double spaces are
# Markdown hard line breaks and are intentional.
md_text = """
## Grouped Bar Chart

- **Hover details**  
  Users can hover over each bar to see:
    - Technology name  
    - Number of publications  
    - Year of publication  

- **Toggle hover**  
  User may deselect the hover function via the tool bar. 

- **Dataset summary**  
  A summary of the dataset (e.g., total records, range of years, number of technologies) is displayed alongside the graph.  

- **Missing data**  
  Any rows with missing values are excluded from this visualization.
"""

# Render the guide to HTML and wrap it in a Div so it can sit in the layout
# above the chart.
html = markdown.markdown(md_text, extensions=['extra'])
comment = Div(
    text=html,
    width=500,
    styles={
      'font-size': '14px',
    }
)

#————————————————data summary——————————————————
# Builds `summary_div`: record count, year range, and the records that cannot
# be plotted because their Year is missing.
raw = pd.read_csv("../CSV_Files/NVM_data.csv") # CSV filepath

total_records = len(raw)
year_min, year_max = int(raw["Year"].min()), int(raw["Year"].max())

missing_year = raw[raw["Year"].isnull()]

# Do not assume the DOI column exists; it is only used to list excluded rows.
if "DOI" in missing_year.columns:
    missing_dois = missing_year["DOI"].dropna().astype(str).unique().tolist()
    rows_without_doi = int(missing_year["DOI"].isnull().sum())
else:
    missing_dois = []
    rows_without_doi = len(missing_year)

if missing_year.empty:
    html_missing = """
    <h3>Excluded Records</h3>
    <p><em>None – all rows had a Year.</em></p>
    <hr>
    """
elif missing_dois:
    # DOIs may contain characters such as "<", ">", "&" or "'" that would break
    # the markup: percent-encode them in the URL and HTML-escape the link text.
    doi_list = "".join(
        f"<li><a href='https://doi.org/{url_quote(doi)}' target='_blank'>{html_escape(doi)}</a></li>"
        for doi in missing_dois
    )
    extra_note = (
        f"<p>{rows_without_doi} additional record(s) without a DOI were also dropped.</p>"
        if rows_without_doi else ""
    )
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>The following DOIs were dropped because “Year” was missing:</p>
    <ul>
      {doi_list}
    </ul>
    {extra_note}
    <hr>
    """
else:
    # Rows are missing a Year but none of them carries a DOI to list.
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>{len(missing_year)} record(s) were dropped because “Year” was missing (no DOI available to list).</p>
    <hr>
    """


html_summary = f"""
<h3>Dataset Summary</h3>
<ul>
  <li><strong>Total Records:</strong> {total_records}</li>
  <li><strong>Year Range:</strong> {year_min} – {year_max}</li>
</ul>
<hr>
"""
full_summary = html_summary + html_missing

summary_div = Div(text=full_summary, width=500, styles={'font-size': '14px'})

#————————————————————————————————————————————————————————
# Grouped bar chart: years on the outer axis level, technologies nested inside.

df = raw.groupby(by=["Technology", 'Year']).size().reset_index(name='Count')
# Year is used as a categorical factor, so it must be a string. Going through
# float -> int first renders a float-typed year as "2015" instead of "2015.0".
df["Year"] = df["Year"].astype(float).astype(int).astype(str)

# One row per year, one column per technology; missing combinations become 0.
df_pivot = df.pivot(index='Year', columns="Technology", values='Count').fillna(0).reset_index()

years = df_pivot["Year"].tolist()
technologies = sorted(df_pivot.columns[1:])
# Nested (year, technology) factors define the two-level categorical x axis.
factors = [(year, tech) for year in years for tech in technologies]

# Index by year once so each technology's counts are a plain column read
# (row order matches `years`).
counts_by_year = df_pivot.set_index("Year")
max_count = counts_by_year[technologies].to_numpy().max()

colors = viridis(len(technologies))
color_map = dict(zip(technologies, colors))

# grouped by year
p = figure(x_range=FactorRange(*factors), y_range=(0, max_count * 1.2),
           title="Publications over Time for Various NVM Technologies",
           x_axis_label="Year", y_axis_label="Publications",
           tools="pan,wheel_zoom,box_zoom, save, reset, fullscreen, help",
           width=1000, height=600)

# One glyph renderer per technology. Bokeh's interactive legend
# (click_policy) toggles whole renderers, so a single vbar with
# legend_field="tech" would hide every bar on any legend click.
for tech in technologies:
    tech_source = ColumnDataSource(data=dict(
        x=[(year, tech) for year in years],
        counts=counts_by_year[tech].tolist(),
        year=years,
        tech=[tech] * len(years),
    ))
    p.vbar(x='x', top='counts', width=0.8, source=tech_source,
           color=color_map[tech], legend_label=tech)

# hover tool
tooltips = [("Technology", "@tech"), ("Publications", "@counts"), ("Year", "@year")]
p.add_tools(HoverTool(tooltips=tooltips))


p.x_range.range_padding = 0.05
p.xaxis.group_label_orientation = "vertical"
p.xaxis.group_text_font_size = "14pt"
p.xaxis.group_text_font_style = "normal"
p.xaxis.group_text_color = "black"

# hiding labels and ticks on x axis
p.xaxis.major_label_text_font_size = "0pt"
p.xaxis.major_tick_line_color = None
p.xaxis.minor_tick_line_color = None
p.xgrid.grid_line_color = None

# legend styling
p.legend.orientation = "vertical"
p.legend.location = "top_left"
p.legend.click_policy = "hide"
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"
p.title.text_font_size = "18pt"
p.yaxis.major_label_text_font_size = "14pt"

layout = column(comment, p, summary_div)

show(layout)






In [9]:
# line chart

# User-facing guide for the line chart, authored in Markdown. The trailing
# double spaces are Markdown hard line breaks.
md_text = """
## Line Chart

- **Hover details**  
  Users can hover over data points to see:

    - Technology name  
    - Number of publications  
    - Year published  

- **Toggle hover**  
  User may deselect the hover function via the tool bar. 

- **Toggle line visibility**  
  Click items in the color legend to hide or show a technology’s line graph; only the data points will remain visible.  

- **Dataset summary**  
  A summary of the dataset (e.g., total records, range of years, number of technologies) is displayed alongside the visual.  

- **Missing data**  
  Any rows with missing values are excluded from this visualization.
"""

# Convert the guide to HTML (with the 'extra' extension) and wrap it in a Div
# so it can be placed in the layout.
html = markdown.markdown(md_text, extensions=['extra'])
comment = Div(text=html, width=500, styles={'font-size': '14px'})

#————————————————data summary——————————————————
# Builds `summary_div`: record count, year range, and the records that cannot
# be plotted because their Year is missing.
raw = pd.read_csv("../CSV_Files/NVM_data.csv") # CSV filepath

total_records = len(raw)
year_min, year_max = int(raw["Year"].min()), int(raw["Year"].max())

missing_year = raw[raw["Year"].isnull()]

# Do not assume the DOI column exists; it is only used to list excluded rows.
if "DOI" in missing_year.columns:
    missing_dois = missing_year["DOI"].dropna().astype(str).unique().tolist()
    rows_without_doi = int(missing_year["DOI"].isnull().sum())
else:
    missing_dois = []
    rows_without_doi = len(missing_year)

if missing_year.empty:
    html_missing = """
    <h3>Excluded Records</h3>
    <p><em>None – all rows had a Year.</em></p>
    <hr>
    """
elif missing_dois:
    # DOIs may contain characters such as "<", ">", "&" or "'" that would break
    # the markup: percent-encode them in the URL and HTML-escape the link text.
    doi_list = "".join(
        f"<li><a href='https://doi.org/{url_quote(doi)}' target='_blank'>{html_escape(doi)}</a></li>"
        for doi in missing_dois
    )
    extra_note = (
        f"<p>{rows_without_doi} additional record(s) without a DOI were also dropped.</p>"
        if rows_without_doi else ""
    )
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>The following DOIs were dropped because “Year” was missing:</p>
    <ul>
      {doi_list}
    </ul>
    {extra_note}
    <hr>
    """
else:
    # Rows are missing a Year but none of them carries a DOI to list.
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>{len(missing_year)} record(s) were dropped because “Year” was missing (no DOI available to list).</p>
    <hr>
    """


html_summary = f"""
<h3>Dataset Summary</h3>
<ul>
  <li><strong>Total Records:</strong> {total_records}</li>
  <li><strong>Year Range:</strong> {year_min} – {year_max}</li>
</ul>
<hr>
"""
full_summary = html_summary + html_missing

summary_div = Div(text=full_summary, width=500, styles={'font-size': '14px'})

#————————————————————————————————————————————————————————
# Line chart: one line plus point markers per technology.

# Publications per (Technology, Year) in long form ...
df = raw.groupby(by=["Technology", 'Year']).size().reset_index(name='Count')

# ... widened to one column per technology, with 0 where a technology has no
# publications in a year.
df_pivot = df.pivot(index='Year', columns="Technology", values='Count').fillna(0).reset_index()
names = df_pivot.columns[1:].tolist()

p_line = figure(
    x_axis_label='Year', y_axis_label='Publications',
    title='Publications over Time for Various NVM Technologies (Line Chart)',
    tools="pan,wheel_zoom,box_zoom, save, reset, fullscreen, help",
    width=1000, height=600)

palette = viridis(len(names))
for idx, name in enumerate(names):
    color = palette[idx]
    source = ColumnDataSource(data={
        'Year': df_pivot['Year'],
        'Count': df_pivot[name]
    })

    # Only the line carries the legend label, so a legend click hides the line
    # while the markers stay visible.
    line_renderer = p_line.line(x='Year', y='Count', source=source, legend_label=name, line_width=2, color=color)
    p_line.scatter(
        x='Year', y='Count', source=source,
        color=color, size=5,
    )

    # One hover tool per technology, attached to that technology's line only,
    # with the technology name written directly into the tooltip.
    hover = HoverTool(tooltips=[("Technology", name), ("Publications", "@Count"), ("Year", "@Year"),],
                      mode='vline',
                      renderers=[line_renderer])
    p_line.add_tools(hover)

# legend
p_line.legend.location = "top_left"
p_line.legend.label_text_font_size = "10pt"
p_line.legend.click_policy = "hide"

# title and axis fonts
p_line.title.text_font_size = "18pt"
p_line.xaxis.axis_label_text_font_size = "16pt"
p_line.yaxis.axis_label_text_font_size = "16pt"
p_line.xaxis.major_label_text_font_size = "14pt"
p_line.yaxis.major_label_text_font_size = "14pt"

layout = column(comment, p_line, summary_div)
show(layout)


In [10]:
# Heat Map

# User-facing guide for the heat map, authored in Markdown. The trailing
# double spaces are Markdown hard line breaks.
md_text = """
## Heat Map

- **Legend**  
  Reference the color key to the right of the heat map.

- **Hover details**  
  Users can hover over rows in the heat map to see:

    - Number of publications  
    - Year published  

- **Toggle hover**  
  User may deselect the hover function via the tool bar.  

- **Dataset summary**  
  A summary of the dataset (e.g., total records, range of years, number of technologies) is displayed alongside the visual.  

- **Missing data**  
  Any rows with missing values are excluded from this visualization.
"""

# Convert the guide to HTML (with the 'extra' extension) and wrap it in a Div
# so it can be placed in the layout.
html = markdown.markdown(md_text, extensions=['extra'])
comment = Div(text=html, width=500, styles={'font-size': '14px'})

#————————————————data summary——————————————————
# Builds `summary_div`: record count, year range, and the records that cannot
# be plotted because their Year is missing.
raw = pd.read_csv("../CSV_Files/NVM_data.csv") # CSV filepath

total_records = len(raw)
year_min, year_max = int(raw["Year"].min()), int(raw["Year"].max())

missing_year = raw[raw["Year"].isnull()]

# Do not assume the DOI column exists; it is only used to list excluded rows.
if "DOI" in missing_year.columns:
    missing_dois = missing_year["DOI"].dropna().astype(str).unique().tolist()
    rows_without_doi = int(missing_year["DOI"].isnull().sum())
else:
    missing_dois = []
    rows_without_doi = len(missing_year)

if missing_year.empty:
    html_missing = """
    <h3>Excluded Records</h3>
    <p><em>None – all rows had a Year.</em></p>
    <hr>
    """
elif missing_dois:
    # DOIs may contain characters such as "<", ">", "&" or "'" that would break
    # the markup: percent-encode them in the URL and HTML-escape the link text.
    doi_list = "".join(
        f"<li><a href='https://doi.org/{url_quote(doi)}' target='_blank'>{html_escape(doi)}</a></li>"
        for doi in missing_dois
    )
    extra_note = (
        f"<p>{rows_without_doi} additional record(s) without a DOI were also dropped.</p>"
        if rows_without_doi else ""
    )
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>The following DOIs were dropped because “Year” was missing:</p>
    <ul>
      {doi_list}
    </ul>
    {extra_note}
    <hr>
    """
else:
    # Rows are missing a Year but none of them carries a DOI to list.
    html_missing = f"""
    <h3>Excluded Records</h3>
    <p>{len(missing_year)} record(s) were dropped because “Year” was missing (no DOI available to list).</p>
    <hr>
    """


html_summary = f"""
<h3>Dataset Summary</h3>
<ul>
  <li><strong>Total Records:</strong> {total_records}</li>
  <li><strong>Year Range:</strong> {year_min} – {year_max}</li>
</ul>
<hr>
"""
full_summary = html_summary + html_missing

summary_div = Div(text=full_summary, width=500, styles={'font-size': '14px'})

#————————————————————————————————————————————————————————
# Heat map: one coloured cell per (Year, Technology) pair present in the data.

df = raw.groupby(by=["Technology", 'Year']).size().reset_index(name='Count')
# Both axes are categorical, so Year must be a string; float -> int first so a
# float-typed year renders as "2015" rather than "2015.0".
df['Year'] = df['Year'].astype(float).astype(int).astype(str)

source = ColumnDataSource(df)

# Colour scale spans the observed publication counts.
count_low, count_high = df['Count'].min(), df['Count'].max()
mapper = LinearColorMapper(palette=Viridis256, low=count_low, high=count_high)

year_factors = sorted(df['Year'].unique().tolist())
tech_factors = sorted(df["Technology"].unique())

p_heat = figure(
    title='Heatmap of Publications by Year and Technology',
    x_range=year_factors,
    y_range=tech_factors,
    x_axis_location="above",
    width=1000,
    height=600,
    tools="pan,wheel_zoom,box_zoom, save, reset, fullscreen, help",
    tooltips=[("Year", "@Year"), ("Publications", "@Count")],
)

# Unit-sized cells fill the categorical grid; the fill colour encodes Count.
p_heat.rect(
    x="Year", y="Technology", width=1, height=1, source=source,
    fill_color={'field': 'Count', 'transform': mapper},
    line_color=None
)

# colour key to the right of the plot
color_bar = ColorBar(
    color_mapper=mapper,
    location=(0, 0),
    ticker=BasicTicker(),
    formatter=PrintfTickFormatter(format="%d"),
    label_standoff=10,
    title="Number of Publications"
)
p_heat.add_layout(color_bar, 'right')

# axis labels and fonts
p_heat.xaxis.axis_label = "Year"
p_heat.yaxis.axis_label = "Technology"
p_heat.title.text_font_size = "18pt"
p_heat.axis.axis_label_text_font_size = "16pt"
p_heat.axis.major_label_text_font_size = "14pt"
p_heat.axis.major_label_standoff = 10
p_heat.xaxis.major_label_orientation = 1.0

# no grid lines or outline around the cells
p_heat.grid.grid_line_color = None
p_heat.outline_line_color = None

layout = column(comment, p_heat, summary_div)
show(layout)


