In [1]:
#%pip install openai pandas numpy scikit-learn
import os, json, glob, re, time, unicodedata
from typing import Tuple, List, Dict, Any
import pandas as pd

In [11]:
# --- Generate profile descriptions from each JSON via OpenAI ---
# Prereqs:
#   pip install openai pandas
#   set OPENAI_API_KEY in your environment (e.g., setx OPENAI_API_KEY "sk-...")



# ---- Input/Output paths ----
# NOTE(review): absolute, machine-specific Windows paths; adjust before running elsewhere.
INPUT_FOLDER = r"C:\Users\koand\Downloads\new_scrapes\Json"
OUT_CSV      = r"C:\Users\koand\Downloads\new_scrapes\profiles_from_json.csv"

# ---- OpenAI client ----
from openai import OpenAI
client = OpenAI()  # uses OPENAI_API_KEY from env

# Chat model used for extraction (lightweight & fast).
# Alternatives tried earlier: "gpt-5-nano-2025-08-07", "gpt-4.1".
MODEL = "gpt-4o-mini"

# ---- Helpers to extract context & URLs from scraped JSON ----
# Dict keys whose values are link/image fields: skipped when gathering prose.
OMIT_KEYS = {"url", "display_url", "href", "link", "img", "image", "thumbnail"}
# Matches an http(s) URL up to the next whitespace character.
URL_RE = re.compile(r"https?://\S+", re.IGNORECASE)
# Whole-word, case-insensitive "sorry"; sections containing it are dropped by the extractors.
SORRY_RE = re.compile(r"\bsorry\b", re.IGNORECASE)

# Substrings (compared against the lower-cased URL) that make a link look like a paper/DOI/PDF.
PAPER_URL_HINTS = (
    "doi.org", "arxiv.org", "springer.com", "wiley.com", "nature.com",
    "science.org", "sciencedirect.com", "ieeexplore.ieee.org", "acm.org",
    "acs.org", "rsc.org", "pubmed", "nih.gov", ".pdf"
)

def strip_controls(s: str) -> str:
    """Return ``s`` without Unicode format characters (general category ``Cf``).

    Other characters, including ordinary control characters such as tabs,
    are left untouched.
    """
    def _is_kept(ch: str) -> bool:
        return unicodedata.category(ch) != "Cf"

    return "".join(filter(_is_kept, s))

def any_contains_sorry(obj) -> bool:
    """Report whether any string nested inside ``obj`` contains the word "sorry".

    Dicts and lists are searched recursively; dict entries whose key is in
    ``OMIT_KEYS`` are not inspected. Any other value type counts as "no match".
    """
    if isinstance(obj, str):
        return SORRY_RE.search(obj) is not None
    if isinstance(obj, list):
        return any(any_contains_sorry(item) for item in obj)
    if isinstance(obj, dict):
        return any(
            any_contains_sorry(value)
            for key, value in obj.items()
            if key not in OMIT_KEYS
        )
    return False

def collect(obj, texts: List[str], urls: List[str]):
    """Recursively walk parsed JSON, appending prose to ``texts`` and links to ``urls``.

    Both output lists are mutated in place; nothing is returned.

    * Dict entries whose key is in ``OMIT_KEYS`` never contribute prose, but
      any URL found in a string value is still recorded.
    * A string that is nothing but a URL goes to ``urls`` only.
    * Any other non-empty string goes to ``texts``; URLs embedded in it are
      additionally recorded in ``urls``.

    The previous version used ``URL_RE.match``, which only anchors at the start
    of the string: "https://doi.org/x (PDF)" was stored whole as a URL, and URLs
    in the middle of a sentence were never offered as candidates.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k in OMIT_KEYS:
                if isinstance(v, str):
                    # Clean first so leading whitespace/format chars cannot hide the URL.
                    urls.extend(URL_RE.findall(strip_controls(v).strip()))
                continue
            collect(v, texts, urls)
    elif isinstance(obj, list):
        for v in obj:
            collect(v, texts, urls)
    elif isinstance(obj, str):
        s = strip_controls(obj).strip()
        if not s:
            return
        if URL_RE.fullmatch(s):
            # Standalone link: a candidate URL, not prose.
            urls.append(s)
            return
        # Mixed content: keep the prose and also harvest any inline links.
        urls.extend(URL_RE.findall(s))
        texts.append(s)

def extract_context_and_urls(data: Any, max_chars: int = 30000) -> Tuple[str, List[str]]:
    """Build the prompt context and the candidate URL list from one scraped JSON.

    ``data`` is treated as a list of sections (a non-list is wrapped in one).
    Sections for which ``any_contains_sorry`` is true are skipped. The text of
    the remaining sections is joined, whitespace-collapsed and cut to
    ``max_chars``. URLs have the characters ``,.;)`` stripped from both ends
    and are de-duplicated in first-seen order.

    Returns:
        ``(context, urls)``.
    """
    if not isinstance(data, list):
        data = [data]

    found_texts: List[str] = []
    found_urls: List[str] = []
    for section in data:
        if any_contains_sorry(section):
            continue
        collect(section, found_texts, found_urls)

    # Order-preserving de-duplication of the cleaned URLs.
    unique_urls: List[str] = []
    seen = set()
    for raw_url in found_urls:
        cleaned = raw_url.strip(",.;)")
        if cleaned not in seen:
            seen.add(cleaned)
            unique_urls.append(cleaned)

    context = re.sub(r"\s+", " ", " ".join(found_texts)).strip()
    return context[:max_chars], unique_urls

def pick_paperish_urls(urls: List[str], limit: int = 5) -> List[str]:
    """Pick up to ``limit`` URLs that look like papers, DOIs or PDFs.

    Each URL is scored by how many ``PAPER_URL_HINTS`` substrings occur in its
    lower-cased form. URLs with a positive score are returned, highest score
    first, ties in input order. If no URL matches any hint, the first
    ``limit`` URLs are returned instead.

    Duplicate inputs are collapsed (first occurrence wins) so the result never
    repeats a link. For already-unique input the output is unchanged from the
    previous implementation.
    """
    unique = list(dict.fromkeys(urls))

    scored = []
    for u in unique:
        lowered = u.lower()  # hoisted: lower-case once per URL, not once per hint
        scored.append((sum(h in lowered for h in PAPER_URL_HINTS), u))

    # sorted() is stable, so equal scores keep input order. The old
    # ``urls.index(...)`` tie-breaker was an O(n^2) way to get the same result.
    ranked = sorted(scored, key=lambda pair: -pair[0])
    picked = [u for score, u in ranked if score > 0][:limit]

    # fallback: if none matched hints, just take a few generic URLs
    if not picked:
        picked = unique[:limit]
    return picked

# ---- Prompt scaffolding ----
# System message: restricts the model to the supplied evidence and demands bare JSON.
SYSTEM_PROMPT = (
    "You are a precise information-extraction assistant. "
    "Use ONLY the evidence given in 'context' and 'candidate_urls'. "
    "If an item is missing or uncertain, return 'Unknown' (or an empty list). "
    "Return STRICT JSON (no markdown, no extra commentary)."
)

# User message template; filled with str.format(context=..., urls=...).
# Literal braces are doubled ({{ }}) so that format() leaves them in place.
# The schema example must itself be valid JSON, because the model tends to
# mirror it: the earlier version lacked a comma between two PI_history items
# and ended with a trailing comma, both of which break strict JSON parsing.
USER_TEMPLATE = """Extract a lab/PI profile in this schema:

{{
  "name_first": "First",
  "name_last": "Last",
  "research_summary": "4-6 sentences summarizing the group's research and recent publications.",
  "PI_history": ["YYYY – Role, Organization", "YYYY – Award, Organization", "..."],
  "student_history": "3-6 sentences on current size, alumni outcomes, etc.",
  "keywords": ["6-15","topic","words"],
  "lab_site": "URL of the lab website (from candidate_urls if present)",
  "paper_links": ["list", "of", "paper URLs from candidate_urls only"]
}}

Rules:
- Do NOT fabricate names, dates, jobs, counts, or links.
- Only use URLs from candidate_urls. If unsure, leave 'Unknown' or [].
- If a name is a single token, put it in name_last and set name_first to Unknown.
- Keep PI_history entries short: 'YYYY – Role, Org'. No more than 6 items.

context:
<<<
{context}
>>>

candidate_urls:
{urls}
"""

def call_openai_for_profile(context: str, urls: List[str]) -> Dict[str, Any]:
    """Ask the chat model for a profile and return it as a parsed JSON object.

    Args:
        context: Plain-text evidence inserted into ``USER_TEMPLATE``.
        urls: Candidate URLs; only the first 100 are sent, one per line.

    Returns:
        The decoded JSON object (a ``dict``).

    Raises:
        ValueError: if the reply is empty or has no JSON object, if the
            salvaged text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass), or if the JSON is not an object.
    """
    user_prompt = USER_TEMPLATE.format(context=context, urls="\n".join(urls[:100]))
    resp = client.chat.completions.create(
        model=MODEL,
        temperature=0.2,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=5000,
    )
    # ``content`` is nullable in the API response; treat a missing body as
    # empty text so it surfaces as the ValueError below, not an AttributeError.
    content = (resp.choices[0].message.content or "").strip()

    # If it's wrapped in ```json ... ```, strip fences
    if content.startswith("```"):
        content = re.sub(r"^```(?:json)?\s*|\s*```$", "", content, flags=re.DOTALL).strip()

    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        # salvage between first { and last }
        m = re.search(r"\{.*\}", content, flags=re.DOTALL)
        if not m:
            raise ValueError(f"Model returned non-JSON:\n{content[:400]}")
        data = json.loads(m.group(0))

    # Callers use ``.get`` on the result; a top-level list or scalar would fail
    # there with a confusing AttributeError, so reject it here.
    if not isinstance(data, dict):
        raise ValueError(
            f"Model returned JSON of type {type(data).__name__}, expected an object:\n{content[:400]}"
        )
    return data

def format_employment(history: List[str]) -> str:
    """Render PI history entries as a newline-separated "- item" bullet list.

    Args:
        history: Normally a list of short strings. A bare string is tolerated
            and treated as a single entry, because the prompt allows the model
            to answer ``'Unknown'`` instead of a list. Iterating that string
            directly used to produce one bullet per character.

    Returns:
        The bullet list, or ``"Unknown"`` when there is nothing to show
        (empty/None input, or only blank / "unknown" entries).
    """
    if not history:
        return "Unknown"
    if not isinstance(history, (list, tuple)):
        history = [history]

    items = [str(item).strip() for item in history if item is not None]
    items = [item for item in items if item and item.lower() != "unknown"]
    if not items:
        return "Unknown"
    return "\n".join(f"- {item}" for item in items)

def format_keywords(words: List[str]) -> str:
    """Join keywords into one comma-separated string.

    Args:
        words: Normally a list of keyword strings. A bare string is tolerated
            and returned as a single keyword; joining it directly used to
            interleave ", " between its characters. Non-string items are
            converted with ``str`` instead of making ``join`` raise.

    Returns:
        ``"kw1, kw2, ..."``, or ``"Unknown"`` when there is nothing to show
        (empty/None input, or only blank / "unknown" entries).
    """
    if not words:
        return "Unknown"
    if not isinstance(words, (list, tuple)):
        words = [words]

    cleaned = [str(word).strip() for word in words if word is not None]
    cleaned = [word for word in cleaned if word and word.lower() != "unknown"]
    if not cleaned:
        return "Unknown"
    return ", ".join(cleaned)

# ---- Main loop: iterate files, call API, build DataFrame ----
# Column order of the output table / CSV. Passing it to the DataFrame
# constructor keeps "source" available even when no file produced a row.
PROFILE_COLUMNS = [
    "source", "Name", "Research Summary", "PI History",
    "Student History", "Key Words", "Link to Lab Site",
]


def _text_field(profile: Dict[str, Any], key: str, default: str = "Unknown") -> str:
    """Return ``profile[key]`` as a stripped string; ``default`` if missing or blank.

    The model occasionally returns a list or number where a string is expected;
    those are converted instead of crashing on ``.strip()``.
    """
    value = profile.get(key)
    if isinstance(value, (list, tuple)):
        value = " ".join(str(v) for v in value if v is not None)
    elif value is not None and not isinstance(value, str):
        value = str(value)
    return (value or "").strip() or default


def _list_field(profile: Dict[str, Any], key: str) -> List[str]:
    """Return ``profile[key]`` as a list of non-blank strings.

    A bare string becomes a one-item list; ``None`` and "unknown" entries are dropped.
    """
    value = profile.get(key)
    if value is None:
        return []
    if not isinstance(value, (list, tuple)):
        value = [value]
    items = [str(v).strip() for v in value if v is not None]
    return [item for item in items if item and item.lower() != "unknown"]


rows = []
failed_files = []  # (path, "ErrorType: message") for every file that yielded no row
json_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.json")))
for i, fp in enumerate(json_files, 1):
    try:
        try:
            with open(fp, "r", encoding="utf-8") as f:
                data = json.load(f)
        except UnicodeDecodeError:
            with open(fp, "r", encoding="utf-8", errors="ignore") as f:
                data = json.load(f)

        context, urls = extract_context_and_urls(data)
        paperish = pick_paperish_urls(urls, limit=5)

        # prefer paper-ish candidates at the top of the list we show the model
        candidate_urls = paperish + [u for u in urls if u not in paperish]

        profile = call_openai_for_profile(context, candidate_urls)
    except Exception as exc:
        # One unreadable file or failed API call must not discard the rows
        # already collected. Record it, report it, and continue.
        failed_files.append((fp, f"{type(exc).__name__}: {exc}"))
        print(f"[{i}/{len(json_files)}] skipped {fp}: {type(exc).__name__}: {exc}")
        continue

    name_first = _text_field(profile, "name_first")
    name_last  = _text_field(profile, "name_last")

    research_summary = _text_field(profile, "research_summary")
    PI_history = _list_field(profile, "PI_history")
    student_history = _text_field(profile, "student_history")
    keywords = _list_field(profile, "keywords")
    lab_site = _text_field(profile, "lab_site")
    paper_links = _list_field(profile, "paper_links")

    rows.append({
        "source": fp,
        "Name": f"{name_first}, {name_last}",
        "Research Summary": research_summary + (("\nPaper links: " + "; ".join(paper_links)) if paper_links else ""),
        "PI History": format_employment(PI_history),
        "Student History": student_history,
        "Key Words": format_keywords(keywords),
        "Link to Lab Site": lab_site,
    })

    # gentle pacing to avoid rate limits on big batches
    time.sleep(0.1)

if failed_files:
    print(f"{len(failed_files)} of {len(json_files)} file(s) produced no profile; see 'failed_files'.")

df_profiles = pd.DataFrame(rows, columns=PROFILE_COLUMNS).set_index("source")
df_profiles.to_csv(OUT_CSV, index=True)
df_profiles


Unnamed: 0_level_0,Name,Research Summary,PI History,Student History,Key Words,Link to Lab Site
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C:\Users\koand\Downloads\new_scrapes\Json\Amassian‬.json,"Aram, Amassian",The Amassian Research Lab focuses on developin...,"- 2019 – Associate Professor, North Carolina S...",The Amassian Research Lab is actively seeking ...,"organic photovoltaics, hybrid materials, solar...",https://mse.ncsu.edu/amassian/
C:\Users\koand\Downloads\new_scrapes\Json\Augustyn‬.json,"Unknown, Augustyn",The Augustyn Group investigates materials at e...,"- Unknown – Professor, North Carolina State Un...",The group currently includes undergraduate and...,"pseudocapacitance, energy storage, transition ...",https://mse.ncsu.edu/augustyn/
C:\Users\koand\Downloads\new_scrapes\Json\Balke‬.json,"Nina, Balke",The Balke Research Lab explores nanoscale mate...,"- 2021 – Associate Professor, North Carolina S...",The lab currently hosts a diverse group of stu...,"nanoscale materials, electromechanical couplin...",https://mse.ncsu.edu/balke/
C:\Users\koand\Downloads\new_scrapes\Json\Brenner.json,"Unknown, Brenner",The Brenner Research Lab focuses on computatio...,"- Unknown – Principal Investigator, North Caro...",The Brenner Research Lab is currently home to ...,"computational materials science, atomistic mod...",https://mse.ncsu.edu/brenner/
C:\Users\koand\Downloads\new_scrapes\Json\Collazo‬.json,"Ramon, Collazo",The Wide Bandgaps Group focuses on the thermod...,"- Unknown – PI, North Carolina State University",The lab is currently active with a focus on tr...,"III-nitrides, AlN, AlGaN, semiconductors, epit...",https://mse.ncsu.edu/collazo/
C:\Users\koand\Downloads\new_scrapes\Json\Gupta.json,"Rajeev, Gupta",The Corrosion and Advanced Materials Laborator...,"- 2020 – Associate Professor, North Carolina S...",The lab currently hosts several doctoral stude...,"corrosion, materials science, surface engineer...",https://mse.ncsu.edu/rkgupta2/
C:\Users\koand\Downloads\new_scrapes\Json\Gwalani.json,"Bharat, Gwalani",The Gwalani Research Lab investigates the synt...,"- 2022 – Assistant Professor, NC State Univers...",The Gwalani Research Lab is currently looking ...,"metallic materials, extreme environments, micr...",https://mse.ncsu.edu/gwalani/
C:\Users\koand\Downloads\new_scrapes\Json\Jones.json,"Jacob, Jones",The Jones Research Lab focuses on understandin...,"- 2013 – Professor, North Carolina State Unive...",The Jones Research Group includes a diverse ar...,"materials science, piezoelectric materials, cr...",https://mse.ncsu.edu/jones/
C:\Users\koand\Downloads\new_scrapes\Json\KUnocic.json,"Kinga, Unocic",Kinga Unocic's research focuses on materials f...,"- 2024 – Associate Professor, North Carolina S...",The lab is expected to grow as Kinga Unocic tr...,"materials, high-temperature, oxidation, corros...",https://mse.ncsu.edu/kaunocic/
C:\Users\koand\Downloads\new_scrapes\Json\LaBean.json,"Unknown, LaBean",The Biopolymer Engineering and Nanotechnology ...,"- 2011 – Professor, NC State University\n- Unk...",The lab currently includes a mix of graduate s...,"biopolymers, DNA, nanotechnology, self-assembl...",https://mse.ncsu.edu/labean/


In [24]:
# --- Second cell: build (name, text) df from JSONs and append CSV info ---

import os, glob, json, re, unicodedata
from typing import Any, List, Tuple, Dict
import pandas as pd
from collections import defaultdict

# ---- Paths ----
INPUT_FOLDER = r"C:\Users\koand\Downloads\new_scrapes\Json"
PROFILES_CSV = r"C:\Users\koand\Downloads\new_scrapes\profiles_from_json.csv"

# ---- Load profiles CSV (produced in the first cell) ----
df_profiles = pd.read_csv(PROFILES_CSV)

# The first cell writes 'source' as the index with index=True, so it comes back here as a regular column.
# Expect columns: ['source','Name','Research Summary','PI History','Student History','Key Words','Link to Lab Site']
# Only 'source' and 'Name' are strictly required by this cell; fail early if either is absent.
required_cols = {"source","Name"}
missing = required_cols - set(df_profiles.columns)
if missing:
    raise ValueError(f"CSV missing required column(s): {missing}")

# Normalize and derive a basename->PI Name mapping using the 'source' path saved in CSV
def norm_basename(p: str) -> str:
    """Return the case-folded file name of path ``p``.

    Both ``\\`` and ``/`` count as separators on every platform. The CSV's
    'source' column holds Windows paths; ``os.path.basename`` on a POSIX
    kernel would not split them and would return the whole path. On Windows
    the result is identical to the previous ``os.path.basename`` behaviour.
    """
    import ntpath  # local import: the Windows flavour of os.path, usable on any OS

    return ntpath.basename(str(p)).casefold()

# Normalised JSON file name -> PI name, taken from the CSV's 'source' and 'Name'
# columns. If two rows share a file name, the later row wins.
csv_name_by_base: Dict[str, str] = dict(
    zip(df_profiles["source"].map(norm_basename), df_profiles["Name"])
)

# ---- URL & structure cleaning helpers (text extraction from JSON) ----
# Same values as the constants of the same names defined in the profile-generation cell.
# Dict keys whose values are link/image fields: skipped when gathering prose.
OMIT_KEYS = {"url", "display_url", "href", "link", "img", "image", "thumbnail"}
# Matches an http(s) URL up to the next whitespace character.
URL_RE = re.compile(r"https?://\S+", re.IGNORECASE)
# Whole-word, case-insensitive "sorry"; sections containing it are dropped.
SORRY_RE = re.compile(r"\bsorry\b", re.IGNORECASE)

def strip_controls(s: str) -> str:
    """Drop Unicode format characters (general category ``Cf``) from ``s``."""
    kept = []
    for ch in s:
        if unicodedata.category(ch) == "Cf":
            continue
        kept.append(ch)
    return "".join(kept)

def any_contains_sorry(obj: Any) -> bool:
    """Report whether any string nested inside ``obj`` contains the word "sorry".

    Lists and dicts are searched at any depth; dict entries whose key is in
    ``OMIT_KEYS`` are not inspected. Values of any other type never match.
    """
    pending = [obj]  # explicit work list instead of recursion
    while pending:
        node = pending.pop()
        if isinstance(node, str):
            if SORRY_RE.search(node):
                return True
        elif isinstance(node, list):
            pending.extend(node)
        elif isinstance(node, dict):
            pending.extend(
                value for key, value in node.items() if key not in OMIT_KEYS
            )
    return False

def collect_text(obj: Any, out_texts: List[str]) -> None:
    """Append the cleaned prose strings found anywhere in ``obj`` to ``out_texts``.

    Dict entries keyed by ``OMIT_KEYS`` are skipped. For each string: format
    characters are removed, a string that is only a URL is discarded, embedded
    URLs are deleted, and whitespace is collapsed. Empty results and the
    boilerplate phrase "view article authors" (any case) are not kept.
    """
    if isinstance(obj, str):
        cleaned = strip_controls(obj).strip()
        if not cleaned or URL_RE.fullmatch(cleaned):
            return
        without_urls = URL_RE.sub("", cleaned)
        cleaned = re.sub(r"\s+", " ", without_urls).strip()
        if cleaned and cleaned.lower() != "view article authors":
            out_texts.append(cleaned)
        return

    if isinstance(obj, list):
        children = obj
    elif isinstance(obj, dict):
        # ignore structural link/image fields
        children = [value for key, value in obj.items() if key not in OMIT_KEYS]
    else:
        return

    for child in children:
        collect_text(child, out_texts)

def extract_text_from_json(data: Any, max_chars: int = 200000) -> str:
    """Flatten one scraped JSON document into a single " ; "-separated string.

    ``data`` is treated as a list of sections (a non-list is wrapped in one).
    Sections flagged by ``any_contains_sorry`` are skipped. Exact-duplicate
    strings are dropped, keeping the first occurrence. The result is cut to
    ``max_chars`` characters if it is longer.
    """
    sections = [data] if not isinstance(data, list) else data

    collected: List[str] = []
    for section in sections:
        if not any_contains_sorry(section):
            collect_text(section, collected)

    # dict keys preserve insertion order, giving an order-preserving de-dup
    joined = " ; ".join(dict.fromkeys(collected))
    return joined[:max_chars] if len(joined) > max_chars else joined

# ---- Iterate JSONs: map each file to its PI via basename from the CSV ----
# Outputs of this section:
#   name_to_texts  : PI name -> list of extracted texts (one per matched file)
#   used_files     : paths that were read and matched to a PI
#   unmatched_json : (path, reason) for files that could not be read or matched
json_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, "*.json")))
name_to_texts: Dict[str, List[str]] = defaultdict(list)
used_files = []
unmatched_json = []

for fp in json_files:
    # Look the file up by its normalised file name, the same key used to build csv_name_by_base.
    base = norm_basename(fp)
    pi_name = csv_name_by_base.get(base)
    try:
        # robust open (utf-8 first, then ignore errors)
        try:
            with open(fp, "r", encoding="utf-8") as f:
                data = json.load(f)
        except UnicodeDecodeError:
            with open(fp, "r", encoding="utf-8", errors="ignore") as f:
                data = json.load(f)
    except Exception as e:
        # Any other read/parse failure is recorded rather than raised; the
        # assertion in the final-checks section fails later if any file was skipped.
        unmatched_json.append((fp, f"ReadError: {e}"))
        continue

    text = extract_text_from_json(data)
    if not pi_name:
        unmatched_json.append((fp, "No matching PI in CSV (basename mismatch)"))
        continue

    # Several files may map to the same PI name; their texts are merged later.
    name_to_texts[pi_name].append(text)
    used_files.append(fp)

# ---- Aggregate to one row per PI (name, text) ----
# Several JSON files may map to the same PI; join their non-empty texts.
rows_json = [
    {"name": name, "text": " ; ".join([t for t in texts if t])}
    for name, texts in name_to_texts.items()
]

# If some PIs had no JSON match, ensure they still appear (with empty text) so the join stays PI-complete
all_pi_names = df_profiles["Name"].dropna().unique().tolist()
missing_from_json = sorted(set(all_pi_names) - set(name_to_texts))
rows_json.extend({"name": name, "text": ""} for name in missing_from_json)

# Build the frame once, with explicit columns. With no matched JSON at all,
# ``pd.DataFrame([])`` has no 'name' column and the old ``df_json["name"]``
# lookup raised KeyError. It also avoids concatenating onto an empty frame.
df_json = pd.DataFrame(rows_json, columns=["name", "text"])

# ---- Append CSV info (except 'source') to each PI's text ----
# Build a compact string of the CSV columns (excluding 'source')
info_cols = [c for c in df_profiles.columns if c not in {"source"}]  # keep 'Name' for context
def row_to_suffix(r: pd.Series, cols=None) -> str:
    """Render one profile row as a compact "Column: value ; Column: value" string.

    Args:
        r: A row of the profiles table.
        cols: Column names to include, in order. Defaults to the module-level
            ``info_cols`` when omitted, so existing one-argument calls are unchanged.

    Returns:
        The joined string. The "Name" column, missing values, blank values and
        values equal to "unknown" (any case) are left out. Empty if nothing remains.
    """
    parts = []
    for c in (info_cols if cols is None else cols):
        if c == "Name":
            continue
        raw = r[c]
        # read_csv loads empty cells as NaN, and str(NaN) is "nan". Without
        # this check the suffix gained bogus "Column: nan" fragments.
        if pd.isna(raw):
            continue
        val = str(raw).strip()
        if val and val.lower() != "unknown":
            parts.append(f"{c}: {val}")
    return " ; ".join(parts)

# PI name -> rendered CSV info. If a name occurs more than once, the last row wins.
suffix_by_name = dict(
    (r["Name"], row_to_suffix(r)) for _, r in df_profiles.iterrows()
)

def append_suffix(row):
    """Return the row's text followed by the CSV-derived suffix for its name.

    With no suffix, the text is returned unchanged. With a suffix but no
    text, the suffix alone is returned. Otherwise the two are joined with " ; ".
    """
    extra = suffix_by_name.get(row["name"], "")
    if not extra:
        return row["text"]
    return f'{row["text"]} ; {extra}' if row["text"] else extra

df_json["text"] = df_json.apply(append_suffix, axis=1)

# ---- Final checks ----
# Summarise how many files were found/used and stop if the mapping is incomplete.
n_files = len(json_files)
n_used = len(used_files)
n_unmatched = len(unmatched_json)
n_pis = len(all_pi_names)  # number of distinct non-null names in the CSV
n_rows_final = len(df_json)

print(f"JSON files found: {n_files}")
print(f"JSON files used : {n_used}")
print(f"Unmatched JSONs : {n_unmatched}")
if n_unmatched:
    # Show at most the first 10 problems to keep the output short.
    for fp, reason in unmatched_json[:10]:
        print(f"  - {fp} -> {reason}")
    if len(unmatched_json) > 10:
        print(f"  ... and {len(unmatched_json)-10} more")

# Every JSON file must have been read and matched, and there must be exactly
# one output row per distinct PI name.
assert n_used == n_files, f"Not all JSON files were used ({n_used}/{n_files}). See 'unmatched_json'."
assert n_rows_final == n_pis, f"Row count {n_rows_final} != PI count {n_pis}."

# ---- Result: a single entry per PI ----
# df_name_text is the (name, text) table consumed by the vectorisation cell.
df_name_text = df_json[["name", "text"]].copy()
df_name_text.reset_index(drop=True, inplace=True)

# Peek
df_name_text.head(3)

# OUT_TABLE = r"C:\Users\koand\Downloads\new_scrapes\name_text_table.csv"
# try:
#     df_name_text.to_csv(OUT_TABLE, index=False)
#     print(f"Saved Name–Text table to: {OUT_TABLE}")
# except Exception as e:
#     print(f"Could not save to {OUT_TABLE}: {e}")


JSON files found: 19
JSON files used : 19
Unmatched JSONs : 0


Unnamed: 0,name,text
0,"Aram, Amassian",Aram Amassian - Google Scholar Colloidal-quant...
1,"Unknown, Augustyn",Veronica Augustyn - Google Scholar Pseudocapac...
2,"Nina, Balke",Nina Balke - Google Scholar Electric-field con...


In [25]:
# --- Third cell: vectorize PI texts and define similarity ranking helper ---

import os
import numpy as np
import pandas as pd

# Paths
PROFILES_CSV = r"C:\Users\koand\Downloads\new_scrapes\profiles_from_json.csv"

# Load profiles (ground truth for the output columns)
profiles = pd.read_csv(PROFILES_CSV).drop_duplicates(subset=["Name"]).copy()

# Make a lookup from Name -> the fields we need in the final table
# We'll be lenient: if "PI History" isn't present, fall back to "Employment History".
def _get_col(df, primary, fallback=None, default=""):
    if primary in df.columns:
        return df[primary].fillna(default)
    if fallback and fallback in df.columns:
        return df[fallback].fillna(default)
    return pd.Series([default] * len(df), index=df.index)

# (derived column, CSV column, fallback CSV column) for each output field.
# Older CSVs may carry "Employment History" instead of "PI History".
_META_SOURCES = (
    ("_Research summary", "Research Summary", None),
    ("_PI history", "PI History", "Employment History"),
    ("_Student history", "Student History", None),
    ("_Key words", "Key Words", None),
    ("_Link to lab site", "Link to Lab Site", None),
)

for _meta_col, _csv_col, _csv_fallback in _META_SOURCES:
    profiles[_meta_col] = _get_col(profiles, _csv_col, fallback=_csv_fallback)

# Name-indexed lookup table holding only the derived columns, in the order above.
profile_meta = profiles.set_index("Name")[
    [_spec[0] for _spec in _META_SOURCES]
]

# --- Vectorization (TF-IDF) ---
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
except ImportError as e:
    raise ImportError("scikit-learn is required for vectorization. Install with: pip install scikit-learn") from e

# Expect df_name_text from previous cell with columns: ['name', 'text']
# Fail with a clear message if this cell is run before the one that builds it.
if "df_name_text" not in globals():
    raise RuntimeError("df_name_text not found. Please run the previous cell first.")

# Ensure one row per PI and fill NaNs
# (the index is not reset here; rows are only used positionally below)
df_name_text = (
    df_name_text.copy()
    .drop_duplicates(subset=["name"])
    .assign(text=lambda d: d["text"].fillna(""))
)

# Vectorizer settings (similar to your earlier setup)
# Unigrams and bigrams, English stop words removed. Per scikit-learn's documented
# semantics, min_df=2 drops terms seen in fewer than 2 documents and max_df=0.9
# drops terms seen in more than 90% of them.
VEC_OPTS = dict(
    stop_words="english",
    lowercase=True,
    ngram_range=(1, 2),
    max_df=0.9,
    min_df=2
)

vectorizer = TfidfVectorizer(**VEC_OPTS)
X = vectorizer.fit_transform(df_name_text["text"].tolist())   # shape: (n_pis, n_terms)
# Row i of X corresponds to pi_names[i]; both come from df_name_text in the same order.
pi_names = df_name_text["name"].tolist()

# Build a quick mapping for row index -> PI name and name -> profile meta
idx_to_name = np.array(pi_names)
name_in_meta = profile_meta.index.to_list()
missing_meta = sorted(set(pi_names) - set(name_in_meta))
if missing_meta:
    # Keep going, but warn in output so you know which will have empty metadata rows
    print("Warning: The following PIs are in df_name_text but missing from profiles CSV metadata:")
    for nm in missing_meta:
        print("  -", nm)

# --- Similarity helper ---
def compare_student_entry(student_text: str, top_k: int = 10) -> pd.DataFrame:
    """Rank PIs by TF-IDF similarity to a student's free-text interests.

    The text is transformed with the fitted module-level ``vectorizer`` and
    scored against every row of ``X`` by dot product. This equals cosine
    similarity provided the vectorizer L2-normalises its rows, which is the
    TfidfVectorizer default and is not overridden in ``VEC_OPTS``.

    Args:
        student_text: Non-empty description of the student's interests.
        top_k: Maximum number of rows to return, best match first.

    Returns:
        DataFrame with columns: Name, Similarity score, Research summary,
        PI history, Student history, Key words, Link to lab site. Metadata
        comes from ``profile_meta``; names absent from it get empty strings.

    Raises:
        ValueError: if ``student_text`` is not a string or is blank.
    """
    if not (isinstance(student_text, str) and student_text.strip()):
        raise ValueError("Provide a non-empty student_text string.")

    query_vec = vectorizer.transform([student_text])      # shape: (1, n_terms)
    sims = (query_vec @ X.T).toarray().ravel()            # shape: (n_pis,)
    best_positions = np.argsort(-sims)[:top_k]

    # output column -> column of profile_meta that feeds it
    meta_fields = {
        "Research summary": "_Research summary",
        "PI history": "_PI history",
        "Student history": "_Student history",
        "Key words": "_Key words",
        "Link to lab site": "_Link to lab site",
    }
    # Stand-in metadata (all empty strings) for names missing from the CSV.
    blank_meta = pd.Series(
        ["", "", "", "", ""],
        index=["_Research summary","_PI history","_Student history","_Key words","_Link to lab site"]
    )

    records = []
    for pos in best_positions:
        pi = idx_to_name[pos]
        meta = profile_meta.loc[pi] if pi in profile_meta.index else blank_meta
        record = {"Name": pi, "Similarity score": float(sims[pos])}
        for out_col, meta_col in meta_fields.items():
            record[out_col] = meta[meta_col]
        records.append(record)

    return pd.DataFrame(records)

# --- Example usage (uncomment to test) ---
# student_text = """
# I'm interested in data-driven materials discovery, polymer nanocomposites,
# molecular dynamics of soft matter, and energy storage interfaces. I enjoy
# machine learning for materials, high-throughput simulation, and structure–property modeling.
# """
# display(compare_student_entry(student_text, top_k=15))


In [26]:
# --- Example usage: rank PIs against a sample student statement ---
student_text = """
I'm interested in  biopolymers,
molecular dynamics of soft matter. I enjoy
machine learning for materials, high-throughput simulation, and structure–property modeling.
"""
# Show the ten best-matching PIs with their similarity scores and profile fields.
display(compare_student_entry(student_text, top_k=10))


Unnamed: 0,Name,Similarity score,Research summary,PI history,Student history,Key words,Link to lab site
0,"Yara, Yingling",0.068361,The Yingling Research Group focuses on the int...,"- Unknown – PI, North Carolina State University",The Yingling Research Group includes graduate ...,"molecular modeling, nanocomposites, biomimetic...",https://mse.ncsu.edu/yingling/
1,"Unknown, Brenner",0.059009,The Brenner Research Lab focuses on computatio...,"- Unknown – Principal Investigator, North Caro...",The Brenner Research Lab is currently home to ...,"computational materials science, atomistic mod...",https://mse.ncsu.edu/brenner/
2,"Martin, Thuo",0.04521,The Soft Materials Matter Transport Research L...,"- 2022 – Professor, North Carolina State Unive...",The lab currently hosts a diverse group of gra...,"soft matter, metastable materials, surface the...",https://mse.ncsu.edu/thuo/
3,"Unknown, LaBean",0.040012,The Biopolymer Engineering and Nanotechnology ...,"- 2011 – Professor, NC State University\n- Unk...",The lab currently includes a mix of graduate s...,"biopolymers, DNA, nanotechnology, self-assembl...",https://mse.ncsu.edu/labean/
4,"Joseph, Tracy",0.033388,"The Nanomagnetism Lab, led by Joseph Tracy, fo...","- 2007 – Professor, North Carolina State Unive...",The Nanomagnetism Lab currently includes gradu...,"nanomagnetism, nanoparticles, synthesis, energ...",https://mse.ncsu.edu/nanomagnetism/
5,"Unknown, Seifrid",0.032938,The Data-Driven Organic Materials Lab focuses ...,"- 2023 – Assistant Professor, North Carolina S...",The lab is currently active and focuses on int...,"organic materials, self-driving labs, machine ...",https://ddomlab.org/
6,"Raymond, Unocic",0.029197,The Raymond Unocic Research Lab advances elect...,"- 2024 – Associate Professor, North Carolina S...",The lab is committed to shaping the next gener...,"electron microscopy, nanoscience, materials sc...",https://mse.ncsu.edu/rrunocic/
7,"Aram, Amassian",0.02658,The Amassian Research Lab focuses on developin...,"- 2019 – Associate Professor, North Carolina S...",The Amassian Research Lab is actively seeking ...,"organic photovoltaics, hybrid materials, solar...",https://mse.ncsu.edu/amassian/
8,"Rajeev, Gupta",0.02153,The Corrosion and Advanced Materials Laborator...,"- 2020 – Associate Professor, North Carolina S...",The lab currently hosts several doctoral stude...,"corrosion, materials science, surface engineer...",https://mse.ncsu.edu/rkgupta2/
9,"Nina, Balke",0.015655,The Balke Research Lab explores nanoscale mate...,"- 2021 – Associate Professor, North Carolina S...",The lab currently hosts a diverse group of stu...,"nanoscale materials, electromechanical couplin...",https://mse.ncsu.edu/balke/


OLD