In [None]:
!pip install spacy
!python3 -m spacy download en_core_web_sm

In [None]:
# Trigger verbs/phrases grouped by Bloom's-taxonomy level.
# Keys are the six level names, in order from "Remember" to "Create"; values are
# capitalised trigger words or short multi-word phrases (e.g. "Break down").
# The same trigger may be listed under several levels (e.g. "Describe" is under
# both "Remember" and "Understand").
RAW_TRIGGERS = {
    "Remember": ["Cite","Define","Describe","Draw","Enumerate","Identify","Index","Indicate","Label","List","Match","Meet","Name",
                 "Outline","Point","Quote","Read","Recall","Recite","Recognize","Record","Repeat","Reproduce","Review","Select",
                 "State","Study","Tabulate","Trace","Write"],
    "Understand": ["Add","Approximate","Articulate","Associate","Characterize","Clarify","Classify","Compare","Compute","Contrast",
                   "Convert","Defend","Describe","Detail","Differentiate","Discuss","Distinguish","Elaborate","Estimate","Example",
                   "Explain","Express","Extend","Extrapolate","Factor","Generalize","Give","Infer","Interact","Interpolate", "Interpret",
                   "Observe", "Paraphrase", "Picture graphically", "Predict", "Review", "Rewrite", "Subtract", "Summarize",
                   "Translate", "Visualize"],
    "Apply": ["Acquire","Adapt","Allocate","Alphabetize","Apply","Ascertain","Assign","Attain","Avoid","Back up","Calculate",
              "Capture","Change","Classify","Complete","Compute","Construct","Customize","Demonstrate","Depreciate","Derive",
              "Determine","Diminish","Discover","Draw","Employ","Examine","Exercise","Explore","Expose","Express", "Factor", "Figure",
             "Graph", "Handle", "Illustrate", "Interconvert", "Investigate","Manipulate","Modify", "Operate", "Personalize",
              "Plot","Practice", "Predict", "Prepare", "Price", "Process", "Produce", "Project", "Provide", "Relate", "Round off",
             "Sequence", "Show", "Simulate", "Sketch", "Solve", "Subscribe", "Tabulate", "Transcribe", "Translate", "Use"],
    "Analyze": ["Analyze","Audit","Blueprint","Breadboard","Break down","Characterize","Classify","Compare","Confirm","Contrast",
                "Correlate","Detect","Diagnose","Diagram","Differentiate","Discriminate","Dissect","Distinguish","Document",
                "Ensure","Examine","Explain","Explore","Figure out","File","Group","Identify","Illustrate","Infer","Interrupt",
                "Inventory", "Investigate", "Layout", "Manage", "Maximize", "Minimize", "Optimize", "Order", "Outline", "Point out",
                "Prioritize", "Proofread", "Query", "Relate", "Select", "Separate", "Subdivide", "Train", "Transform"],
    "Evaluate": ["Appraise","Assess","Compare","Conclude","Contrast","Counsel","Criticize","Critique","Defend","Determine",
                 "Discriminate","Estimate","Evaluate","Explain","Grade","Hire","Interpret","Judge","Justify","Measure","Predict",
                 "Prescribe","Rank","Rate","Recommend","Release","Select","Summarize","Support","Test","Validate","Verify"],
    "Create": ["Abstract","Animate","Arrange","Assemble","Budget","Categorize","Code","Combine","Compile","Compose","Construct",
               "Cope","Correspond","Create","Cultivate","Debug","Depict","Design","Develop","Devise","Dictate","Enhance",
               "Explain","Facilitate","Format","Formulate","Generalize","Generate","Handle","Import","Improve","Incorporate",
               "Integrate","Interface","Join","Lecture","Model","Modify","Network","Organize","Outline","Overhaul","Plan",
               "Portray","Prepare","Prescribe","Produce","Program","Rearrange","Reconstruct","Relate","Reorganize","Revise",
               "Rewrite","Specify","Summarize"]
}

In [None]:
# bloom_classifier_datadriven_v2.py
from __future__ import annotations
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
import re
import math


# ---------------------- Config (generic, not per-verb or priority-based) ----------------------
# Weak exact-match evidence is blended with a second signal (semantic similarity, or
# lemma overlap when no embeddings exist) instead of being discarded.
BLEND_THRESHOLD = 0.30           # best exact score below this (and fewer than 2 hits) -> blend
BLEND_ALPHA = 0.45               # final = alpha * exact(normalized) + (1 - alpha) * other(normalized)

# Ambiguity penalty: a trigger's base weight is divided by k ** AMBIGUITY_POWER,
# where k is the number of levels that list that lemma/phrase.
AMBIGUITY_POWER = 1.75            # was 2.0; relaxed so ambiguous verbs still contribute some signal

# Generic attenuation for matches away from the sentence root (no per-verb rules).
SINGLE_NONROOT_MULT = 0.30       # single-word trigger that is not the root lemma
PHRASE_NONROOT_MULT = 0.70       # multi-word phrase that does not contain the root lemma

# ---------------------- NLP setup: prefer Sentence-Transformers, else spaCy vectors ----------------------
# Module-level state filled in by setup_nlp() and its helpers.
EMBEDDING_BACKEND = None  # "st", "spacy", or None when no embedding backend was found
_embed_model = None       # SentenceTransformer instance when EMBEDDING_BACKEND == "st"
_spacy_nlp = None         # spaCy pipeline used for lemmas (and vectors with the "spacy" backend)

def _try_sentence_transformers():
    """Try to select Sentence-Transformers as the embedding backend.

    On success stores the loaded model in ``_embed_model``, sets
    ``EMBEDDING_BACKEND`` to ``"st"`` and returns True. Any failure (package
    missing, model cannot be loaded, ...) returns False and leaves the backend
    untouched.
    """
    global EMBEDDING_BACKEND, _embed_model
    try:
        from sentence_transformers import SentenceTransformer
        _embed_model = SentenceTransformer("all-MiniLM-L6-v2")
    except Exception:
        return False
    EMBEDDING_BACKEND = "st"
    return True

def _try_spacy_vectors():
    """Try to select a spaCy pipeline with word vectors as the embedding backend.

    Candidate models are tried in a fixed order. Every pipeline that loads is
    remembered in ``_spacy_nlp`` (so it can still be used for lemmas); the first
    one whose vocabulary has vectors sets ``EMBEDDING_BACKEND`` to ``"spacy"``.

    Returns:
        True if a vector-bearing pipeline was found, otherwise False.
    """
    global EMBEDDING_BACKEND, _spacy_nlp
    try:
        import spacy
    except Exception:
        return False

    candidates = ("en_core_web_md", "en_core_web_lg", "en_core_web_trf", "en_core_web_sm")
    for model_name in candidates:
        try:
            pipeline = spacy.load(model_name)
            _spacy_nlp = pipeline
            if pipeline.vocab.vectors:  # real vectors
                EMBEDDING_BACKEND = "spacy"
                return True
        except Exception:
            # model not installed / failed to load: try the next candidate
            continue
    return False

def _ensure_spacy_for_lemmas():
    """Make sure ``_spacy_nlp`` holds some spaCy pipeline for tokenising/lemmatising.

    An already-loaded pipeline is kept. Otherwise ``en_core_web_sm`` is tried
    first and a blank English pipeline second.

    Returns:
        True if a pipeline is available afterwards, False if spaCy could not be
        used at all.
    """
    global _spacy_nlp
    if _spacy_nlp is not None:
        return True
    try:
        import spacy
        _spacy_nlp = spacy.load("en_core_web_sm")
    except Exception:
        # small model unavailable: settle for a blank English pipeline
        try:
            import spacy
            _spacy_nlp = spacy.blank("en")
        except Exception:
            return False
    return True

def setup_nlp():
    """Pick an embedding backend and make sure a spaCy pipeline exists for lemmas.

    Sentence-Transformers is preferred; spaCy vectors are the second choice.
    A lemma pipeline is requested explicitly unless the spaCy-vector path
    already succeeded.
    """
    if _try_sentence_transformers():
        _ensure_spacy_for_lemmas()
    elif not _try_spacy_vectors():
        _ensure_spacy_for_lemmas()

# ---------------------- Lemmatization utils ----------------------
WS = re.compile(r"\s+")
PUNCT = re.compile(r"[^\w\s'-]")

def _norm(s: str) -> str:
    return WS.sub(" ", PUNCT.sub(" ", s.strip())).strip()

def _lemmas(text: str) -> List[str]:
    """Return lower-cased lemmas for *text*.

    Without a spaCy pipeline the text is normalised and split on whitespace.
    With one, each non-blank token contributes its lemma, or its surface text
    when the lemma is empty or missing.
    """
    if _spacy_nlp is None:
        return _norm(text).lower().split()

    lemmas = []
    for tok in _spacy_nlp(text):
        if not tok.text.strip():
            continue
        lemma = getattr(tok, "lemma_", None)
        lemmas.append(lemma.lower() if lemma else tok.text.lower())
    return lemmas

def _lemma_tuple(phrase: str) -> Tuple[str, ...]:
    """Return the lemmas of *phrase* as a hashable tuple."""
    lemma_list = _lemmas(phrase)
    return tuple(lemma_list)

# ---------------------- Build indices (data-only) ----------------------
def build_trigger_index(raw: Dict[str, List[str]]):
    """Build per-level lookup tables from the raw trigger lists.

    Args:
        raw: mapping of level name -> list of trigger words/phrases.

    Returns:
        A 4-tuple ``(level_to_phrases, level_to_lemmas, singles, multis)``:
        normalised unique phrases per level, their lemma tuples, the set of
        single-lemma triggers per level, and the set of multi-lemma tuples
        per level.
    """
    level_to_phrases, level_to_lemmas = {}, {}
    singles, multis = {}, {}
    for level, phrases in raw.items():
        # dict.fromkeys drops duplicates while keeping first-seen order
        normalized = (_norm(p) for p in phrases if p and p.strip())
        unique_phrases = list(dict.fromkeys(normalized))
        lemma_tuples = [_lemma_tuple(p) for p in unique_phrases]

        level_to_phrases[level] = unique_phrases
        level_to_lemmas[level] = lemma_tuples
        singles[level] = {tup[0] for tup in lemma_tuples if len(tup) == 1}
        multis[level] = {tup for tup in lemma_tuples if len(tup) > 1}
    return level_to_phrases, level_to_lemmas, singles, multis

def build_conflict_index(level_to_lemmas: Dict[str, List[Tuple[str,...]]]):
    """Count, for every trigger, how many levels list it.

    Args:
        level_to_lemmas: mapping of level name -> list of lemma tuples.

    Returns:
        ``(lemma_counts, phrase_counts)``: single lemmas keyed by string and
        multi-lemma phrases keyed by tuple, each mapped to its number of levels.
        Empty tuples are ignored.
    """
    levels_by_lemma = defaultdict(set)
    levels_by_phrase = defaultdict(set)
    for level, lemma_tuples in level_to_lemmas.items():
        for tup in lemma_tuples:
            if not tup:
                continue
            if len(tup) == 1:
                levels_by_lemma[tup[0]].add(level)
            else:
                levels_by_phrase[tup].add(level)

    lemma_counts = {lemma: len(levels) for lemma, levels in levels_by_lemma.items()}
    phrase_counts = {phrase: len(levels) for phrase, levels in levels_by_phrase.items()}
    return lemma_counts, phrase_counts

# ---------------------- Exact matching (conflict-aware + root sensitivity) ----------------------
def _find_doc_root(doc):
    try:
        return next((t for t in doc if t.head == t), None)
    except Exception:
        return None

def exact_match_scores(question: str,
                       singles: Dict[str,set],
                       multis: Dict[str,set],
                       lemma_counts: Dict[str,int],
                       phrase_counts: Dict[Tuple[str,...],int],
                       spacy_nlp) -> Tuple[Dict[str,float], int]:
    """Score each level by the trigger lemmas/phrases found in *question*.

    A single-lemma trigger contributes ``1 / k**AMBIGUITY_POWER`` and a
    multi-lemma phrase ``2 / k**AMBIGUITY_POWER``, where ``k`` is the number of
    levels listing that trigger. The contribution is attenuated by
    ``SINGLE_NONROOT_MULT`` / ``PHRASE_NONROOT_MULT`` when the trigger is not
    (or does not contain) the sentence root.

    Args:
        question: text to score.
        singles: level -> set of single-lemma triggers.
        multis: level -> set of multi-lemma trigger tuples.
        lemma_counts: lemma -> number of levels listing it.
        phrase_counts: lemma tuple -> number of levels listing it.
        spacy_nlp: callable pipeline returning tokens with ``lemma_``, ``text``
            and ``head``; may be None, in which case whitespace tokens are used
            and every match is treated as non-root.

    Returns:
        scores: dict[level] -> float (only levels with at least one hit)
        hit_count: number of DISTINCT triggers (singles + phrases) found; a
            trigger listed under several levels is counted once.
    """
    if spacy_nlp is None:
        # No pipeline at all: degrade gracefully instead of calling None.
        toks = _norm(question).lower().split()
        root_lem = None
    else:
        doc = spacy_nlp(question)
        toks = [(getattr(t, "lemma_", None) or t.text).lower() for t in doc]
        root = _find_doc_root(doc)
        # Use the same lemma-or-text fallback as `toks`; otherwise a pipeline
        # without a lemmatizer yields an empty root lemma that never matches.
        root_lem = ((getattr(root, "lemma_", None) or root.text).lower()
                    if root is not None else None)
    uni = set(toks)

    def weight_single(lem: str) -> float:
        k = max(1, lemma_counts.get(lem, 1))
        base = 1.0 / float(k ** AMBIGUITY_POWER)     # softer than k^2 but penalizes ambiguity
        mult = 1.0 if (root_lem is not None and lem == root_lem) else SINGLE_NONROOT_MULT
        return base * mult

    def weight_phrase(tup: Tuple[str,...]) -> float:
        k = max(1, phrase_counts.get(tup, 1))
        base = 2.0 / float(k ** AMBIGUITY_POWER)
        contains_root = (root_lem is not None and root_lem in tup)
        mult = 1.0 if contains_root else PHRASE_NONROOT_MULT
        return base * mult

    def ngrams(seq: List[str], n: int):
        return {tuple(seq[i:i+n]) for i in range(0, max(0, len(seq)-n+1))}

    max_len = max((len(p) for s in multis.values() for p in s), default=1)
    ngram_bags = {n: ngrams(toks, n) for n in range(2, max_len+1)}

    scores = defaultdict(float)
    # Distinct triggers seen; single lemmas are stored as 1-tuples so they
    # cannot collide with phrase tuples.
    distinct_hits = set()

    # singles
    for level, vocab in singles.items():
        for lem in uni & vocab:
            scores[level] += weight_single(lem)
            distinct_hits.add((lem,))

    # phrases
    for level, phrases in multis.items():
        for phr in phrases:
            if phr in ngram_bags.get(len(phr), ()):
                scores[level] += weight_phrase(phr)
                distinct_hits.add(phr)

    return dict(scores), len(distinct_hits)

# ---------------------- Semantic similarity (data-only) ----------------------
# Process-wide cache of trigger embeddings, filled once by _prepare_trigger_embeddings().
_embed_cache_ready = False
_flat_triggers: List[Tuple[str, str]] = []  # [(level, phrase)]
_flat_trigger_vecs = None

def _prepare_trigger_embeddings(level_to_phrases: Dict[str,List[str]]):
    """Embed every trigger phrase once and cache the result in module globals.

    Later calls are no-ops, whatever mapping is passed. ``_flat_trigger_vecs``
    ends up None when ``_embed_texts`` has no backend to use.
    """
    global _embed_cache_ready, _flat_triggers, _flat_trigger_vecs
    if not _embed_cache_ready:
        pairs = []
        for level, phrase_list in level_to_phrases.items():
            pairs.extend((level, phrase) for phrase in phrase_list)
        _flat_triggers = pairs
        _flat_trigger_vecs = _embed_texts([phrase for _, phrase in pairs])
        _embed_cache_ready = True

def _embed_texts(texts: List[str]):
    """Embed *texts* with the active backend.

    Returns:
        The encoder output for the ``"st"`` backend, a list of L2-normalised
        document vectors for the ``"spacy"`` backend (zero vectors are left
        as-is), or None when no usable backend is configured.
    """
    if EMBEDDING_BACKEND == "st":
        return _embed_model.encode(texts, normalize_embeddings=True)

    spacy_usable = (EMBEDDING_BACKEND == "spacy"
                    and _spacy_nlp is not None
                    and _spacy_nlp.vocab.vectors)
    if not spacy_usable:
        return None

    import numpy as np

    def unit(vec):
        length = float(np.linalg.norm(vec))
        return vec / length if length > 0 else vec

    return [unit(_spacy_nlp(text).vector) for text in texts]

def _cos(a, b):
    try:
        import numpy as np
        return float(np.dot(a, b))
    except Exception:
        return float(sum(x*y for x, y in zip(a, b)))

def semantic_scores(question: str,
                    level_to_phrases: Dict[str,List[str]]) -> Optional[Dict[str, float]]:
    """Score each level by its best trigger-to-question similarity.

    Returns:
        dict[level] -> highest similarity among that level's triggers, floored
        at 0.0; or None when no embeddings are available.
    """
    _prepare_trigger_embeddings(level_to_phrases)
    if _flat_trigger_vecs is None:
        return None

    embedded = _embed_texts([question])
    if embedded is None:
        return None
    question_vec = embedded[0]

    best_per_level = {}  # take MAX similarity per level
    for (level, _phrase), trigger_vec in zip(_flat_triggers, _flat_trigger_vecs):
        similarity = _cos(question_vec, trigger_vec)
        # setdefault keeps every seen level in the result, starting at 0.0
        if similarity > best_per_level.setdefault(level, 0.0):
            best_per_level[level] = similarity
    return best_per_level

# ---------------------- Last-resort: lemma Jaccard (data-only) ----------------------
def lemma_overlap_scores(question: str, level_to_lemmas: Dict[str, List[Tuple[str,...]]]) -> Dict[str, float]:
    """Score each level by the best Jaccard overlap between question and trigger lemmas.

    Returns:
        dict[level] -> best Jaccard similarity over that level's non-empty
        triggers (0.0 when the level has none or nothing overlaps).
    """
    question_lemmas = set(_lemma_tuple(question))

    def jaccard(trigger: Tuple[str, ...]) -> float:
        trigger_lemmas = set(trigger)
        union_size = len(question_lemmas | trigger_lemmas)
        if not union_size:
            return 0.0
        return len(question_lemmas & trigger_lemmas) / union_size

    return {
        level: max((jaccard(tup) for tup in lemma_tuples if tup), default=0.0)
        for level, lemma_tuples in level_to_lemmas.items()
    }

# ---------------------- Score utilities ----------------------
def _normalize(scores: Dict[str, float]) -> Dict[str, float]:
    if not scores:
        return scores
    mx = max(scores.values())
    if mx <= 0:
        # avoid all-zero; return as-is
        return scores
    return {k: (v / mx) for k, v in scores.items()}

def _rank(scores: Dict[str, float]):
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

# ---------------------- Public Classifier ----------------------
class BloomClassifier:
    """Classify a question into a Bloom's-taxonomy level using the RAW_TRIGGERS table.

    Exact trigger matches are used when they are strong enough. Weak exact
    evidence is blended with semantic similarity (or lemma overlap when no
    embeddings exist). With no exact match at all, semantic similarity or
    lemma overlap is used alone.
    """

    def __init__(self):
        """Initialise the NLP backends and build all trigger indices."""
        setup_nlp()
        (self.level_to_phrases,
         self.level_to_lemmas,
         self.singles,
         self.multis) = build_trigger_index(RAW_TRIGGERS)
        self.lemma_counts, self.phrase_counts = build_conflict_index(self.level_to_lemmas)
        _prepare_trigger_embeddings(self.level_to_phrases)

    @staticmethod
    def _blend(exact, other):
        """Blend normalised exact scores with a second normalised score map.

        Each level gets ``BLEND_ALPHA * exact + (1 - BLEND_ALPHA) * other``; a
        level missing from one map counts as 0.0 there. Keys are visited in
        first-seen order (exact first), so the order of tied levels does not
        depend on string hashing.
        """
        exact_n = _normalize(exact)
        other_n = _normalize(other)
        levels = dict.fromkeys([*exact_n, *other_n])
        return {lvl: BLEND_ALPHA * exact_n.get(lvl, 0.0) + (1.0 - BLEND_ALPHA) * other_n.get(lvl, 0.0)
                for lvl in levels}

    def classify(self, question: str, top_k: int = 3):
        """Classify *question* and report how the decision was reached.

        Args:
            question: the question text.
            top_k: how many ranked ``(level, score)`` pairs to return.

        Returns:
            A dict with:
              method      -- "exact", "blend", "blend_lemma", "semantic" or
                             "lemma_overlap"
              decided     -- the winning level, or None on a tie at the top
                             (or when there are no scores at all)
              top_levels  -- the first ``top_k`` ranked ``(level, score)`` pairs
              ties_at_top -- levels tied for first place ([] if no tie)
              all_scores  -- the full level -> score mapping
        """
        # 1) exact (conflict-aware + root sensitivity)
        exact, hit_count = exact_match_scores(
            question,
            self.singles, self.multis,
            self.lemma_counts, self.phrase_counts,
            _spacy_nlp
        )

        if exact:
            if max(exact.values()) >= BLEND_THRESHOLD or hit_count >= 2:
                # strong enough: keep exact as-is
                scores, method = exact, "exact"
            else:
                # weak exact: blend with semantic if available, else lemma overlap
                sem = semantic_scores(question, self.level_to_phrases)
                if sem:
                    scores, method = self._blend(exact, sem), "blend"
                else:
                    jacc = lemma_overlap_scores(question, self.level_to_lemmas)
                    scores, method = self._blend(exact, jacc), "blend_lemma"
        else:
            # 2) no exact: semantic if available, else lemma overlap
            sem = semantic_scores(question, self.level_to_phrases)
            if sem:
                scores, method = sem, "semantic"
            else:
                scores = lemma_overlap_scores(question, self.level_to_lemmas)
                method = "lemma_overlap"

        ranked = _rank(scores)
        if not ranked:
            # Nothing to rank (e.g. an empty trigger table): report "undecided"
            # rather than failing on ranked[0].
            return {
                "method": method,
                "decided": None,
                "top_levels": [],
                "ties_at_top": [],
                "all_scores": scores
            }

        decided_level, decided_score = ranked[0]
        ties = [lvl for lvl, sc in ranked if math.isclose(sc, decided_score, rel_tol=1e-6, abs_tol=1e-9)]

        return {
            "method": method,
            "decided": decided_level if len(ties) == 1 else None,  # None if tie at top
            "top_levels": ranked[:top_k],
            "ties_at_top": ties if len(ties) > 1 else [],
            "all_scores": scores
        }

In [None]:
# ---------------------- Demo ----------------------
# Smoke test: classify a handful of sample questions and print, for each one,
# the method used, the decided level (None on a tie) and the top-ranked levels
# with scores rounded to three decimals.
if __name__ == "__main__":
    clf = BloomClassifier()
    tests = [
        "List the steps of the Krebs cycle.",
        "How would you explain the trade-offs of this design?",
        "Calculate the standard deviation of this dataset.",
        "Compare A vs B and justify your choice.",
        "Break down the factors that influence demand.",
        "Design an experiment to test plant growth.",
        "What are the ethical implications of using facial recognition in schools?"
    ]
    for q in tests:
        r = clf.classify(q)
        print(f"\nQ: {q}\n -> method={r['method']} | decided={r['decided']} | top={[(k, round(v, 3)) for k, v in r['top_levels']]}")

In [None]:
# bloom_lemmatizer_match.py
from collections import defaultdict
import re
import spacy

# 1-based level number -> level name, in RAW_TRIGGERS order.
l_n = dict(enumerate(RAW_TRIGGERS, start=1))
# ---------------- Setup spaCy for lemmatization ----------------
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model not installed yet: download it, then load again.
    import spacy.cli
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def normalize(text: str) -> str:
    """Delete punctuation (apostrophes and hyphens are kept), trim, and lowercase."""
    without_punct = re.sub(r"[^\w\s'-]", "", text)
    return without_punct.strip().lower()

def lemmatize(text: str) -> list[str]:
    """Return the lower-cased spaCy lemma of every non-blank token in *text*."""
    lemma_list = []
    for token in nlp(text):
        if token.text.strip():
            lemma_list.append(token.lemma_.lower())
    return lemma_list

# ---------------- Build lemma lookup ----------------
# level name -> set of every lemma that occurs in any of that level's triggers.
# Multi-word triggers contribute each of their lemmas individually.
LEVEL_TO_LEMMAS = {}
for level, verbs in RAW_TRIGGERS.items():
    lemmas = set()
    for v in verbs:
        lemmas.update(lemmatize(normalize(v)))
    LEVEL_TO_LEMMAS[level] = lemmas

# ---------------- Classification ----------------
def classify_question(question: str):
    """Rank levels by how many of their trigger lemmas occur in *question*.

    Returns:
        A dict with the original ``question``, the ``decided`` level (the
        top-ranked one, or None when nothing matched) and ``scores``: a list of
        ``(level, match_count)`` pairs sorted from most to fewest matches.
    """
    question_lemmas = set(lemmatize(question))
    match_counts = {
        level: len(question_lemmas & level_lemmas)
        for level, level_lemmas in LEVEL_TO_LEMMAS.items()
    }

    # most matches first; ties keep LEVEL_TO_LEMMAS order
    ranked = sorted(match_counts.items(), key=lambda pair: pair[1], reverse=True)
    top_level, top_count = ranked[0]

    if top_count > 0:
        decided = top_level
    else:
        # no trigger lemma matched at all
        decided = None

    return {
        "question": question,
        "decided": decided,
        "scores": ranked
    }
def check_if_assigned_correct_level(question, level):
    """Check whether *question* contains at least one trigger lemma of *level*.

    Args:
        question: question text. None and float NaN (an empty spreadsheet
            cell) are treated as an empty string; other non-strings are
            converted with ``str``.
        level: 1-based level number, looked up in ``l_n``.

    Returns:
        True if the level's name is among the levels with a non-zero match
        count for the question. False otherwise, including when *level* is
        not a known level number (e.g. NaN from a blank cell).
    """
    # Missing values: None, or NaN (the only float that is not equal to itself).
    if question is None or (isinstance(question, float) and question != question):
        question = ""
    elif not isinstance(question, str):
        question = str(question)

    expected = l_n.get(level)
    if expected is None:
        # Unknown level number: nothing it could match.
        return False

    result = classify_question(question)
    matched_levels = {name.lower().strip() for name, count in result['scores'] if count != 0}
    return expected.lower().strip() in matched_levels

In [None]:
# Sample questions for a quick manual check of the lemma-overlap matcher.
tests = [
    "List the steps of the Krebs cycle.",
    "How would you explain the trade-offs of this design?",
    "Calculate the standard deviation of this dataset.",
    "Compare A vs B and justify your choice.",
    "Break down the factors that influence demand.",
    "Design an experiment to test plant growth.",
    "What are the ethical implications of using facial recognition in schools?"
]

# Print, per question, whether it matches level 1 (the first key of RAW_TRIGGERS).
for q in tests:
    # Alternative debug output showing the full ranking (disabled):
    # r = classify_question(q)
    # print(f"\nQ: {q}\n -> decided={r['decided']} | scores={r['scores']}")
    print(check_if_assigned_correct_level(q,1))


In [None]:
!pip install xlsxwriter

In [None]:
from copy import copy
from openpyxl.utils import get_column_letter

def questions_level_eval(og_excel, mod_excel):
    """Add an 'Assigned Level Check' column to every PS* sheet and save a new workbook.

    For each sheet whose name starts with 'PS', every row's 'Questions' text is
    checked against the level derived from its 'Index' value (``Index % 6``,
    with 0 mapped to 6). The boolean result is inserted as the fifth column.
    Merged ranges of the original PS sheets are re-applied, shifted to account
    for the inserted column. A sheet named 'Summary', if present, is copied
    with its values, styles, dimensions and merged ranges.

    Args:
        og_excel: path of the workbook to read.
        mod_excel: path of the workbook to write.
    """
    # Imported locally so the function does not depend on another notebook
    # cell having been executed first.
    import pandas as pd
    from openpyxl import load_workbook

    wb = load_workbook(og_excel)
    ps_sheets = [sheet for sheet in wb.sheetnames if sheet.startswith('PS')]
    modified_dfs = {}

    for sheet_name in ps_sheets:
        df = pd.read_excel(og_excel, sheet_name=sheet_name)
        results = [
            check_if_assigned_correct_level(row['Questions'], row['Index'] % 6 or 6)
            for _, row in df.iterrows()
        ]
        # Insert at index 4 (fifth column)
        df.insert(4, 'Assigned Level Check', results)
        modified_dfs[sheet_name] = df

    with pd.ExcelWriter(mod_excel, engine='openpyxl') as writer:
        shift_index = 5  # 1-based worksheet column (E) where the new column now sits
        for sheet_name, modified_df in modified_dfs.items():
            modified_df.to_excel(writer, sheet_name=sheet_name, index=False)
            ws = writer.sheets[sheet_name]

            for merged_range in wb[sheet_name].merged_cells.ranges:
                start_col, start_row, end_col, end_row = merged_range.bounds
                if end_col >= shift_index:
                    # A range entirely at/after the insertion point moves right;
                    # one that straddles it is widened by one column.
                    if start_col >= shift_index:
                        start_col += 1
                    end_col += 1
                new_range = f"{get_column_letter(start_col)}{start_row}:{get_column_letter(end_col)}{end_row}"
                ws.merge_cells(new_range)

        # === Copy Summary sheet "as is" ===
        if "Summary" in wb.sheetnames:
            orig_summary = wb["Summary"]
            new_summary = writer.book.create_sheet("Summary")

            for row in orig_summary.iter_rows():
                for cell in row:
                    # `column` exists on regular and merged cells alike,
                    # whereas `col_idx` is missing on merged cells.
                    new_cell = new_summary.cell(row=cell.row, column=cell.column, value=cell.value)
                    if cell.has_style:
                        new_cell.font = copy(cell.font)
                        new_cell.border = copy(cell.border)
                        new_cell.fill = copy(cell.fill)
                        new_cell.number_format = copy(cell.number_format)
                        new_cell.protection = copy(cell.protection)
                        new_cell.alignment = copy(cell.alignment)

            # copy explicit column widths
            for col_letter, dim in orig_summary.column_dimensions.items():
                if dim.width is not None:
                    new_summary.column_dimensions[col_letter].width = dim.width

            # copy explicit row heights
            for row_idx, dim in orig_summary.row_dimensions.items():
                if dim.height is not None:
                    new_summary.row_dimensions[row_idx].height = dim.height

            # copy merged cells
            for merged_range in orig_summary.merged_cells.ranges:
                new_summary.merge_cells(str(merged_range))

    print(f"File saved successfully with merged cells as {mod_excel}")

In [None]:
import pandas as pd
from copy import copy
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter, column_index_from_string

def _copy_sheet_dimensions(src_ws, dst_ws, shift_index=None):
    """Copy explicit column widths and row heights from src_ws to dst_ws.

    When shift_index is given, widths of columns at or after that 1-based index
    are applied one column further right (room for an inserted column).
    """
    for col_letter, dim in src_ws.column_dimensions.items():
        target_letter = col_letter
        if shift_index is not None:
            try:
                col_idx = column_index_from_string(col_letter)
            except Exception:
                # not a usable column name: skip this entry
                continue
            target_letter = get_column_letter(col_idx + 1 if col_idx >= shift_index else col_idx)
        if dim.width is not None:
            dst_ws.column_dimensions[target_letter].width = dim.width

    for row_idx, dim in src_ws.row_dimensions.items():
        if dim.height is not None:
            dst_ws.row_dimensions[row_idx].height = dim.height


def _copy_merged_ranges(src_ws, dst_ws, shift_index=None):
    """Re-apply src_ws's merged ranges on dst_ws, optionally shifted for an inserted column."""
    for merged_range in src_ws.merged_cells.ranges:
        if shift_index is None:
            new_range = str(merged_range)
        else:
            start_col, start_row, end_col, end_row = merged_range.bounds
            if end_col >= shift_index:
                # entirely at/after the insertion point -> move; straddling -> widen
                if start_col >= shift_index:
                    start_col += 1
                end_col += 1
            new_range = f"{get_column_letter(start_col)}{start_row}:{get_column_letter(end_col)}{end_row}"
        try:
            dst_ws.merge_cells(new_range)
        except Exception:
            # Deliberately best-effort: a range that cannot be merged is skipped
            # so the rest of the workbook is still written.
            pass


def _copy_sheet_cells(src_ws, dst_ws):
    """Copy every cell value and (best-effort) its style from src_ws to dst_ws."""
    for r in range(1, src_ws.max_row + 1):
        for c in range(1, src_ws.max_column + 1):
            source_cell = src_ws.cell(row=r, column=c)
            new_cell = dst_ws.cell(row=r, column=c, value=source_cell.value)
            if getattr(source_cell, "has_style", False):
                try:
                    new_cell.font = copy(source_cell.font)
                    new_cell.border = copy(source_cell.border)
                    new_cell.fill = copy(source_cell.fill)
                    new_cell.number_format = copy(source_cell.number_format)
                    new_cell.protection = copy(source_cell.protection)
                    new_cell.alignment = copy(source_cell.alignment)
                except Exception:
                    # Deliberately best-effort: keep the value even if a style
                    # attribute cannot be copied.
                    pass


def grade_level_eval(og_excel, mod_excel, threshold=15):
    """Add a 'Grade Level Check' column to sheets PS2-PS5 and save a new workbook.

    For each of PS2..PS5 that exists, the 'Grade level' column (or the sixth
    column when no column has that name) is converted to numbers and compared
    with *threshold*; the boolean result (False for non-numeric cells) is
    inserted as the seventh column. Column widths, row heights and merged
    ranges are carried over, shifted for the inserted column. Every other
    sheet is copied as-is, and sheets are written in the original order.

    Args:
        og_excel: path of the workbook to read.
        mod_excel: path of the workbook to write.
        threshold: minimum grade level that counts as passing the check.
    """
    wb = load_workbook(og_excel)

    # prepare modified dataframes for PS2-PS5
    modified_dfs = {}
    for sheet_name in ("PS2", "PS3", "PS4", "PS5"):
        if sheet_name not in wb.sheetnames:
            continue
        df = pd.read_excel(og_excel, sheet_name=sheet_name)
        # prefer the named column, otherwise use the 6th column
        grade_column = df['Grade level'] if 'Grade level' in df.columns else df.iloc[:, 5]
        grades = pd.to_numeric(grade_column, errors='coerce')
        results = (grades >= threshold).fillna(False)
        df.insert(6, 'Grade Level Check', results.tolist())
        modified_dfs[sheet_name] = df

    shift_index = 7  # 1-based worksheet column where the new column now sits

    with pd.ExcelWriter(mod_excel, engine='openpyxl') as writer:
        # write sheets in the SAME order as the original workbook
        for sheet_name in wb.sheetnames:
            orig_ws = wb[sheet_name]

            if sheet_name in modified_dfs:
                modified_dfs[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
                new_ws = writer.sheets[sheet_name]
                _copy_sheet_dimensions(orig_ws, new_ws, shift_index)
                _copy_merged_ranges(orig_ws, new_ws, shift_index)
            else:
                # copy completely as-is (PS1, Summary, etc.)
                new_ws = writer.book.create_sheet(sheet_name)
                writer.sheets[sheet_name] = new_ws
                _copy_sheet_cells(orig_ws, new_ws)
                _copy_sheet_dimensions(orig_ws, new_ws)
                _copy_merged_ranges(orig_ws, new_ws)

        # remove the default "Sheet" if it exists and is empty
        if 'Sheet' in writer.book.sheetnames and len(writer.book.sheetnames) > 1:
            maybe = writer.book['Sheet']
            if maybe.max_row == 1 and maybe.max_column == 1 and maybe.cell(1, 1).value is None:
                writer.book.remove(maybe)

    print(f"File saved successfully as {mod_excel}")

In [None]:
# Run the Bloom-level check on the source workbook.
# The output name is the input name without its extension plus "-LevelCheck.xlsx".
file_name = "Large_Model_Questions_WITH_BERTSCORE_PRF_debertaxlargemnli.xlsx"
questions_level_eval(file_name, f"{file_name.split('.')[-2]}-LevelCheck.xlsx")

In [None]:
# Run the grade-level check on the "-LevelCheck.xlsx" workbook written by the previous
# cell, producing "-BloomAndGradeLevelsCheck.xlsx" (requires `file_name` from that cell).
grade_level_eval(f"{file_name.split('.')[-2]}-LevelCheck.xlsx", f"{file_name.split('.')[-2]}-BloomAndGradeLevelsCheck.xlsx")