In [1]:
!pip install groq rouge-score bert-score nltk
import nltk
nltk.download('punkt')

Collecting groq
  Downloading groq-0.36.0-py3-none-any.whl.metadata (16 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.36.0-py3-none-any.whl (137 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.3/137.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=d27481bde8efa62766ef49c6ca662e6cb66441346705b875b0877959a6fb3916
  Stored in directory: /root/.cache/pip/wheels/85/9d/af/01feefbe7d55ef5468796f0c6

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [2]:
%%javascript
function ClickConnect(){
  console.log("Clicking");
  document.querySelector("colab-toolbar-button#connect").click();
}
setInterval(ClickConnect, 60000)

<IPython.core.display.Javascript object>

In [None]:
#####################################################################
# 1. MOUNT DRIVE & IMPORTS
#####################################################################
from google.colab import drive
drive.mount('/content/drive')

import os, re, json, time, logging
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any

import pandas as pd
import numpy as np
from groq import Groq   # <-- CHANGED ONLY THIS


#####################################################################
# 2. PATHS & API KEY (GROQ)
#####################################################################
INPUT_FILE = "/content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx"

BASE_OUT = Path("/content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/")
BASE_OUT.mkdir(parents=True, exist_ok=True)

FINAL_OUTPUT_FILE = BASE_OUT / "llama-3.3-70b-versatile_instruction_full_output.xlsx"

API_KEY_PATH = "/content/drive/MyDrive/Final Thesis Code/api_keys/groq_key2.txt"

def load_key(path):
    with open(path, "r") as f:
        return f.read().strip()

API_KEY = load_key(API_KEY_PATH)
client = Groq(api_key=API_KEY)

print("Input file:", INPUT_FILE)
print("Output folder:", BASE_OUT)
print("Groq key loaded ✓")


#####################################################################
# 3. GLOBAL CONFIG
#####################################################################
MODEL_NAME     = "llama-3.3-70b-versatile"
MAX_CHARS      = 2600
GLOBAL_MIN_GAP = 25        # SAFE FOR FREE COLAB
LAST_TS        = 0.0

VALID_TOPICS = [
    "Natural Language Processing","Artificial Intelligence","Prompt Engineering",
    "Machine Learning","Deep Learning","Reinforcement Learning","Generative AI",
    "Data Science","Time Series","Statistics","LangChain","Langraph",
    "Python Programming","Mlops","Agentic AI","Other"
]


#####################################################################
# 4. LOGGING
#####################################################################
def setup_logging():
    logs = Path("/content/logs")
    logs.mkdir(exist_ok=True)
    logfile = logs / f"log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

    logging.basicConfig(
        level=logging.INFO,
        handlers=[logging.FileHandler(logfile, encoding="utf-8"),
                  logging.StreamHandler()],
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    return logging.getLogger(__name__)

logger = setup_logging()


#####################################################################
# 5. CLEANING & CHUNKING
#####################################################################
def deep_clean(text: str) -> str:
    t = str(text)
    t = re.sub(r'https?://\S+|www\.\S+', ' ', t)
    t = re.sub(r'\b[\w\.-]+@[\w\.-]+\.\w+\b', ' ', t)
    t = re.sub(r'\[.*?\]|\(.*?\)', ' ', t)
    t = re.sub(r'\s+', ' ', t)
    t = re.sub(r'\bNLP\b', 'Natural Language Processing (NLP)', t)
    t = re.sub(r'\bML\b', 'Machine Learning (ML)', t)
    t = re.sub(r'\bAI\b', 'Artificial Intelligence (AI)', t)
    return t.strip()

def chunk_text(text: str, max_chars: int = MAX_CHARS) -> List[str]:
    clean = deep_clean(text)
    if len(clean) <= max_chars:
        return [clean] if clean else [""]

    sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', clean) if s.strip()]
    chunks, cur = [], ""
    for s in sents:
        if len(cur) + len(s) + 1 <= max_chars:
            cur = f"{cur} {s}".strip()
        else:
            if cur:
                chunks.append(cur)
            cur = s
    if cur:
        chunks.append(cur)
    return chunks or [""]


#####################################################################
# 6. JSON EXTRACTION
#####################################################################
def extract_json(text: str) -> Dict[str, Any]:
    if not text:
        return {}
    s = text.find("{")
    e = text.rfind("}")
    if s == -1 or e == -1 or e <= s:
        return {}
    try:
        return json.loads(text[s:e+1])
    except:
        return {}


#####################################################################
# 7. GROQ CALL (RATE LIMIT SAFE)
#####################################################################
def groq_call(prompt: str, temperature: float = 0.2, retries: int = 3) -> str:
    global LAST_TS
    now = time.time()

    if LAST_TS > 0 and now - LAST_TS < GLOBAL_MIN_GAP:
        wait = GLOBAL_MIN_GAP - (now - LAST_TS)
        logger.info(f"Waiting {wait:.1f}s (respecting global gap)")
        time.sleep(wait)

    for attempt in range(1, retries+1):
        try:
            resp = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=2048,
            )
            LAST_TS = time.time()
            return resp.choices[0].message.content.strip()

        except Exception as e:
            logger.warning(f"Groq call failed ({attempt}/{retries}): {e}")
            time.sleep(5 * attempt)

    logger.error("Groq failed after all retries")
    return ""


#####################################################################
# 8. INSTRUCTION-BASED TASKS (PROMPTS UNCHANGED)
#####################################################################

##########################
# 8.1 SUMMARISATION
##########################
def generate_summary(transcript: str) -> str:
    chunks = chunk_text(transcript)
    partial_summaries = []

    for i, c in enumerate(chunks, 1):
        logger.info(f"Summarisation – chunk {i}/{len(chunks)}")

        prompt = f"""
INSTRUCTION:
Summarise the following educational transcript chunk in clear, academic English.

TASK OBJECTIVE:
Produce a concise and coherent summary that captures the key ideas, definitions, technical processes, and any conclusions.

REQUIREMENTS:
• Length: 80–120 words (4–6 sentences) for this chunk.
• Start with the main topic or focus of the chunk.
• Include important concepts, methods, or examples.
• Maintain logical flow and preserve domain-specific terminology.
• Do NOT include any reasoning steps or chain-of-thought.
• Do NOT add headings, bullet points, or commentary.

OUTPUT FORMAT:
{{"generated_summary":"<summary text>"}}

Transcript Chunk:
\"\"\"{c}\"\"\""""

        out = groq_call(prompt, temperature=0.18)
        j = extract_json(out)
        chunk_summary = j.get("generated_summary", "").strip() or out[:600]
        partial_summaries.append(chunk_summary)

    combined = " ".join(partial_summaries)

    final_prompt = f"""
INSTRUCTION:
Combine the draft summaries into one final summary for the full transcript.

TASK OBJECTIVE:
Produce a single coherent summary that captures the overall topic, key ideas, technical methods, and any final conclusions.

REQUIREMENTS:
• Length: 120–160 words.
• Start with the main overall topic.
• Integrate the main concepts and processes from all chunks.
• Maintain logical flow, academic tone, and technical precision.
• Do NOT include reasoning steps or chain-of-thought.
• Do NOT add headings or bullet points.

OUTPUT FORMAT:
{{"generated_summary":"<summary text>"}}

Draft Chunk Summaries:
\"\"\"{combined}\"\"\""""

    out2 = groq_call(final_prompt, temperature=0.18)
    j2 = extract_json(out2)
    return j2.get("generated_summary", "").strip() or out2[:900]


##########################
# 8.2 TOPIC CLASSIFICATION
##########################
def classify_topic(transcript: str, summary: str) -> List[str]:
    first_chunk = chunk_text(transcript)[0]
    topics_list = ", ".join(VALID_TOPICS)

    prompt = f"""
INSTRUCTION:
Classify the educational transcript into one or more relevant topics from the list below.

AVAILABLE TOPICS:
{topics_list}

TASK OBJECTIVE:
Identify which topics best describe the main technical and conceptual content of the transcript.

GUIDELINES:
• Choose up to THREE relevant topics.
• Use the summary as a high-level guide.
• Only use topics from the AVAILABLE TOPICS list.
• If nothing fits, use "Other".

OUTPUT FORMAT:
{{"predicted_topics":["<TOPIC1>","<TOPIC2>",...] }}

Summary (hint):
\"\"\"{summary[:350]}\"\"\"

Transcript Chunk:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.22)
    j = extract_json(out)
    topics = j.get("predicted_topics", [])

    if isinstance(topics, str):
        topics = [topics]

    cleaned = []
    for t in topics:
        for v in VALID_TOPICS:
            if t.lower() == v.lower():
                cleaned.append(v)
                break

    return list(dict.fromkeys(cleaned))[:3] or ["Other"]


##########################
# 8.3 Q&A GENERATION
##########################
def generate_qa(transcript: str) -> str:
    first_chunk = chunk_text(transcript)[0]

    prompt = f"""
INSTRUCTION:
Generate five question–answer pairs based on the transcript content.

TASK OBJECTIVE:
Form comprehension questions that test understanding of key ideas, reasoning, and examples discussed in the transcript.

GUIDELINES:
• Create EXACTLY five (5) question–answer pairs.
• Each pair should begin with a different question type:
  1. What – factual or definitional
  2. Why – reasoning or purpose
  3. How – process or mechanism
  4. When – timing or condition
  5. Who – person, system, or entity
• Each answer must be directly supported by information in the transcript.
• Keep answers concise (maximum 25 words).
• Avoid generic or meta questions.
• Ensure all questions are technically relevant and educational.

OUTPUT FORMAT:
Return ONLY a one-line JSON object:
{{"generated_questions":[{{"q":"...","a":"..."}}, ...]}}

Transcript:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.15)
    j = extract_json(out)

    lines = []
    for qa in j.get("generated_questions", []):
        q = qa.get("q", "").strip()
        a = qa.get("a", "").strip()
        if q: lines.append(f"Q: {q}")
        if a: lines.append(f"A: {a}")
    return "\n".join(lines)


##########################
# 8.4 KEY CONCEPT EXTRACTION
##########################
def generate_concepts(transcript: str) -> str:
    first_chunk = chunk_text(transcript)[0]

    prompt = f"""
INSTRUCTION:
Extract the key technical concepts and terms that represent the main ideas in the transcript.

TASK OBJECTIVE:
Identify and list core terminology, methods, or technical phrases.

GUIDELINES:
• Extract 10–12 distinct concepts.
• Prefer multi-word technical phrases.
• Exclude generic words.
• Capitalise each concept.
• No duplicates.

OUTPUT FORMAT:
{{"key_concepts":["Concept 1","Concept 2",...]}}

Transcript:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.22)
    j = extract_json(out)
    concepts = j.get("key_concepts", [])

    if not isinstance(concepts, list):
        concepts = []

    return ", ".join([c.strip() for c in concepts if c.strip()])


#####################################################################
# 9. MAIN PIPELINE (NO EVALUATION)
#####################################################################
def run_pipeline() -> pd.DataFrame:
    df = pd.read_excel(INPUT_FILE)
    results = []

    if FINAL_OUTPUT_FILE.exists():
        old = pd.read_excel(FINAL_OUTPUT_FILE)
        if "row_index" in old.columns:
            done = set(old["row_index"])
            results = old.to_dict(orient="records")
            print(f"Resuming: {len(done)} rows already processed.")
        else:
            done = set()
    else:
        done = set()

    for idx, row in df.iterrows():
        if idx in done:
            print(f"Skipping row {idx}")
            continue

        title = str(row.get("title", "")).strip()
        transcript = str(row.get("transcript", "")).strip()

        print("\n" + "="*80)
        print(f"PROCESSING ROW {idx}: {title}")
        print("="*80)

        try:
            summary = generate_summary(transcript)
            topics = classify_topic(transcript, summary)
            qa_text = generate_qa(transcript)
            concepts = generate_concepts(transcript)
        except Exception as e:
            logger.error(f"Error row {idx}: {e}")
            summary, topics, qa_text, concepts = "", ["Other"], "", ""

        print("\nSUMMARY:\n", summary)
        print("\nTOPICS:\n", topics)
        print("\nQ&A:\n", qa_text)
        print("\nKEY CONCEPTS:\n", concepts)

        rec = {
            "row_index": idx,
            "title": title,
            "summary": summary,
            "topic_classification": ", ".join(topics),
            "Q_and_A": qa_text,
            "key_concepts": concepts
        }

        results.append(rec)
        pd.DataFrame(results).to_excel(FINAL_OUTPUT_FILE, index=False)
        print(f"Saved row {idx}")

    df_out = pd.DataFrame(results)
    df_out.to_excel(FINAL_OUTPUT_FILE, index=False)
    print("\nDONE. Final file saved:", FINAL_OUTPUT_FILE)
    return df_out


#####################################################################
# 10. RUN
#####################################################################
df_out = run_pipeline()
print("\nInstruction-based (Groq) pipeline completed successfully.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Input file: /content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx
Output folder: /content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile
Groq key loaded ✓

PROCESSING ROW 0: Reinforcement Learning through Human Feedback - EXPLAINED! | RLHF

SUMMARY:
 Reinforcement learning with human feedback is a powerful framework that integrates human input into the training process, guiding and accelerating learning. By utilizing algorithms such as Q-learning and proximal policy optimization, human feedback can be incorporated to enable more informed decision-making. This approach is exemplified in ChatGPT, where a rewards model provides human feedback to enhance response generation capabilities. The resulting framework enables more efficient and effective learning, making it a valuable tool for various applications, and 

ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries



SUMMARY:
 Generative Artificial Intelligence (AI) creates new content based on patterns learned from existing data, utilizing large language models trained on vast amounts of internet data. This technology enables AI agents to take input, think, and act to complete tasks, leveraging tools and knowledge to make decisions. Agentic AI takes this a step further, involving one or more agents working autonomously to reach complex goals through multi-step reasoning and planning, allowing for the handling of sophisticated tasks such as flight booking and visa applications with increasing complexity.

TOPICS:
 ['Generative AI', 'Artificial Intelligence', 'Agentic AI']

Q&A:
 

KEY CONCEPTS:
 
Saved row 12

PROCESSING ROW 13: Covariance in Statistics


ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries



SUMMARY:
 The main overall topic revolves around the discussion of advanced technical methods and their applications. Key ideas include the integration of complex processes and the utilization of specialized tools to achieve precise outcomes. The technical methods employed involve a combination of theoretical frameworks and practical approaches, ensuring a comprehensive understanding of the subject matter. Through the application of these methods, researchers can draw meaningful conclusions and make informed decisions. The overall discussion highlights the importance of technical precision and academic rigor in achieving accurate results, ultimately contributing to the advancement of knowledge in the field.

TOPICS:
 ['Other']

Q&A:
 

KEY CONCEPTS:
 
Saved row 13

PROCESSING ROW 14: 3. Objective || End to End AI Tutorial


ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries



SUMMARY:
 The main overall topic revolves around the discussion of advanced technical methods and their applications. Key ideas include the integration of complex processes and the utilization of specialized tools to achieve precise outcomes. The technical methods employed involve a combination of theoretical frameworks and practical approaches, ensuring a comprehensive understanding of the subject matter. Through the application of these methods, researchers can draw meaningful conclusions and make informed decisions. The overall discussion highlights the importance of a multidisciplinary approach, incorporating various fields of study to foster innovation and progress. Ultimately, the topic emphasizes the significance of technical precision and academic rigor in driving advancements and achieving desired results.

TOPICS:
 ['Other']

Q&A:
 

KEY CONCEPTS:
 
Saved row 14

PROCESSING ROW 15: Python Training - Python Dictionary Basics


ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries
ERROR:__main__:Groq failed after all retries


KeyboardInterrupt: 

In [3]:
#####################################################################
# 1. MOUNT DRIVE & IMPORTS
#####################################################################
from google.colab import drive
drive.mount('/content/drive')

import os, re, json, time, logging
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any

import pandas as pd
import numpy as np
from groq import Groq   # <-- CHANGED ONLY THIS


#####################################################################
# 2. PATHS & API KEY (GROQ)
#####################################################################
INPUT_FILE = "/content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx"

BASE_OUT = Path("/content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/")
BASE_OUT.mkdir(parents=True, exist_ok=True)

FINAL_OUTPUT_FILE = BASE_OUT / "llama-3.3-70b-versatile_instruction_full_output.xlsx"

API_KEY_PATH = "/content/drive/MyDrive/Final Thesis Code/api_keys/groq_key1.txt"

def load_key(path):
    with open(path, "r") as f:
        return f.read().strip()

API_KEY = load_key(API_KEY_PATH)
client = Groq(api_key=API_KEY)

print("Input file:", INPUT_FILE)
print("Output folder:", BASE_OUT)
print("Groq key loaded ✓")


#####################################################################
# 3. GLOBAL CONFIG
#####################################################################
MODEL_NAME     = "llama-3.3-70b-versatile"
MAX_CHARS      = 2600
GLOBAL_MIN_GAP = 25        # SAFE FOR FREE COLAB
LAST_TS        = 0.0

VALID_TOPICS = [
    "Natural Language Processing","Artificial Intelligence","Prompt Engineering",
    "Machine Learning","Deep Learning","Reinforcement Learning","Generative AI",
    "Data Science","Time Series","Statistics","LangChain","Langraph",
    "Python Programming","Mlops","Agentic AI","Other"
]


#####################################################################
# 4. LOGGING
#####################################################################
def setup_logging():
    logs = Path("/content/logs")
    logs.mkdir(exist_ok=True)
    logfile = logs / f"log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"

    logging.basicConfig(
        level=logging.INFO,
        handlers=[logging.FileHandler(logfile, encoding="utf-8"),
                  logging.StreamHandler()],
        format="%(asctime)s | %(levelname)s | %(message)s",
    )
    return logging.getLogger(__name__)

logger = setup_logging()


#####################################################################
# 5. CLEANING & CHUNKING
#####################################################################
def deep_clean(text: str) -> str:
    t = str(text)
    t = re.sub(r'https?://\S+|www\.\S+', ' ', t)
    t = re.sub(r'\b[\w\.-]+@[\w\.-]+\.\w+\b', ' ', t)
    t = re.sub(r'\[.*?\]|\(.*?\)', ' ', t)
    t = re.sub(r'\s+', ' ', t)
    t = re.sub(r'\bNLP\b', 'Natural Language Processing (NLP)', t)
    t = re.sub(r'\bML\b', 'Machine Learning (ML)', t)
    t = re.sub(r'\bAI\b', 'Artificial Intelligence (AI)', t)
    return t.strip()

def chunk_text(text: str, max_chars: int = MAX_CHARS) -> List[str]:
    clean = deep_clean(text)
    if len(clean) <= max_chars:
        return [clean] if clean else [""]

    sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', clean) if s.strip()]
    chunks, cur = [], ""
    for s in sents:
        if len(cur) + len(s) + 1 <= max_chars:
            cur = f"{cur} {s}".strip()
        else:
            if cur:
                chunks.append(cur)
            cur = s
    if cur:
        chunks.append(cur)
    return chunks or [""]


#####################################################################
# 6. JSON EXTRACTION
#####################################################################
def extract_json(text: str) -> Dict[str, Any]:
    if not text:
        return {}
    s = text.find("{")
    e = text.rfind("}")
    if s == -1 or e == -1 or e <= s:
        return {}
    try:
        return json.loads(text[s:e+1])
    except:
        return {}


#####################################################################
# 7. GROQ CALL (RATE LIMIT SAFE)
#####################################################################
def groq_call(prompt: str, temperature: float = 0.2, retries: int = 3) -> str:
    global LAST_TS
    now = time.time()

    if LAST_TS > 0 and now - LAST_TS < GLOBAL_MIN_GAP:
        wait = GLOBAL_MIN_GAP - (now - LAST_TS)
        logger.info(f"Waiting {wait:.1f}s (respecting global gap)")
        time.sleep(wait)

    for attempt in range(1, retries+1):
        try:
            resp = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=2048,
            )
            LAST_TS = time.time()
            return resp.choices[0].message.content.strip()

        except Exception as e:
            err_msg = str(e).lower()
            if "cloudflare" in err_msg or "503" in err_msg or "500" in err_msg:
                  logger.warning(
                      f"[Cloudflare Error] ({attempt}/{retries}) – Internal server error from Groq. Retrying..."
                  )
            elif "429" in err_msg:
                  logger.warning(
                      f"[Rate Limit] ({attempt}/{retries}) – Groq rate limit hit. Retrying..."
                  )
            else:
                  logger.warning(
                      f"[Groq Error] ({attempt}/{retries}) – {str(e)[:200]} ..."
                  )
            time.sleep(5 * attempt)

    logger.error("Groq failed after all retries")
    return ""


#####################################################################
# 8. INSTRUCTION-BASED TASKS (PROMPTS UNCHANGED)
#####################################################################

##########################
# 8.1 SUMMARISATION
##########################
def generate_summary(transcript: str) -> str:
    chunks = chunk_text(transcript)
    partial_summaries = []

    for i, c in enumerate(chunks, 1):
        logger.info(f"Summarisation – chunk {i}/{len(chunks)}")

        prompt = f"""
INSTRUCTION:
Summarise the following educational transcript chunk in clear, academic English.

TASK OBJECTIVE:
Produce a concise and coherent summary that captures the key ideas, definitions, technical processes, and any conclusions.

REQUIREMENTS:
• Length: 80–120 words (4–6 sentences) for this chunk.
• Start with the main topic or focus of the chunk.
• Include important concepts, methods, or examples.
• Maintain logical flow and preserve domain-specific terminology.
• Do NOT include any reasoning steps or chain-of-thought.
• Do NOT add headings, bullet points, or commentary.

OUTPUT FORMAT:
{{"generated_summary":"<summary text>"}}

Transcript Chunk:
\"\"\"{c}\"\"\""""

        out = groq_call(prompt, temperature=0.18)
        j = extract_json(out)
        chunk_summary = j.get("generated_summary", "").strip() or out[:600]
        partial_summaries.append(chunk_summary)

    combined = " ".join(partial_summaries)

    final_prompt = f"""
INSTRUCTION:
Combine the draft summaries into one final summary for the full transcript.

TASK OBJECTIVE:
Produce a single coherent summary that captures the overall topic, key ideas, technical methods, and any final conclusions.

REQUIREMENTS:
• Length: 120–160 words.
• Start with the main overall topic.
• Integrate the main concepts and processes from all chunks.
• Maintain logical flow, academic tone, and technical precision.
• Do NOT include reasoning steps or chain-of-thought.
• Do NOT add headings or bullet points.

OUTPUT FORMAT:
{{"generated_summary":"<summary text>"}}

Draft Chunk Summaries:
\"\"\"{combined}\"\"\""""

    out2 = groq_call(final_prompt, temperature=0.18)
    j2 = extract_json(out2)
    return j2.get("generated_summary", "").strip() or out2[:900]


##########################
# 8.2 TOPIC CLASSIFICATION
##########################
def classify_topic(transcript: str, summary: str) -> List[str]:
    first_chunk = chunk_text(transcript)[0]
    topics_list = ", ".join(VALID_TOPICS)

    prompt = f"""
INSTRUCTION:
Classify the educational transcript into one or more relevant topics from the list below.

AVAILABLE TOPICS:
{topics_list}

TASK OBJECTIVE:
Identify which topics best describe the main technical and conceptual content of the transcript.

GUIDELINES:
• Choose up to THREE relevant topics.
• Use the summary as a high-level guide.
• Only use topics from the AVAILABLE TOPICS list.
• If nothing fits, use "Other".

OUTPUT FORMAT:
{{"predicted_topics":["<TOPIC1>","<TOPIC2>",...] }}

Summary (hint):
\"\"\"{summary[:350]}\"\"\"

Transcript Chunk:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.22)
    j = extract_json(out)
    topics = j.get("predicted_topics", [])

    if isinstance(topics, str):
        topics = [topics]

    cleaned = []
    for t in topics:
        for v in VALID_TOPICS:
            if t.lower() == v.lower():
                cleaned.append(v)
                break

    return list(dict.fromkeys(cleaned))[:3] or ["Other"]


##########################
# 8.3 Q&A GENERATION
##########################
def generate_qa(transcript: str) -> str:
    first_chunk = chunk_text(transcript)[0]

    prompt = f"""
INSTRUCTION:
Generate five question–answer pairs based on the transcript content.

TASK OBJECTIVE:
Form comprehension questions that test understanding of key ideas, reasoning, and examples discussed in the transcript.

GUIDELINES:
• Create EXACTLY five (5) question–answer pairs.
• Each pair should begin with a different question type:
  1. What – factual or definitional
  2. Why – reasoning or purpose
  3. How – process or mechanism
  4. When – timing or condition
  5. Who – person, system, or entity
• Each answer must be directly supported by information in the transcript.
• Keep answers concise (maximum 25 words).
• Avoid generic or meta questions.
• Ensure all questions are technically relevant and educational.

OUTPUT FORMAT:
Return ONLY a one-line JSON object:
{{"generated_questions":[{{"q":"...","a":"..."}}, ...]}}

Transcript:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.15)
    j = extract_json(out)

    lines = []
    for qa in j.get("generated_questions", []):
        q = qa.get("q", "").strip()
        a = qa.get("a", "").strip()
        if q: lines.append(f"Q: {q}")
        if a: lines.append(f"A: {a}")
    return "\n".join(lines)


##########################
# 8.4 KEY CONCEPT EXTRACTION
##########################
def generate_concepts(transcript: str) -> str:
    first_chunk = chunk_text(transcript)[0]

    prompt = f"""
INSTRUCTION:
Extract the key technical concepts and terms that represent the main ideas in the transcript.

TASK OBJECTIVE:
Identify and list core terminology, methods, or technical phrases.

GUIDELINES:
• Extract 10–12 distinct concepts.
• Prefer multi-word technical phrases.
• Exclude generic words.
• No duplicates.

OUTPUT FORMAT:
{{"key_concepts":["Concept 1","Concept 2",...]}}

Transcript:
\"\"\"{first_chunk}\"\"\""""

    out = groq_call(prompt, temperature=0.22)
    j = extract_json(out)
    concepts = j.get("key_concepts", [])

    if not isinstance(concepts, list):
        concepts = []

    return ", ".join([c.strip() for c in concepts if c.strip()])


#####################################################################
# 9. MAIN PIPELINE (NO EVALUATION)
#####################################################################
def run_pipeline() -> pd.DataFrame:
    df = pd.read_excel(INPUT_FILE)
    results = []

    if FINAL_OUTPUT_FILE.exists():
        old = pd.read_excel(FINAL_OUTPUT_FILE)
        if "row_index" in old.columns:
            done = set(old["row_index"])
            results = old.to_dict(orient="records")
            print(f"Resuming: {len(done)} rows already processed.")
        else:
            done = set()
    else:
        done = set()

    for idx, row in df.iterrows():
        if idx in done:
            print(f"Skipping row {idx}")
            continue

        title = str(row.get("title", "")).strip()
        transcript = str(row.get("transcript", "")).strip()

        print("\n" + "="*80)
        print(f"PROCESSING ROW {idx}: {title}")
        print("="*80)

        try:
            summary = generate_summary(transcript)
            topics = classify_topic(transcript, summary)
            qa_text = generate_qa(transcript)
            concepts = generate_concepts(transcript)
        except Exception as e:
            logger.error(f"Error row {idx}: {e}")
            summary, topics, qa_text, concepts = "", ["Other"], "", ""

        print("\nSUMMARY:\n", summary)
        print("\nTOPICS:\n", topics)
        print("\nQ&A:\n", qa_text)
        print("\nKEY CONCEPTS:\n", concepts)

        rec = {
            "row_index": idx,
            "title": title,
            "summary": summary,
            "topic_classification": ", ".join(topics),
            "Q_and_A": qa_text,
            "key_concepts": concepts
        }

        results.append(rec)
        pd.DataFrame(results).to_excel(FINAL_OUTPUT_FILE, index=False)
        print(f"Saved row {idx}")

    df_out = pd.DataFrame(results)
    df_out.to_excel(FINAL_OUTPUT_FILE, index=False)
    print("\nDONE. Final file saved:", FINAL_OUTPUT_FILE)
    return df_out


#####################################################################
# 10. RUN
#####################################################################
df_out = run_pipeline()
print("\nInstruction-based (Groq) pipeline completed successfully.")


Mounted at /content/drive
Input file: /content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx
Output folder: /content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile
Groq key loaded ✓
Resuming: 17 rows already processed.
Skipping row 0
Skipping row 1
Skipping row 2
Skipping row 3
Skipping row 4
Skipping row 5
Skipping row 6
Skipping row 7
Skipping row 8
Skipping row 9
Skipping row 10
Skipping row 11
Skipping row 12
Skipping row 13
Skipping row 14
Skipping row 15
Skipping row 16

PROCESSING ROW 17: Meta Llama 3 Is Here- And It Will Rule the Open Source LLM Models

SUMMARY:
 The main topic is Meta Llama 3, an open-source LLM model released by Meta, offering 8 billion and 70 billion pre-trained and instruction-tuned versions. This model excels at language nuances and complex tasks, outperforming other models, including paid LLM models. It has been trained on a large dataset and supports an 8K context length. The model's performance is notabl

In [6]:
#####################################################################
# 1. IMPORTS
#####################################################################
import os, re, json, warnings
import pandas as pd
import numpy as np

from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from bert_score import score as bert_score

from sklearn.metrics import precision_recall_fscore_support


#####################################################################
# 2. SUPPRESS WARNINGS (BERTScore spam)
#####################################################################
warnings.filterwarnings("ignore")
import logging
logging.getLogger("transformers").setLevel(logging.ERROR)
logging.getLogger("absl").setLevel(logging.ERROR)


#####################################################################
# 3. PATHS (EDIT THESE)
#####################################################################
INPUT_FILE = "/content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx"
OUTPUT_FILE = "/content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/llama-3.3-70b-versatile_instruction_full_output.xlsx"
FINAL_EVAL_JSON = "/content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/evaluation_final.json"

print("Loaded input:", INPUT_FILE)
print("Loaded model output:", OUTPUT_FILE)


#####################################################################
# 4. GOLD TOPIC EXTRACTION (KEYWORD-BASED — FINAL VERSION)
#####################################################################
def gold_topics_from_ref_summary(ref_sum: str):
    text = (ref_sum or "").lower()
    matched = []

    rules = [
        ("Natural Language Processing", [
            "nlp", "bert", "transformer", "language model", "token",
            "text processing", "semantic", "embedding"
        ]),
        ("Artificial Intelligence", [
            "artificial intelligence", "ai system", "symbolic ai",
            "reasoning", "planning", "search"
        ]),
        ("Prompt Engineering", [
            "prompt", "few-shot", "zero-shot", "instruction",
            "cot", "chain-of-thought", "in-context learning"
        ]),
        ("Machine Learning", [
            "machine learning", "supervised", "unsupervised", "regression",
            "classification", "clustering", "features"
        ]),
        ("Deep Learning", [
            "deep learning", "neural network", "cnn", "rnn",
            "lstm", "gan", "transformer model", "backpropagation"
        ]),
        ("Reinforcement Learning", [
            "reinforcement", "policy gradient", "q-learning",
            "reward", "actor-critic", "rlhf"
        ]),
        ("Generative AI", [
            "genai", "text generation", "image generation",
            "diffusion", "sampling", "generation model", "llm"
        ]),
        ("Data Science", [
            "data science", "visualization", "feature", "pandas",
            "analysis", "data preprocessing", "eda"
        ]),
        ("Time Series", [
            "time series", "forecasting", "temporal", "trend",
            "seasonality", "arima", "prophet", "lag"
        ]),
        ("Statistics", [
            "statistics", "probability", "distribution", "variance",
            "hypothesis", "confidence interval", "p-value"
        ]),
        ("LangChain", [
            "langchain", "chain", "memory", "retriever",
            "agent executor", "llmchain", "prompt template"
        ]),
        ("Langraph", [
            "langraph", "workflow", "graph", "multi-agent orchestration",
            "node", "edge", "state graph"
        ]),
        ("Python Programming", [
            "python", "numpy", "matplotlib", "function",
            "loop", "list comprehension", "script"
        ]),
        ("Mlops", [
            "mlops", "deployment", "monitoring", "pipeline",
            "model registry", "cicd", "serving"
        ]),
        ("Agentic AI", [
            "agentic", "tool calling", "multi-agent",
            "planner", "agent", "reasoning agent", "autonomous"
        ])
    ]

    for label, keywords in rules:
        if any(kw in text for kw in keywords):
            matched.append(label)

    return matched or ["Other"]


#####################################################################
# 5. TOKENIZER FOR QA & CONCEPTS
#####################################################################
STOPWORDS = set([
    "the","a","an","in","on","for","to","and","or","of","with","as",
    "by","at","from","that","this","is","are","was","were","be","been",
    "it","its","into","about","over","under","between","across",
    "through","their","they","you","your","we","our"
])

def tokenize(text: str):
    return [
        t for t in re.findall(r"[A-Za-z][A-Za-z0-9\-_\’']+", text.lower())
        if t not in STOPWORDS
    ]


#####################################################################
# 6. FINAL EVALUATION FUNCTION  (FULL AND CORRECT)
#####################################################################
def evaluate(df_out: pd.DataFrame, df_ref: pd.DataFrame):

    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    smooth = SmoothingFunction().method1

    sum_r, sum_b, sum_bert = [], [], []
    overlap_acc_list, jaccard_list, micro_f1_list = [], [], []
    macro_f1_list, weighted_f1_list = [], []
    qa_bleu, qa_div, qa_ans = [], [], []
    kc_p, kc_r, kc_f = [], [], []

    VALID_TOPICS = [
        "Natural Language Processing", "Artificial Intelligence", "Prompt Engineering",
        "Machine Learning", "Deep Learning", "Reinforcement Learning", "Generative AI",
        "Data Science", "Time Series", "Statistics", "LangChain", "Langraph",
        "Python Programming", "Mlops", "Agentic AI", "Other"
    ]

    # for macro/weighted F1
    all_true, all_pred = [], []

    for _, row in df_out.iterrows():
        idx = int(row["row_index"])
        ref_summary = df_ref.loc[idx, "Reference Summary"] or ""

        # -------------------- Summarisation --------------------
        gen_sum = row["summary"] or ""
        r = rouge.score(ref_summary, gen_sum)['rougeL'].fmeasure
        b = sentence_bleu([ref_summary.split()], gen_sum.split(), smoothing_function=smooth)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            P, R, F1 = bert_score([gen_sum], [ref_summary], lang='en', verbose=False)

        sum_r.append(r)
        sum_b.append(b)
        sum_bert.append(float(F1.mean()))

        # -------------------- Topic Classification --------------------
        gold = gold_topics_from_ref_summary(ref_summary)
        pred = [x.strip() for x in (row["topic_classification"] or "").split(",") if x.strip()]

        set_pred = set(pred)
        set_gold = set(gold)

        # Overlap Accuracy (your metric)
        overlap_acc = 1.0 if len(set_pred & set_gold) > 0 else 0.0

        # Jaccard
        inter = len(set_pred & set_gold)
        union = len(set_pred | set_gold)
        jaccard = inter / union if union > 0 else 0.0

        # Micro-F1
        tp = inter
        fp = len([p for p in pred if p not in gold])
        fn = len([g for g in gold if g not in pred])

        prec = tp / (tp + fp) if (tp + fp) else 0.0
        rec  = tp / (tp + fn) if (tp + fn) else 0.0
        micro_f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) else 0.0

        overlap_acc_list.append(overlap_acc)
        jaccard_list.append(jaccard)
        micro_f1_list.append(micro_f1)

        # Macro/Weighted F1 prep
        true_bin = [1 if t in gold else 0 for t in VALID_TOPICS]
        pred_bin = [1 if t in pred else 0 for t in VALID_TOPICS]

        all_true.append(true_bin)
        all_pred.append(pred_bin)

        # -------------------- Q&A --------------------
        qa_text = row["Q_and_A"] or ""
        qs = [l[2:].strip() for l in qa_text.splitlines() if l.lower().startswith("q:")]

        gold_qs = [
            "What is the main topic discussed in the video?",
            "Why is this topic important?",
            "How is the core concept explained?",
            "What example is mentioned in the content?",
            "What is the key conclusion of the video?"
        ]

        if qs:
            bleu_vals = [
                sentence_bleu([g.split()], q.split(), smoothing_function=smooth)
                for g in gold_qs for q in qs
            ]
            qa_bleu.append(np.mean(bleu_vals))
        else:
            qa_bleu.append(0.0)

        toks = [t for q in qs for t in q.split()]
        qa_div.append(len(set(toks)) / len(toks) if toks else 0.0)

        ref_tokens = set(tokenize(ref_summary))
        ans_count = sum(
            1 for q in qs
            if len(set(tokenize(q)) & ref_tokens) / max(1, len(tokenize(q))) >= 0.3
        )
        qa_ans.append(ans_count / len(qs) if qs else 0.0)

        # -------------------- Key Concepts --------------------
        kc_text = str(row.get("key_concepts", "") or "")
        pred_concepts = [c.strip().lower() for c in kc_text.split(",") if c.strip()]

        ref_concepts = tokenize(ref_summary)
        ref_top = ref_concepts[:25]

        tp_kc = len([p for p in pred_concepts[:10] if any(p in r or r in p for r in ref_top)])

        p_val = tp_kc / 10
        r_val = tp_kc / len(ref_top) if ref_top else 0
        f1_val = (2*p_val*r_val/(p_val+r_val)) if (p_val+r_val) else 0

        kc_p.append(p_val)
        kc_r.append(r_val)
        kc_f.append(f1_val)

    # Compute macro/weighted F1
    all_true = np.array(all_true)
    all_pred = np.array(all_pred)

    macro_f1 = precision_recall_fscore_support(all_true, all_pred, average="macro", zero_division=0)[2]
    weighted_f1 = precision_recall_fscore_support(all_true, all_pred, average="weighted", zero_division=0)[2]

    return {
        "Summarisation": {
            "ROUGE-L F1": float(np.mean(sum_r)),
            "BLEU": float(np.mean(sum_b)),
            "BERTScore F1": float(np.mean(sum_bert))
        },
        "Topic Classification": {
            "Overlap Accuracy": float(np.mean(overlap_acc_list)),
            "Jaccard Index": float(np.mean(jaccard_list)),
            "Micro F1": float(np.mean(micro_f1_list)),
            "Macro F1": float(macro_f1),
            "Weighted F1": float(weighted_f1)
        },
        "Q&A Generation": {
            "BLEU": float(np.mean(qa_bleu)),
            "Diversity": float(np.mean(qa_div)),
            "Answerability": float(np.mean(qa_ans))
        },
        "Key Concept Extraction": {
            "Precision@10": float(np.mean(kc_p)),
            "Recall@10": float(np.mean(kc_r)),
            "F1@10": float(np.mean(kc_f))
        }
    }


#####################################################################
# 7. RUN EVALUATION
#####################################################################
df_ref = pd.read_excel(INPUT_FILE)
df_out = pd.read_excel(OUTPUT_FILE)

eval_summary = evaluate(df_out, df_ref)

print("\n==================== FINAL EVALUATION METRICS ====================")
for task, vals in eval_summary.items():
    print(f"\n{task}:")
    for metric, value in vals.items():
        print(f"  - {metric}: {value:.4f}")

with open(FINAL_EVAL_JSON, "w") as f:
    json.dump(eval_summary, f, indent=2)

print("\nSaved corrected evaluation JSON to:", FINAL_EVAL_JSON)


Loaded input: /content/drive/MyDrive/Final Thesis Code/Input/clean_input_30.xlsx
Loaded model output: /content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/llama-3.3-70b-versatile_instruction_full_output.xlsx


Summarisation:
  - ROUGE-L F1: 0.2901
  - BLEU: 0.0587
  - BERTScore F1: 0.8863

Topic Classification:
  - Overlap Accuracy: 0.8667
  - Jaccard Index: 0.3411
  - Micro F1: 0.4598
  - Macro F1: 0.4189
  - Weighted F1: 0.4275

Q&A Generation:
  - BLEU: 0.0297
  - Diversity: 0.7584
  - Answerability: 0.6867

Key Concept Extraction:
  - Precision@10: 0.6267
  - Recall@10: 0.2507
  - F1@10: 0.3581

Saved corrected evaluation JSON to: /content/drive/MyDrive/Final Thesis Code/Output/Instruction Prompting/llama-3.3-70b-versatile/evaluation_final.json
