In [1]:
import sounddevice as sd
import numpy as np
import queue
import threading
import io
from pydub import AudioSegment
from pydub.effects import normalize

# Module-level state shared by audio_callback, start_recording and stop_recording.
# Queue that receives a copy of every captured chunk (filled by audio_callback).
q = queue.Queue()
# Flag polled by the recording thread: start_recording() sets it, stop_recording() clears it.
recording = False
# Chunks accumulated by audio_callback; stop_recording() concatenates them into one array.
audio_frames = []

# Callback function that runs when new audio data is available
def audio_callback(indata, frames, time, status):
    """Store one captured audio chunk in the queue and in the frame list.

    Passed as ``callback`` to ``sd.InputStream`` by ``start_recording``.

    Args:
        indata: Buffer holding the captured chunk; it is copied before being stored.
        frames: Not used by this callback.
        time: Not used by this callback.
        status: Stream status; printed when truthy.
    """
    if status:
        print(status)
    # Copy once and share the copy between both containers: the callback runs for
    # every chunk, so a second identical allocation is wasted work. The copy itself
    # is kept — presumably the stream reuses `indata` after the callback returns.
    chunk = indata.copy()
    q.put(chunk)
    audio_frames.append(chunk)

# Function to start recording
def start_recording(samplerate=44100, channels=1):  # Changed to mono
    """Begin capturing audio on a background daemon thread.

    Resets the shared frame buffer, raises the ``recording`` flag and starts a
    thread that keeps an input stream open until the flag is cleared.

    Args:
        samplerate: Sample rate passed to ``sd.InputStream``.
        channels: Channel count passed to ``sd.InputStream``.

    Returns:
        The started ``threading.Thread``.
    """
    global recording, audio_frames

    # Fresh buffer first, then raise the flag the worker loop polls.
    audio_frames = []
    recording = True

    def _capture():
        # Any failure is reported on stdout instead of killing the thread silently.
        try:
            with sd.InputStream(
                samplerate=samplerate,
                channels=channels,
                dtype='float32',
                callback=audio_callback,
            ):
                print("Recording started. Use stop_recording() to stop.")

                # The stream stays open for as long as this loop runs;
                # sleeping between checks keeps CPU usage low.
                while recording:
                    sd.sleep(100)
        except Exception as exc:
            print(f"Error in recording: {exc}")

    # Daemon thread: it will not keep the interpreter alive on exit.
    worker = threading.Thread(target=_capture, daemon=True)
    worker.start()
    return worker

# Function to stop recording and save the audio as MP3
def stop_recording(filename="recording.mp3", samplerate=44100):
    """Stop the active recording, enhance the audio and export it as an MP3.

    Args:
        filename: Destination path handed to ``AudioSegment.export``.
        samplerate: Frame rate written into the audio segment. It should match
            the rate that was passed to ``start_recording``.

    Returns:
        ``filename`` once the MP3 has been exported, or ``None`` when no
        recording was in progress or no audio chunks were captured.
    """
    global recording

    if not recording:
        print("Not currently recording.")
        return None

    # Stop the recording
    recording = False

    print("Stopping recording...")
    # The capture thread re-checks the flag every 100 ms, so wait longer than one
    # polling interval to give it the chance to leave its loop.
    sd.sleep(150)

    # Snapshot the buffer so a late callback cannot grow the list while it is
    # being concatenated.
    frames = list(audio_frames)
    if not frames:
        print("No audio recorded")
        return None

    # Concatenate all recorded frames
    recorded_data = np.concatenate(frames)

    # Peak-normalise; a completely silent take has peak 0 and is left as-is,
    # because dividing by zero would fill the signal with NaN.
    peak = np.max(np.abs(recorded_data))
    if peak > 0:
        recorded_data = recorded_data / peak

    # Convert float32 (-1 to 1) to int16 (-32768 to 32767)
    recorded_data = (recorded_data * 32767).astype(np.int16)

    # Take the channel count from the data itself so a recording started with
    # channels != 1 is not written out as mono.
    channel_count = recorded_data.shape[1] if recorded_data.ndim > 1 else 1

    # Create AudioSegment
    audio = AudioSegment(
        recorded_data.tobytes(),
        frame_rate=samplerate,
        sample_width=2,  # 16-bit
        channels=channel_count,
    )

    # Apply audio enhancements
    audio = audio.compress_dynamic_range()  # Compress dynamic range
    audio = normalize(audio)  # Normalize volume
    audio = audio.high_pass_filter(80)  # Remove low frequency noise
    audio = audio.low_pass_filter(10000)  # Remove high frequency noise

    # Export as MP3
    audio.export(filename, format="mp3", bitrate="128k")
    print(f"Enhanced MP3 file saved to {filename}")

    return filename

# Example usage: record until the user presses Enter, then save the MP3.
# The input() call below is what ends the recording, so the banner tells the
# user to press Enter (it previously said Ctrl+C, which does not stop anything here).
print("Starting recording. Press Enter to stop...")
record_thread = start_recording()

# Block until the user presses Enter
input("Press Enter to stop recording...\n")

# Stop recording and save file; mp3_file is the saved filename, or None if nothing was saved
mp3_file = stop_recording()

Starting recording. Press Ctrl+C to stop...
Recording started. Use stop_recording() to stop.
Stopping recording...
Enhanced MP3 file saved to recording.mp3


In [None]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa

# Load model and processor from a single checkpoint id so the pair cannot drift apart
WHISPER_MODEL_ID = "openai/whisper-small"
processor = WhisperProcessor.from_pretrained(WHISPER_MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(WHISPER_MODEL_ID)
model.config.forced_decoder_ids = None

# Load the audio file.
# stop_recording() in the recording cell writes "recording.mp3" into the working
# directory, so read it from there instead of a machine-specific absolute path.
input_speech_path = "recording.mp3"
# Rate the audio is resampled to on load and that is declared to the processor
WHISPER_SAMPLE_RATE = 16000
# NOTE: because sr= is given, the second return value is the rate after
# resampling (WHISPER_SAMPLE_RATE), not the file's native rate, despite the name.
audio_array, original_sampling_rate = librosa.load(input_speech_path, sr=WHISPER_SAMPLE_RATE)

# Process the loaded audio data
# NOTE(review): Whisper's feature extractor works on fixed-length windows, so a
# long recording is presumably truncated here — confirm, and chunk the audio if so.
input_features = processor(audio_array, sampling_rate=WHISPER_SAMPLE_RATE, return_tensors="pt").input_features

# Generate token ids
predicted_ids = model.generate(input_features)

# Decode the predicted ids to get the transcription
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
print(transcription)

Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


# Pipeline

In [None]:
import sys
import os
from pathlib import Path

# Get the current notebook path (the kernel's working directory)
notebook_path = Path().resolve()

# Add project root to Python path.
# The root is the nearest directory, starting at the working directory and walking
# upwards, that contains a `components/` folder. This keeps the cell correct no
# matter how deep the notebook sits; if nothing matches, fall back to the original
# "two levels up" guess.
project_root = next(
    (
        candidate
        for candidate in (notebook_path, *notebook_path.parents)
        if (candidate / "components").is_dir()
    ),
    notebook_path.parent.parent,
)
# Re-running the cell must not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Now import your components
from components.audio_recorder.recorder import AudioRecorder
from components.whisper.whisper_small import AudioTranscriptor

In [None]:
project_root

PosixPath('/home/prasun/Desktop/ADHYAYAN_MITRA')

In [None]:
notebook_path

PosixPath('/home/prasun/Desktop/ADHYAYAN_MITRA/components/transcripter_api')

In [None]:
import sys
import os
from pathlib import Path

# Get the current notebook path (the kernel's working directory)
notebook_path = Path().resolve()

# Add project root to Python path.
# The root is the nearest directory, starting at the working directory and walking
# upwards, that contains a `components/` folder. This keeps the cell correct no
# matter how deep the notebook sits; if nothing matches, fall back to the original
# "one level up" guess.
project_root = next(
    (
        candidate
        for candidate in (notebook_path, *notebook_path.parents)
        if (candidate / "components").is_dir()
    ),
    notebook_path.parent,
)
# Re-running the cell must not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Now import your components
from components.audio_recorder.recorder import AudioRecorder
from components.whisper.whisper_small import AudioTranscriptor

# Always bind audio_path so the following cells can test it even when nothing
# was recorded or found (it used to stay undefined in those cases).
audio_path = None

# NOTE(review): `recording` is also the name of the flag used by the
# start_recording()/stop_recording() helpers in the first cell; this assignment
# rebinds it to the typed answer.
recording = input("""
Enter Yes -> If you need to record the audio and then transcribe it.
Enter No -> If you want to transcribe the pre-recorded audio. 
""")
# Accept the answer regardless of capitalisation or stray whitespace.
choice = recording.strip().lower()

if choice == "yes":
    # Initialize components
    recorder = AudioRecorder()

    # Start recording
    print("Starting recording...")
    recorder.start_recording()

    # Block until the user presses Enter
    input()
    # Stop and save
    audio_path = recorder.stop_recording()
elif choice == "no":
    # Reuse an existing recording from the working directory, if there is one
    if os.path.isfile("recording.mp3"):
        audio_path = os.path.abspath("recording.mp3")
    else:
        print("Recording needs to be done")
else:
    print(f"Unrecognised choice {recording!r}; expected Yes or No.")

In [None]:
audio_path

'/home/prasun/Desktop/ADHYAYAN_MITRA/components/transcripter_api/recording.mp3'

In [None]:
# Transcribe the audio file chosen above; without a usable path only a message is printed.
if not audio_path:
    print("Recording failed")
else:
    transcriptor = AudioTranscriptor()
    print(f"Saved recording to: {audio_path}", "Transcribing...", sep="\n")
    transcript = transcriptor.transcribe(audio_path)

Saved recording to: /home/prasun/Desktop/ADHYAYAN_MITRA/testing/recording.mp3
Transcribing...


Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [None]:
transcript

" I've said to many people that live in a world where knowledge and skills are the most important thing. That's the goal that you are after. More than money, you want to be able to do it.  to develop knowledge and skills. And so this person asks, what would be the three or so skills that are the most essential moving forward in the 21st century? And it's not like specific skills that I'm going to talk about like coding or mastery AI or going to some business strategy. Because I think that's not really what the spirit of the question is. In general, there are certain kind of personal skills that you want to develop.  So number one, and this is something that I have in my book, Mastery, you must see being social and getting along with people and cooperating knowing how to work with people as a skill It's not something you were born with. It's not something that some people are good but bad or bad at like anything It is a skill that you develop Being social and knowing how to work with pe

In [None]:
# Persist the transcript produced by the transcription cell to a text file.
# The name is relative, so the file lands in the current working directory.
file_name = "transcript.txt"

# Write mode replaces any existing file of the same name; the `with` block
# closes the handle even if the write fails.
# NOTE(review): no encoding is passed, so the platform default is used —
# consider an explicit encoding here and in the cell that reads the file back.
with open(file_name, "w") as file:
    # Write content to the file
    file.write(transcript)

print(f"File '{file_name}' has been created and written successfully.")

File 'transcript.txt' has been created and written successfully.


In [None]:
# Read the saved transcript back and print it to confirm the write succeeded.
# The path is relative to the current working directory.
with open('transcript.txt','r') as file:
    print(file.read())

 I've said to many people that live in a world where knowledge and skills are the most important thing. That's the goal that you are after. More than money, you want to be able to do it.  to develop knowledge and skills. And so this person asks, what would be the three or so skills that are the most essential moving forward in the 21st century? And it's not like specific skills that I'm going to talk about like coding or mastery AI or going to some business strategy. Because I think that's not really what the spirit of the question is. In general, there are certain kind of personal skills that you want to develop.  So number one, and this is something that I have in my book, Mastery, you must see being social and getting along with people and cooperating knowing how to work with people as a skill It's not something you were born with. It's not something that some people are good but bad or bad at like anything It is a skill that you develop Being social and knowing how to work with peo

# Pipeline for all-in-one document parsing to Markdown (.md)

In [None]:
# from docling.document_converter import DocumentConverter
# from pathlib import Path

# # Define the directories
# sources_dir = Path("Docs/")
# save_dir = Path("Docs/save")

# # Create save directory if it doesn't exist
# save_dir.mkdir(parents=True, exist_ok=True)

# # Collect source links or file paths from user input
# source_links = []
# while True:
#     new_link = input("Enter link or file path (type 'done' to finish): ")
#     if new_link.lower() == 'done':
#         break
#     source_links.append(new_link)

# # Combine files from the directory and user-provided links
# all_sources = []

# # Add user-provided links or file paths
# for link in source_links:
#     all_sources.append(link)  # Convert user input to Path objects

# # Process all collected sources
# for i, source in enumerate(all_sources):
#     try:
#         converter = DocumentConverter()
#         result = converter.convert(source)  # Convert the file/link
#         text = result.document.export_to_markdown()

#         # Save the markdown output to a file
#         output_path = save_dir / f"converted_{i}.md"
#         with open(output_path, "w", encoding="utf-8") as file:
#             file.write(text)

#         print(f"Converted {source} to {output_path}")
#     except Exception as e:
#         print(f"Error processing {source}: {e}")


Error processing : [Errno 21] Is a directory: '.'
Error processing : [Errno 21] Is a directory: '.'
Error processing : [Errno 21] Is a directory: '.'
Error processing : [Errno 21] Is a directory: '.'


# Doc pipeline

In [None]:
from docling.document_converter import DocumentConverter
from pathlib import Path

# Extensions the conversion step accepts
SUPPORTED_SUFFIXES = ('.pdf', '.md', '.docx')

# Always bind `text` so later cells see an explicit "nothing converted" value
# instead of an undefined name (or a stale value from an earlier run).
text = None

# Strip whitespace and surrounding quotes that tend to come along with a pasted
# path, and expand a leading "~".
path = Path(input("Enter the path to the document(format PDF, MD, DOCX)").strip().strip("\"'")).expanduser()
if not path.is_file():
    # A missing file is a different problem from a wrong format; say so.
    print(f"No file found at: {path}")
elif path.suffix.lower() not in SUPPORTED_SUFFIXES:
    print("The file needs to be PDF, DOCX or MD format.")
else:
    converter = DocumentConverter()
    result = converter.convert(path)
    # Markdown rendering of the converted document
    text = result.document.export_to_markdown()

In [None]:
print(text)

# Comprehensive Report on System Design

System design is the process of defining the architecture, components, interfaces, and data models for building software systems that meet specific requirements efficiently and effectively. It transforms user requirements into a structured blueprint that guides the development of reliable, scalable, and maintainable software solutions[1].

## Definition and Fundamentals

System design encompasses planning and structuring complex systems to fulfill both functional and non-functional requirements. It involves making crucial decisions about how different components will interact to achieve desired functionality[2]. A well-designed system aims to be:

- **Reliable**: Handles faults, failures, and errors gracefully

- **Effective**: Meets all user needs and business requirements

- **Maintainable**: Remains flexible and easy to scale or modify[9]

## Key System Design Principles

### SOLID Principles

The SOLID principles provide fundamental guidelin

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer for Qwen2.5-0.5B-Instruct
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

def count_tokens(input_text):
    """Count the tokens the module-level ``tokenizer`` produces for a text.

    Args:
        input_text: Text to tokenize.

    Returns:
        The length of the first entry of the tokenizer's ``input_ids``.
    """
    # The tokenizer is asked for tensors ("pt"); entry 0 of input_ids is
    # presumably the single sequence for this text, and its length is the count.
    return len(tokenizer(input_text, return_tensors="pt")["input_ids"][0])

print(f"Number of tokens: {count_tokens(text)}")

Number of tokens: 2407


In [None]:
# import re
# from langchain_core.prompts import PromptTemplate
# from langchain_huggingface.llms import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# model_id = "Qwen/Qwen2.5-0.5B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=3000)
# hf = HuggingFacePipeline(pipeline=pipe)

# def summarizer(input_text):
#     template = """
#     You are tasked with summarizing a document in a clear, concise, and professional manner. 
#     Your summary should retain all critical information while eliminating unnecessary details. 

#     To guide your approach, here is an example:
#     Document:
#     {ex_text}
#     Summary:
#     {ex_summarized_text}

#     Now, summarize the following document:
#     {text}

#     Make sure the response is in Markdown format.
#     """

#     prompt = PromptTemplate.from_template(template)

#     chain = prompt | hf.bind(skip_prompt=True)
#     chain = prompt| model
#     text = chain.invoke({"ex_text":ex_text,"ex_summarized_text":ex_summarized_text,"text": input_text})
#     pattern = r"```markdown\n(.*?)$"
#     result = re.search(pattern, text, re.DOTALL)

#     if result:
#         text = result.group(1).strip()

#     return (text)

In [None]:
# import re
# from langchain_core.prompts import PromptTemplate
# from langchain_huggingface.llms import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
# import torch

# # Configure quantization
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True
# )

# model_id = "Qwen/Qwen2.5-0.5B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_id)

# # Load model with quantization
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     quantization_config=quantization_config,
#     device_map="auto"
# )

# # Configure generation parameters for efficiency
# pipe = pipeline(
#     "text-generation", 
#     model=model, 
#     tokenizer=tokenizer, 
#     max_new_tokens=3000,
#     torch_dtype=torch.float16
# )

# hf = HuggingFacePipeline(pipeline=pipe)

# def summarizer(input_text):
#     template = """
#     You are tasked with summarizing a document in a clear, concise, and professional manner. 
#     Your summary should retain all critical information while eliminating unnecessary details. 

#     To guide your approach, here is an example:
#     Document:
#     {ex_text}
#     Summary:
#     {ex_summarized_text}

#     Now, summarize the following document:
#     {text}

#     Make sure the response is in Markdown format.
#     """

#     prompt = PromptTemplate.from_template(template)

#     chain = prompt | hf.bind(skip_prompt=True)
#     text = chain.invoke({"ex_text":ex_text,"ex_summarized_text":ex_summarized_text,"text": input_text})

#     pattern = r"```markdown\n(.*?)```"
#     result = re.search(pattern, text, re.DOTALL)

#     if result:
#         text = result.group(1).strip()
    
#     return (text)

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [None]:
# import re
# from langchain_core.prompts import PromptTemplate
# from langchain_ollama.llms import OllamaLLM

# def summarizer(input_text):
#     model = OllamaLLM(
#         model="qwen2.5:0.5b",
#         temperature=0,
#     )

#     template = """
#     You are tasked with summarizing a document in a clear, concise, and professional manner. 
#     Your summary should retain all critical information while eliminating unnecessary details. 

#     To guide your approach, here is an example:
#     Document:
#     {ex_text}
#     Summary:
#     {ex_summarized_text}

#     Now, summarize the following document:
#     {text}

#     Make sure the response is in Markdown format.
#     """

#     prompt = PromptTemplate.from_template(template)

#     chain = prompt| model
#     text = chain.invoke({"ex_text":ex_text,"ex_summarized_text":ex_summarized_text,"text": input_text})

#     pattern = r"```markdown\n(.*?)```"
#     result = re.search(pattern, text, re.DOTALL)

#     if result:
#         text = result.group(1).strip()
    
#     return (text)

In [None]:
# pipeline: keep summarising `text` until it fits under the token budget.
# NOTE(review): every `summarizer` definition visible in this notebook is
# commented out, so one of them must be re-enabled before this cell can run.
TOKEN_LIMIT = 1500

token_count = count_tokens(text)
while token_count >= TOKEN_LIMIT:
    text = summarizer(text)
    new_count = count_tokens(text)
    print(f"Number of tokens: {new_count}")
    print(text)
    print("------------------------------------------------------------")
    # Each pass must strictly shrink the text; otherwise the loop could run
    # forever on a summariser that returns something just as long (or longer).
    if new_count >= token_count:
        print("Summarizer did not shorten the text; stopping.")
        break
    token_count = new_count

Number of tokens: 539
```markdown
# System Design: A Comprehensive Guide for Beginners

## Introduction to System Design

System design is a crucial aspect of software development, focusing on how systems are built and managed. It involves understanding the requirements, designing the architecture, implementing the system, and ensuring its reliability and scalability.

### Key Concepts in System Design

1. **Requirements Gathering**: Identifying user needs and defining the system's goals.
2. **System Architecture**: Planning the overall structure of the system, including components, interfaces, and communication mechanisms.
3. **Database Management**: Designing the database schema to store data efficiently and securely.
4. **User Interface (UI)**: Creating a user-friendly interface for users interacting with the system.
5. **Performance Optimization**: Ensuring that the system can handle high loads and perform well under various conditions.

### Best Practices in System Design

1. **Mo

In [None]:
import os
from pathlib import Path
import asyncio

# Move the working directory up one level and then into `components`,
# presumably so that `doc_pipeline` can be imported by its bare package name.
# NOTE(review): both chdir calls are relative, so running this cell a second time
# moves again from the new location — restart the kernel before re-running it.
os.chdir('..')
os.chdir('components')
print(Path('.').cwd())

# NOTE(review): the output recorded for this cell is
# "ModuleNotFoundError: No module named 'doc_ex'", i.e. this import failed on the
# saved run — presumably inside the imported package; confirm it has been fixed.
from doc_pipeline.pipeline import DocumentProcessor
processor = DocumentProcessor()
result = processor.process_document()
print(result)
# Disabled alternative: switch to `transcripter_api` and run the transcriber from there.
# os.chdir('..')
# os.chdir('transcripter_api')
# print(Path('.').cwd())
# from transcripter import transcribe_audio
# transcribe_audio()


/home/prasun/Desktop/ADHYAYAN_MITRA/components


ModuleNotFoundError: No module named 'doc_ex'

In [None]:
# Run the document pipeline again; relies on DocumentProcessor having been
# imported by an earlier cell.
# NOTE(review): `processor` is also the name bound to the Whisper processor in
# the transcription cell, so this assignment replaces that object.
processor = DocumentProcessor()
result = processor.process_document()
print(result)

Document exceeds 2000 tokens. Summarizing...
# System Design: A Comprehensive Guide for Beginners

## Introduction to System Design

System design is a critical aspect of software development, ensuring that an application or system meets its functional and non-functional requirements. It involves several key components:

1. **Requirements Gathering**: Identifying the user needs and defining the system's goals.
2. **Architecture Planning**: Defining the overall structure and components of the system.
3. **Design**: Creating a detailed blueprint for how the system will be built.
4. **Implementation**: Writing code to implement the design.
5. **Testing**: Ensuring that the system meets its functional and non-functional requirements.

## Key Concepts in System Design

- **Scalability**: The ability of an application or system to handle increased loads without degradation in performance.
- **Reliability**: The assurance that a system will operate correctly under various conditions.
- **Main

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer for Qwen2.5-0.5B-Instruct
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

def count_tokens(input_text):
    """Count the tokens the module-level ``tokenizer`` produces for a text.

    An earlier cell of this notebook defines a function with the same name; once
    this cell has run, this definition is the one in effect.

    Args:
        input_text: Text to tokenize.

    Returns:
        The length of the first entry of the tokenizer's ``input_ids``.
    """
    # The tokenizer is asked for tensors ("pt"); entry 0 of input_ids is
    # presumably the single sequence for this text, and its length is the count.
    return len(tokenizer(input_text, return_tensors="pt")["input_ids"][0])

print(f"Number of tokens: {count_tokens(doc_result)}")

## Testing

In [1]:
import sys
import os

# Add the parent directory to the path so Python can find the components.
# '..' is resolved against the working directory at the moment this cell runs,
# so the result depends on where the kernel was started (and on any earlier chdir).
sys.path.append(os.path.abspath('..'))

In [2]:
# Now you can import components
from components.doc_pipeline.pipeline import DocumentProcessor
from components.transcripter_api import transcripter 
from components.select_llm import llm
from components.gap_analyzer import analyzer

In [4]:
# Run the project's transcripter component and print whatever it returns;
# trans_result is reused by the gap-analysis and question-generation cells.
trans_result = transcripter.transcribe_audio()
print(trans_result)

Processing: /home/prasun/Desktop/ADHYAYAN_MITRA/components/transcripter_api/recording.mp3


Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Transcription completed in 12.5s
Transcript saved to /home/prasun/Desktop/ADHYAYAN_MITRA/components/transcripter_api/transcript.txt
 Alright, let me try to talk through what I remember from the rise of nationalism in Europe. I'll just say what comes to mind and see where I get stuck. So, a nation is basically a big group of people who share things like language, history and I think live under one government. I remember reading about Frederick Sorio. He was some French artist in 1848. He made these prints showing people from different countries marching together towards the Statue of Liberty.  I think those stood for the end of old monarchy or something, but I'm not totally sure. The French Revolution was a big deal for nationalism. It made people in France feel more united, and they set up a republic after getting rid of the king. Then Napoleon came, and he made a lot of changes. I know he made everyone equal before the law and got rid of privileges by birth, but I'm a bit fuzzy on wha

In [5]:
# Run the document pipeline on a fresh DocumentProcessor and print its result;
# doc_result is reused by the gap-analysis and question-generation cells.
doc_result = DocumentProcessor().process_document()
print(doc_result)

Document has 1686 tokens. Using raw document.
## The Rise of Nationalism in Europe Class 10 Notes History Chapter 1

The Rise of Nationalism in Europe Class 10 Notes Social Science History Chapter 1 SST Pdf free download is part of [Class 10 Social Science Notes](https:/www.learncbse.in/class-10-social-science-notes) for Quick Revision. Here we have given The Rise of Nationalism in Europe Class 10 History Chapter 1 Notes. According to new CBSE Exam Pattern, [MCQ Questions For Class 10 Social Science with Answers](https:/www.learncbse.in/mcq-questions-for-class-10-social-science-with-answers) Carries 20 Marks.

| Board        | CBSE                              |
|--------------|-----------------------------------|
| Textbook     | NCERT                             |
| Class        | Class 10                          |
| Subject      | Social Science Notes              |
| Chapter      | History Chapter 1                 |
| Chapter Name | The Rise of Nationalism in Europe |
| Category 

# Building the Gap_Analysis Pipeline

In [17]:
# Import the select_llm module under the name `llm`, then immediately rebind
# `llm` to the object returned by set_llm(). After the second line `llm` no longer
# refers to the module, which is why the import has to stay in this same cell.
from components.select_llm import llm
llm = llm.set_llm()

In [18]:
print("google" if "models/gemma-3-27b-it" in llm.model else "ollama")

google


In [19]:
# Ask the analyzer for a learning-gap report, handing it the selected LLM together
# with the document result and the transcript result produced by earlier cells.
analysis = analyzer.learning_gap(
    llm = llm,
    doc_result=doc_result,
    trans_result=trans_result
)

In [20]:
from IPython.display import display, Markdown

display(Markdown(analysis))

## Learning Gap Analysis: The Rise of Nationalism in Europe - Student Response

Here's a detailed analysis of the student's understanding of "The Rise of Nationalism in Europe," based on their provided response and the course learning material.

**1. DEMONSTRATED KNOWLEDGE:**

The student demonstrates a good grasp of the *broad strokes* of the topic. Specifically:

* **Definition of Nation:** They accurately define a nation as a group sharing common descent, language, history, and government.
* **Sorrieu’s Vision:** They correctly recall Sorrieu’s prints and their symbolic representation of a unified Europe marching towards liberty. They understand the prints represented the end of absolutist institutions.
* **French Revolution & Nationalism:** They correctly identify the French Revolution as a catalyst for nationalism and the shift from monarchy to republic.
* **Italian & German Unification – Key Figures & Events:** They accurately identify key figures like Mazzini, Cavour, Garibaldi, and Bismarck, and the general sequence of events in both Italian and German unification. They correctly identify Sardinia-Piedmont’s role in Italy and Prussia’s in Germany.
* **Female Allegories:** They remember Marianne and Germania as symbols of nationalism and recall some details about Marianne (red cap, presence on coins/stamps).

**2. KNOWLEDGE GAPS:**

Several important concepts are missing or inadequately addressed:

* **Napoleonic Code – Specific Reforms:** While they mention Napoleon made everyone equal before the law, they lack detail regarding the full scope of the Napoleonic Code (abolition of feudalism, guild restrictions, standardized weights & measures, improved transport).
* **Conservative Reaction Post-Napoleon:** The student mentions conservatism after Napoleon’s defeat but doesn’t elaborate on *what* conservative regimes did to suppress revolutionary ideas (restoration of old monarchies, censorship, etc.). The role of the Congress of Vienna is completely absent.
* **Rise of the Middle Class & Liberalism:** The material highlights the role of the middle class and liberalism as driving forces of nationalism. This is not mentioned by the student.
* **Industrialization’s Impact:** The connection between industrialization, the emergence of a working class, and the spread of nationalist ideas is missing.
* **Nationalism & Imperialism – Detailed Connection:** The student acknowledges this as an area needing review, but the material emphasizes how nationalism, in its later stages, became aggressive and fueled imperialistic competition, ultimately contributing to WWI. This nuanced shift isn’t understood.
* **Detailed Understanding of Allegories:** The student knows *that* Marianne and Germania were used, but doesn’t fully grasp *why* the mother figure was chosen as a symbol of the nation (Matribhumi concept).
* **The Frankfurt Assembly’s Failure – Reasons:** The student states the King of Prussia rejected the offer, but doesn’t explain *why* (fear of losing power, opposition from Junkers and military).



**3. MISCONCEPTIONS:**

* **Napoleon’s Reforms – Limited View:** The student’s understanding of Napoleon’s reforms is limited to equality before the law. This suggests they see it as a purely legal change, rather than a systemic overhaul of French society. *Correct Concept:* Napoleon’s reforms were far-reaching, impacting administration, property rights, economic systems, and social structures.
* **Cavour’s Role – Misspelling & Limited Detail:** The student spells Cavour as “appeed Montt” and doesn’t fully articulate his strategic role in utilizing diplomacy and war to achieve unification. *Correct Concept:* Cavour was a master diplomat who skillfully maneuvered Sardinia-Piedmont into a position to lead the unification process.

**4. DEPTH OF UNDERSTANDING:**

The student demonstrates primarily *surface-level* knowledge. They can recall names, dates, and general events, but struggle with the underlying causes, consequences, and complexities of the historical processes. Their understanding is largely descriptive rather than analytical. For example, they can *state* that Bismarck fought wars, but don’t explain *why* he chose those specific conflicts or how they contributed to unification. The connection between the different elements (e.g., economic changes, social classes, political ideologies) is weak.

**5. LEARNING PRIORITIES:**

1. **Napoleonic Code & Conservative Reaction (High Priority):** Understanding the full impact of Napoleon’s reforms and the subsequent conservative backlash is crucial for understanding the context in which nationalism developed.
2. **Rise of Middle Class, Liberalism & Industrialization (High Priority):** These factors were fundamental drivers of nationalist sentiment and need to be thoroughly understood.
3. **Nationalism & Imperialism (Medium Priority):** Grasping the shift in nationalist ideology from a force for unification to a justification for imperial expansion is essential for understanding 19th and 20th-century history.
4. **Frankfurt Assembly’s Failure & Allegory Depth (Medium Priority):** Understanding the reasons behind the Frankfurt Assembly’s failure and the symbolism of the female allegories will add nuance to their understanding.
5. **Precise Dates & Spelling (Low Priority):** While accuracy is important, focusing on conceptual understanding is more critical at this stage.



**6. TARGETED RECOMMENDATIONS:**

* **Napoleonic Code & Conservative Reaction:**
    * **Activity:** Create a table comparing and contrasting the key features of the Napoleonic Code with the policies of the conservative regimes that followed.
    * **Resource:** Re-read the section on the Napoleonic Code in the textbook and research the Congress of Vienna.
* **Rise of Middle Class, Liberalism & Industrialization:**
    * **Activity:** Write a short essay explaining how the rise of the middle class, the spread of liberal ideas, and industrialization contributed to the growth of nationalism in Europe.
    * **Resource:** Consult additional resources on the Industrial Revolution and the rise of liberalism in 19th-century Europe.
* **Nationalism & Imperialism:**
    * **Activity:** Research examples of how nationalism was used to justify imperialistic expansion (e.g., French colonialism in Africa, British imperialism in India).
    * **Resource:** Explore secondary sources that discuss the relationship between nationalism and imperialism.
* **Frankfurt Assembly’s Failure & Allegory Depth:**
    * **Activity:** Research the social groups (Junkers, military) that opposed the Frankfurt Assembly and explain their motivations.  Write a paragraph explaining why a *mother* figure was chosen to represent the nation.
    * **Resource:** Consult historical analyses of the Frankfurt Assembly and explore the concept of national allegory.
* **Dates & Spelling:**
    * **Activity:** Create flashcards with key dates and names to aid memorization.



This detailed analysis should provide the instructor with valuable insights into the student’s understanding and guide targeted interventions to address the identified learning gaps. It also provides the student with a clear roadmap for focused review and improvement.

# Generating questions from the above report

In [21]:
from components.question_generator import questions

# Generate follow-up questions for the learner. The generator receives the LLM
# plus `doc_result`, `trans_result` and `analysis` -- presumably the learning
# material, the student's transcript and the gap analysis (TODO confirm against
# the cells that create them). Later cells iterate over and index the result,
# so it is expected to be a sequence of question strings.
question = questions.question_gen(
    llm=llm,
    doc_result=doc_result,
    trans_result=trans_result,
    analysis=analysis,
)

In [24]:
# Show the generated questions as a 1-based numbered list, preceded by a
# header line and followed by a blank line.
print("Questions\n")
for number, text in enumerate(question, start=1):
    print(f"{number}. {text}")
print()

Questions

1. You mentioned Napoleon made everyone equal before the law. Can you elaborate on *how* he achieved this equality, and what other significant changes did the Napoleonic Code introduce that impacted French society beyond just legal rights?
2. After Napoleon's defeat, you noted a conservative reaction. What specific steps did conservative governments take to *undo* the changes brought about by the French Revolution and Napoleon, and how successful were they in suppressing nationalist ideas?
3. The rise of the middle class isn't mentioned in your response. How do you think the growth of a middle class in Europe during the 19th century contributed to the development of nationalist sentiment? Consider their economic interests and social values.
4. You correctly identified the role of Prussia in German unification. However, the Frankfurt Assembly attempted unification *before* Bismarck. Why did this attempt ultimately fail, and what obstacles did they face that Bismarck was able 

In [None]:
# Prompt the learner once per generated question, in order, and keep each reply.
answers = []
for current_question in question:
    reply = input(f"""Enter the answer to:\n {current_question}""")
    answers.append(reply)

# Pair every question with the answer given for it, one
# {"question": ..., "answer": ...} dict per question.
qa_pairs = [
    {"question": asked, "answer": given}
    for asked, given in zip(question, answers)
]

NameError: name 'question' is not defined

In [None]:
# Inspect the collected question/answer pairs.
print(qa_pairs)

In [None]:
# The learner provides an answer to every question in the list; the model is given
# the learning material, the transcript and the gap analysis so it can assess each answer.
from langchain.prompts import PromptTemplate

# Prompt template for evaluating one answer. Placeholders to fill in:
#   {doc1}     - learning material
#   {doc2}     - student's response/knowledge taken from the transcript
#   {doc3}     - learning gap analysis
#   {question} - the question being answered
#   {doc4}     - the student's answer to that question
answer_checker_prompt = PromptTemplate.from_template(
    """
    You are a Teaching Assistant responsible for evaluating a student's answers to questions based on course material. Your goal is to provide detailed feedback on the student's understanding and identify any gaps in their knowledge.

    LEARNING MATERIAL:
    {doc1}

    STUDENT'S RESPONSE/KNOWLEDGE (from transcript):
    {doc2}

    LEARNING GAP ANALYSIS:
    {doc3}

    Question:
    {question}

    STUDENT'S ANSWER:
    {doc4}

    Instructions:
    - Assess the student's answer against the learning material, transcript, and gap analysis.
    - Provide feedback on the accuracy and completeness of the answer.
    - Identify any remaining gaps or misconceptions.
    - Suggest improvements or additional resources if necessary.
    """
)

# Pipe the prompt into the LLM: invoking the chain with the five template
# variables above formats the prompt and passes it to `llm`.
chain = answer_checker_prompt | llm

# Our approach will not use the for loop below: all the answers need to be checked at once instead of issuing a separate LLM call per answer (heavy system load).
# for question in questions_list:
#     print(f"Question: {question}")
#     answer = input("Your answer: ")
#     feedback = chain.invoke({"doc1": doc_result, "doc2":trans_result, "doc3": analysis,"question":question, "doc4": answer})
#     print(f"Feedback: {feedback}")