In [None]:
# Smoke test: print a fixed greeting.
print("Hello")

## Testing Ollama via the `subprocess` module

In [None]:
import subprocess

In [None]:
prompt = "Hello"

# `ollama run MODEL [PROMPT]` takes the prompt as a positional argument.
# There is no `--prompt` flag, so the previous invocation failed with an
# "unknown flag" error on stderr and produced an empty stdout.
result = subprocess.run(
    ["ollama", "run", "llama3.2", prompt],
    capture_output=True,
    text=True,
    encoding="utf-8",  # decode model output as UTF-8 instead of the locale code page
)

# Surface failures instead of silently printing an empty string.
if result.returncode != 0:
    print(f"ollama exited with code {result.returncode}: {result.stderr.strip()}")
else:
    # Print the output from the model
    print(result.stdout)

In [None]:
import ollama
# Ask the model a single user question through the ollama Python client
# and print the text of the returned message.
response = ollama.chat(
    model="llama3.2",
    messages=[dict(role="user", content="Why is the sky blue?")],
)
print(response["message"]["content"])

In [None]:
# First draft of the AI-student system prompt.
# NOTE: the next cell rebinds `prompt`, so when the cells run in order this
# draft is overwritten before any model call in this notebook reads it.
prompt = """
You are part of an application called CLUSTER. This application allows teachers/professors to upload their lecture content and
receive questions on their material from AI students. This is where you come in. You will be tasked with behaving like an AI student
and generating questions to ask on the material. 
"""

In [None]:
# System prompt describing the AI-student role. A later cell passes it as the
# "system" message of the chat sent to the GGUF model.
prompt = """
You are an AI student within an application called CLUSTER, designed to assist teachers and professors in refining their teaching materials by interacting with virtual students. Your primary role is to analyze uploaded lecture content and generate insightful, relevant, and diverse questions based on the material.

To fulfill this role:
- Review the lecture content carefully, identifying key concepts, challenging areas, and potential ambiguities that a real student might question.
- Generate questions that:
  - Clarify or probe deeper into complex topics.
  - Encourage critical thinking or application of knowledge.
  - Test understanding or recall of essential details.
  - Spark curiosity about broader implications or connections to related fields.
- Consider varying question types, such as factual, conceptual, and application-based, to simulate real student interactions.
  
Your goal is to help educators by mimicking authentic student curiosity and comprehension challenges, ultimately enhancing the clarity and engagement of their lecture content.
"""


## GGUF tests with Mistral 7B (via llama-cpp-python)

In [None]:
from llama_cpp import Llama

# model_path = "llm_gguf/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# model_path = "llm_gguf/mistral-7b-v0.1.Q4_K_M.gguf"
model_path = "llm_gguf/mistral-7b-v0.1.Q3_K_M.gguf"

# n_ctx has to hold the system prompt, the user message AND the generated
# reply. The previous value of 256 is too small for the multi-paragraph system
# prompt plus a reply, which makes the completion call fail with a
# context-window error.
# `use_gpu` is not a Llama option (unknown keyword arguments are ignored), so
# it was dropped; GPU offload is controlled solely by n_gpu_layers.
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=20)

# Chat made of the role-defining system prompt and one sample lecture snippet.
messages = [
    {"role": "system", "content": prompt},
    {"role": "user", "content": "Today we will learn how to add two numbers together. When you add 2 + 2, we get 4."}
]

# Print only the text of the first returned choice.
output = llm.create_chat_completion(messages)
print(output['choices'][0]['message']['content'])

In [None]:
from llama_cpp import Llama
# Load the quantised Mistral GGUF model; n_gpu_layers=-1 asks for as many
# layers as possible to be offloaded to the GPU.
llm = Llama(model_path="llm_gguf/mistral-7b-v0.1.Q3_K_M.gguf", n_gpu_layers=-1)

In [None]:
from llama_cpp import Llama

# Minimal two-turn chat: a generic system instruction plus one user question.
messages = [
    dict(role="system", content="You are a helpful assistant."),
    dict(role="user", content="What's the capital of France?"),
]

# Run the chat through the loaded model and print the first choice's text.
output = llm.create_chat_completion(messages)
print(output["choices"][0]["message"]["content"])

## Testing New LLM Class

In [None]:
from llm import LLM
# Instantiate the project's LLM wrapper. NOTE: this rebinds `llm`, which an
# earlier cell bound to a llama_cpp `Llama` instance.
llm = LLM()

In [None]:
# Select the prompt identified by "default_student" on the wrapper
# (what load_prompt does with it is defined in the llm module, not shown here).
llm.load_prompt(prompt="default_student")

In [None]:
# Send a short sample lecture snippet to the wrapper and keep the return value.
test1 = llm.query(query="Today we will learn how to add two numbers together. When you add 2 + 2, we get 4.")

In [None]:
# Display the value returned by the query.
test1

## Testing MongoDB Atlas

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
uri = os.getenv("MONGO_DB_CONNECTION_STRING")

# Never echo the connection string itself: it embeds credentials, and notebook
# outputs are saved with the file. Report only whether it is available.
print("MONGO_DB_CONNECTION_STRING is set" if uri else "MONGO_DB_CONNECTION_STRING is NOT set")

In [None]:

from pymongo.mongo_client import MongoClient
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
uri = os.getenv("MONGO_DB_CONNECTION_STRING")

# Fail fast on a missing setting: MongoClient(None) would silently target a
# MongoDB on localhost instead of the intended deployment.
if not uri:
    raise RuntimeError(
        "MONGO_DB_CONNECTION_STRING is not set; define it in the environment or in .env"
    )

# Create a new client and connect to the server
client = MongoClient(uri)

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # Best-effort connectivity probe: report the failure and keep the kernel alive.
    print(e)

In [None]:
# Select the "user_db" database on the connected client.
db = client['user_db']

In [None]:
# Select the "user_credentials" collection of that database.
collection = db['user_credentials']

In [None]:
# Throwaway document used to verify that inserts work.
json_test = {"hello": "world"}

In [None]:
# insert_one() adds the generated `_id` to the dict it is given. Insert a copy
# so `json_test` stays unchanged and re-running this cell does not raise
# DuplicateKeyError on the second execution.
collection.insert_one(dict(json_test))

In [None]:
# The original `from utilities.llm` had no import clause (a SyntaxError);
# LLM is the name this notebook imports from that module elsewhere.
from utilities.llm import LLM

# Instantiate the wrapper (rebinds `llm`).
llm = LLM()

In [None]:
# The original `from requests` had no import clause (a SyntaxError); import the
# package itself, which is the minimal valid form.
import requests

In [None]:
from utilities.extraction_tool import ExtractionTool

In [None]:
# Instantiate the project's extraction helper.
tool = ExtractionTool()

In [None]:
# Extract text from a sample PDF.
# NOTE(review): absolute, machine-specific Windows path — this cell only runs on
# that machine; consider a path relative to the project root.
extracted_text = tool.extract_text_from_pdf("C:\\Users\\marti\\CLUSTER\\test_docs\\Lorem_ipsum.pdf")

In [None]:
# Display the text extracted from the PDF.
extracted_text

In [None]:
# Extract text from a sample PowerPoint file.
# NOTE(review): absolute, machine-specific Windows path — this cell only runs on
# that machine; consider a path relative to the project root.
extracted_pp = tool.extract_text_from_pptx("C:\\Users\\marti\\CLUSTER\\test_docs\\samplepptx.pptx")

In [None]:
# Display the text extracted from the presentation.
extracted_pp

In [None]:
from utilities.llm import LLM

In [None]:
# Instantiate the LLM wrapper imported from utilities.llm (rebinds `llm`).
llm = LLM()

In [None]:
# Ask the wrapper a simple factual question; the return value is the cell output.
llm.query(query="Why is the sky blue?")

In [None]:
from utilities.ollama_llm import OllamaLLM
# Instantiate the OllamaLLM wrapper under its own name so it does not clobber `llm`.
ollama_llm = OllamaLLM()

In [None]:
# Feed the wrapper a one-line "lecture" and ask whether it has questions.
ollama_llm.query("2+2 is 4, are there any questions on this?")

## Testing a function to summarise a PDF before passing it to the LLM

In [1]:
import fitz  # PyMuPDF

def extract_text_from_pdf(file_path):
    """Extract the plain text of every page of a PDF, best-effort.

    Args:
        file_path: Path of the PDF, passed to ``fitz.open``.

    Returns:
        The text of each page followed by a newline, concatenated in page
        order. If an error occurs it is printed, and whatever text was
        collected before the failure is returned (an empty string if the file
        could not be opened at all).
    """
    page_texts = []
    try:
        # Open the PDF file
        pdf_document = fitz.open(file_path)
        try:
            for page_num in range(len(pdf_document)):
                page = pdf_document.load_page(page_num)
                page_texts.append(page.get_text() + "\n")
        finally:
            # Always release the document, even when a page fails mid-loop;
            # previously an exception skipped close() and leaked the handle.
            pdf_document.close()
    except Exception as e:
        # Deliberately best-effort: report the problem and return what we have.
        print(f"An error occurred: {e}")

    return "".join(page_texts)

In [2]:
# Run the notebook-local extractor on a lecture PDF located one directory up.
pdf_content = extract_text_from_pdf("../CS433_L2_2024.pdf")

In [3]:
# Character count of the extracted text.
len(pdf_content)

13636

In [4]:
from llama_index.core.node_parser import SentenceSplitter

In [5]:
# Splitter configured with chunk_size=512 and chunk_overlap=20
# (units are defined by llama_index's SentenceSplitter — presumably tokens; TODO confirm).
node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)

In [6]:
# Split the extracted PDF text into chunks. Despite the name, `sentences` holds
# whatever chunks the splitter returns, not individual sentences.
sentences = node_parser.split_text(pdf_content)

In [10]:
# Inspect the chunks produced by the splitter.
sentences

['CS433 Modern Architectures\nVideo 2\nThe machine cycle\nThis video is the copyright of Maynooth University and may not be copied, or reposted.\nCreated for streaming using Panopto within MU Moodle only.\nCM Lecture 2\n\nTopic 1.1: Types of computer\nThere are many ways that we could choose to compute\nThe DEC (Digital Electronic Computer) is by far the most popular\nNeurones\nIBM TrueNorth\nRetinal Prosthesis\nLight\nSpatial filtering     Holography\nOptical transistor\nQuantum\nD-Wave 128 Qubits\nn Qubits contains superposition of 2n states\nDNA\nDifference Engine\nPin Wheel\nMechanical\nStructure\nElectrical\nTransistor\nIC\nn bits can be in only one of 2n states\nValve\nRelay\nCoding of data\n\nAnalogue\nDifferential Equation\nDigital\n+\n-\nA\nB\n-(A+B)\nAnalog adder circuit\n+\n-\nA\nDifferentiator\nResponse\nAn analogue signal is continuous (can have all \nvalues in a range).  The signal is processed by \nelectronics \nthat \ncan \namplify, \nmultiply, \ndifferentiate and integ

In [8]:
from utilities.ollama_llm import OllamaLLM
# Rebind `llm` to an OllamaLLM instance; the summarisation loop calls
# `llm.summarise` on it.
llm = OllamaLLM()

In [None]:
# Summarise every chunk in order. Results are appended one at a time so that
# the summaries gathered so far survive if a later call fails.
summaries = []
for chunk in sentences:
    summaries.append(llm.summarise(chunk))

This lecture introduces various types of computers, including analog and digital systems. Analog signals are continuous and can have all values in a range, while digital signals are discrete and can only have two values. The lecture also covers different types of computers such as Neurones, IBM TrueNorth, and Retinal Prosthesis, and discusses the basics of digital logic circuits like AND, XOR, and adder circuits. Additionally, it touches on computer architectures including Harvard and Von Neumann models, where memory and data buses are separate for instructions and operands respectively.
Overview of Computer Architecture:

* ALU (Arithmetic Logic Unit) is responsible for performing arithmetic and logical operations
* Registers store data temporarily during execution
* Bus Interface manages communication with memory and peripherals
* Instructions guide the processor's actions
* Address bus carries address information to memory, while data bus carries data between components

Key Concept

In [11]:
test = """
This lecture introduces various types of computers, including analog and digital systems. Analog signals are continuous and can have all values in a range, while digital signals are discrete and can only have two values. The lecture also covers different types of computers such as Neurones, IBM TrueNorth, and Retinal Prosthesis, and discusses the basics of digital logic circuits like AND, XOR, and adder circuits. Additionally, it touches on computer architectures including Harvard and Von Neumann models, where memory and data buses are separate for instructions and operands respectively.
Overview of Computer Architecture:

* ALU (Arithmetic Logic Unit) is responsible for performing arithmetic and logical operations
* Registers store data temporarily during execution
* Bus Interface manages communication with memory and peripherals
* Instructions guide the processor's actions
* Address bus carries address information to memory, while data bus carries data between components

Key Concepts:

* Von Neumann architecture: instructions and data share same memory space
* Harvard architecture: separate memory for instructions and data (e.g. x86)
* Microprocessors have control unit, execution unit, and bus interface unit
* Key registers include:
  - ES (Extra Segment)
  - CS (Code Segment)
  - SS (Stack Segment)
  - DS (Data Segment)
  - IP (Instruction Pointer)
  - DL (Data Link)
  - SP (Stack Pointer)
  - BP (Base Pointer)
  - SI (Source Index)
  - AH (Auxiliary Header)
  - BL (Byte Length)
  - CL (Character Length)
  - BH (Byte High)
  - CH (Character High)
  - DH (Double-Word High)
  - AL (Arithmetic Logic Unit)

Processor Components:

* CPU (Central Processing Unit)
* Memory
* I/O (Input/Output) components

Microprocessor Block Diagram:
 
* BIU (Bus Interface Unit): manages memory and port access
* EU (Execution Unit): executes instructions, contains ALU and control circuitry
The control unit fetches instructions from a queue, which is a first-in-first-out store of 6 bytes that keeps main memory accessed only for each instruction using pipelining. The Arithmetic Logic Unit (ALU) in the 8086 can perform various operations including addition, subtraction, AND, OR, XOR, and shift on 16-bit binary numbers by connecting inputs to multiple functions in parallel through a multiplexer.
The text discusses digital circuits and logic operations, including multiplexers, AND gates, OR gates, half adders, full adders, and 4-bit adder/subtractors. It explains how these components work together to perform arithmetic operations such as addition, subtraction, and shifting. The text also introduces the concept of twos complement representation for numbers.
Here are the summaries:

Adding two's complement gives the same result as subtracting the number.
Registers and latches store data in D-type flip-flops that change on clock edges.
A latch becomes a register when it stores data on each clock edge.
The CPU uses registers with specific purposes, such as accumulator for calculations or instruction pointer.
A simple calculator uses an ALU to perform arithmetic operations with results stored in the accumulator.
The text describes a simple computer architecture with an Accumulator, Arithmetic Logic Unit (ALU), and Latch. The program uses the ALU to perform basic arithmetic and logical operations, such as addition, subtraction, AND, OR, XOR, and shift. The instruction set includes MOV A, N, which moves a value into the Accumulator, followed by several instructions that evaluate the expression 5 - 3.
The lecture discusses a simplified computer architecture, focusing on a basic ALU (Arithmetic Logic Unit) that can perform basic arithmetic and logical operations. It explains how to program a hypothetical computer using assembly language instructions for a simple machine. The example code evaluates the expression "5 - 3" and demonstrates the use of the accumulator and ALU in performing calculations and comparisons, with outputs determined by the rising edge of a clock signal.
The lecture discusses the basics of digital circuits, focusing on a 4-bit Arithmetic Logic Unit (ALU) with its components: Latch, Instruction Decoder, Accumulator, Clock, and Data inputs. The example program evaluates the expression 5-3 in assembly language, using various instructions such as XOR, ADD, AND, OR, and NOP to perform arithmetic operations, demonstrating how data is processed and controlled within the ALU.
A lecture on computer architecture focusing on the assembly language, instruction set, and basic components of a simple microprocessor. Key points include:

* Instruction set: basic operations such as MOV, AND, OR, XOR, ADD, and SHIFTS
* ALU (Arithmetic Logic Unit) operation in evaluating expressions like 5-3
* Addition of memory to store program data and an instruction pointer
* Creation of a simple microprocessor with registers A, B, and IP.
The text describes a binary counter and a simple computer system, specifically the ZX81 Based Microcomputer. It explains how the accumulator (A) and instruction pointer (IP) work together to execute instructions, including fetching and decoding opcodes. The Z80 microprocessor is also discussed, including its architecture, register bank, ALU, and instruction cycle.
The Control Section: 
1. Sends Program Counter to address bus and memory read to control bus.
2. External Memory responds by placing byte (instruction) on Data Bus.
3. Instruction Register loads the opcode.

Instruction Decode:
Decodes instruction, generating control signals for processor readiness and ALU setup.

Machine Cycle:
1. Fetch: Reads first byte from memory (opcode).
2. Decode: Decodes instruction and generates control signals.
   a. Fetch Cycle: Loads value into register H.
   b. Write Cycle: Writes to selected address register IY.
3. Execute/Store sequence, typically requiring one or more clock cycles per step.

"""

In [12]:
# Length in characters of the `test` string.
len(test)

5887

## Interim Report Work

In [3]:
from pypdf import PdfReader, PdfWriter

# Open both source documents.
pdf1 = PdfReader("../pdf_test/19313673_White.pdf")
pdf2 = PdfReader("../pdf_test/Appendix.pdf")

# Collects the pages of the combined document.
writer = PdfWriter()

# The first two pages of the first PDF (indices 0 and 1) come first ...
for index in (0, 1):
    writer.add_page(pdf1.pages[index])

# ... followed by the second PDF from its third page (index 2) onward.
for page in pdf2.pages[2:]:
    writer.add_page(page)

# Persist the merged document to a new file.
with open("../test_folder_pdf/19313673_White.pdf", "wb") as output_file:
    writer.write(output_file)

print("PDFs have been combined successfully!")


PDFs have been combined successfully!
