In [57]:
# Add the project root to sys.path
import sys
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)

## Part 1: PDF Extraction of Lecture Material

In [18]:
import fitz  # PyMuPDF for PDF handling
import os

# Directory to store extracted lecture images
content_folder = 'lecture_images'
os.makedirs(content_folder, exist_ok=True)  # Ensure the directory exists

# Path containing the lecture PDFs
path = r'C:\Users\lberm\OneDrive\Documents\Github\mechedu1.0\test_images\lecture_collection'

# Collect all PDF file paths from the specified directory and its subdirectories
file_paths = []
for root, dirs, files in os.walk(path):
    file_paths.extend(
        [os.path.join(root, file) for file in files if file.endswith('.pdf')]
    )

# Initialize a summary string to log the process
summary = f"Summary\n{'*' * 25}\n"

# Process each PDF file
for filename in file_paths:
    pdf_document = fitz.open(filename)  # Open the PDF

    # Create a folder for the current PDF's extracted images
    pdf_name = os.path.splitext(os.path.basename(filename))[0]  # Extract the PDF name without extension
    folder_path = os.path.join(content_folder, pdf_name)
    os.makedirs(folder_path, exist_ok=True)  # Ensure the directory exists

    summary += f"Folder Created at: {folder_path}\n"

    # Process each page in the PDF
    for page_number in range(pdf_document.page_count):
        page = pdf_document.load_page(page_number)  # Load the current page
        page.set_rotation(45)  # Rotate the page by 45 degrees (optional)

        # Render the page as an image
        pix = page.get_pixmap()

        # Construct the output image filename
        img_filename = f"{pdf_name}_page_{page_number + 1}.png"
        output_path = os.path.join(folder_path, img_filename)

        # Save the rendered image to the output directory
        pix.save(output_path)

        # Log the created image in the summary
        summary += f"New Image Created: {img_filename}\n"

    summary += "\n"  # Add a blank line between entries for different PDFs

# Print the summary log
print(summary)


Summary
*************************
Folder Created at : lecture_images\Lecture_02_03
New Image Created:Lecture_02_03_page_1
New Image Created:Lecture_02_03_page_2
New Image Created:Lecture_02_03_page_3
New Image Created:Lecture_02_03_page_4
New Image Created:Lecture_02_03_page_5
New Image Created:Lecture_02_03_page_6
New Image Created:Lecture_02_03_page_7
New Image Created:Lecture_02_03_page_8
New Image Created:Lecture_02_03_page_9
New Image Created:Lecture_02_03_page_10
New Image Created:Lecture_02_03_page_11
New Image Created:Lecture_02_03_page_12
New Image Created:Lecture_02_03_page_13

Folder Created at : lecture_images\Lecture_02_06
New Image Created:Lecture_02_06_page_1
New Image Created:Lecture_02_06_page_2
New Image Created:Lecture_02_06_page_3
New Image Created:Lecture_02_06_page_4
New Image Created:Lecture_02_06_page_5
New Image Created:Lecture_02_06_page_6
New Image Created:Lecture_02_06_page_7
New Image Created:Lecture_02_06_page_8
New Image Created:Lecture_02_06_page_9
New I

## Part 2 Extracting Content from Lecture

In [65]:
import sys
import os
import asyncio
from langchain import hub
from typing import Optional
from src.llm_module_generator.image_extraction.image_llm import ImageLLMProcessor
from pydantic import BaseModel,Field
from typing import List, Dict, Optional

# Define Structure Output Models for Lecture Summary (Whole PDF)
class KeyWord(BaseModel):
    keyword:str = Field(...,description="Keyword")
    description: str = Field(...,description = "Description of the keyword.Use LaTeX for any mathematical symbols or equations. ")
class LectureSummary(BaseModel):
    lecture_name: str = Field(...,description = "A concise and descriptive title of the lecture")
    lecture_subtitle: str= Field (...,description = "A subtitle that is essentially a super consise summary meant to add more information about the general lecture")
    summary: str = Field(..., description="A summary of the lecture material, describing the essence of what the lecture was about. Use LaTeX for any mathematical symbols or equations.")
    key_concepts: List[KeyWord] = Field(..., description="A list of key concepts covered in the lecture. Use LaTeX for any mathematical symbols or equations.")
    foundational_concepts: List[KeyWord] = Field(..., description="A list of prerequisite concepts that the lecture builds upon. Use LaTeX for any mathematical symbols or equations.")
    search_keywords: List[str] = Field(..., description="Return a list of relevant search queries which will be used to find external references for generating additional info", max_items=3)


lecture_summary_prompt = hub.pull("lecture-summary")
print('Prompt : Lecture Summary')
print(lecture_summary_prompt[0].pretty_print())

Prompt : Lecture Summary


**Prompt for Analyzing Lecture Content**
You are tasked with analyzing the **entire** provided lecture slides on a specific class topic. Follow the instructions below to generate a detailed analysis, and return the results as a structured JSON object with the specified keys:
### 0. **Lecture Title**:
   - Generate a concise, descriptive title that encapsulates the primary focus and subject of the lecture. Ensure that it accurately reflects the core content.
### 1. **Lecture Subtitle**:
   - Provide a short subtitle that adds further context to the title. The subtitle should briefly summarize the lecture's scope, giving additional insight into the key points or themes covered.
### 2. **Summary**:
   - Offer a clear, concise, and comprehensive summary of the **entire lecture material**, capturing the most important concepts, ideas, and discussions. Focus on the overall flow and core messages of the lecture.
   - Ensure that complex ideas are simplified for broa

In [66]:
# Define Structure Output Model for Individial Pages
class SinglePage(BaseModel):
    summary: str = Field(..., description="A summary of the lecture material, describing the essence of what the lecture was about. Use LaTeX for any mathematical symbols or equations.And the description of charts and images")
    image_description: Optional[str] = Field(...,description='If any images, charts or diagrams are present describe them and include the text "DESCRIPTION OF THE IMAGE OR CHART"')
single_page_prompt = hub.pull("rag-lecture-extract")
print('Prompt : Single Page Summary')
print(single_page_prompt[1].pretty_print())

Prompt : Single Page Summary

You are tasked with extracting information from engineering lecture slides. Please adhere to the following guidelines:

1. Generate a concise summary of the content depicted on each lecture slide.
2. For any mathematical formulas, use LaTeX formatting: enclose block-level math with $$ and inline math with $.
3. If there are images, diagrams, or charts present on the slide, describe them clearly and include the text "DESCRIPTION OF THE IMAGE OR CHART" to indicate the nature of the visual content.If there are no images return None

<example>
Slide Content: A diagram showing the flow of electrical current in a circuit with labeled components.
Summary: This slide illustrates the flow of electrical current in a circuit, highlighting key components such as the power source, resistors, and conductors. The diagram includes arrows indicating the direction of current flow. DESCRIPTION OF THE IMAGE OR CHART
</example>
None


In [119]:
import re
from tkinter import Image

from src.process_prairielearn import extract

path = r'C:\Users\lberm\OneDrive\Documents\Github\mechedu1.0\src\notebooks\lecture_images'
def natural_sort_key(filename):
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', filename)]
   
# async def process_lecture(path):
#     extract_lecture_summary = ImageLLMProcessor(
#     prompt = lecture_summary_prompt,
#     response = LectureSummary,
#     model = 'gpt-4o')
    
#     async def process_entire_lecture(root,files):
#         try:
#             pdf_name = files[0].split('_page')[0]
#             lecture_files = sorted([os.path.join(root, file) for file in files], key=natural_sort_key)
#             result = await extract_lecture_summary.send_request(lecture_files)
#             return pdf_name, result
#         except Exception as e:
#             print(f"Error processing files in {root}: {e}")
#             return None, None
        
        
#     tasks = []
#     for root,dirs,files in os.walk(path):
#         if not dirs:
#             if files:
#                 tasks.append(process_entire_lecture(root,files))
#     results = await(asyncio.gather(*tasks))
#     data = {pdf_name: result for pdf_name, result in results if pdf_name and result}
#     return data

# data = await process_lecture(path)
extract_page_summary = ImageLLMProcessor(
    prompt = single_page_prompt,
    response=SinglePage,
    model = 'gpt-4o'
)

async def process_single_page(root, files):
    pdf_name = files[0].split('_page')[0]
    for file in files:
        page_num = file.split('_page_')[1].split('.png')[0]
        file_path = os.path.join(root, file)
        result = await extract_page_summary.send_request([file_path])
        return pdf_name,page_num,result
    
    
for root,dirs,files in os.walk(path):
    if not dirs:
        if files:
            pdf_name = files[0].split('_page')[0]
            for file in files:
                page_num = file.split('_page_')[1].split('.png')[0]
                file_path = os.path.join(root, file)
                result = await extract_page_summary.send_request([file_path])
                print(result)
        

2024-12-30 20:32:05 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]


{'summary': 'This lecture material discusses the stability of a floating cylindrical log. It involves calculations using the densities of wood (\\(\\rho_{wood} = 0.6\\)) and oil (\\(\\rho_{oil} = 0.8\\)), balancing forces for buoyancy (\\(\\Sigma F_y = 0\\)), and determining the submerged length \\(L_s\\) of the cylinder. \\(L_s\\) is derived using the formula \\(L_s = \\frac{\\rho_{wood}}{\\rho_{oil}} L = \\frac{3}{4} L\\). The metacentric height (GM), defined as \\(GM = \\frac{I_c}{V} - BG\\), is calculated to ensure stability, where \\(I_c\\) is the moment of inertia and \\(V\\) is the submerged volume. The condition for stability is \\(GM > 0\\), leading to the inequality \\(\\frac{D^2}{16 L_s} - \\frac{L}{8} > 0\\). This results in a condition for the greatest length for stability: \\(L < \\frac{\\sqrt{2}}{3} D\\).', 'image_description': 'The image depicts a cylindrical log floating vertically in a fluid. The log has diameter \\(D\\) and length \\(L\\). It shows the positions of t

2024-12-30 20:32:11 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]


{'summary': 'The lecture discusses the behavior of a fluid in a rotating system, focusing on the formation of a paraboloid of revolution. The analysis begins with the assumption that the pressure difference \\( dp \\) is zero, leading to an equation involving partial derivatives of pressure with respect to radial and vertical coordinates (\\( \\frac{\\partial p}{\\partial r} \\) and \\( \\frac{\\partial p}{\\partial z} \\)). The angular velocity \\( \\omega \\) is constant, and the fluid experiences a centripetal acceleration \\( a_z = \\frac{r \\omega^2}{g} \\). By integrating the derived equation \\( \\frac{dz}{dr} = \\frac{\\omega^2 r}{g} \\) from \\( z_0 \\) to \\( z \\), the equation \\( z - z_0 = \\frac{\\omega^2 r^2}{2g} \\) is obtained, which represents a paraboloid of revolution.', 'image_description': 'The image includes a diagram of a fluid surface in a rotating container, showing a parabolic shape. Equations and derivations are written out, starting with the pressure equati

CancelledError: 

In [149]:
import re
from src.process_prairielearn import extract

path = r'C:\Users\lberm\OneDrive\Documents\Github\mechedu1.0\src\notebooks\lecture_images'
def natural_sort_key(filename):
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', filename)]
   
   
async def process_lecture(path:str):
    extract_lecture_summary= ImageLLMProcessor(
    prompt = lecture_summary_prompt,
    response = LectureSummary,
    model = 'gpt-4o')
    extract_page_summary = ImageLLMProcessor(
        prompt = single_page_prompt,
        response=SinglePage,
        model = 'gpt-4o')
    
    async def process_entire_lecture(root,files):
        data = {}
        try:
            pdf_name = files[0].split('_page')[0]
            lecture_files = sorted([os.path.join(root, file) for file in files], key=natural_sort_key)
            lecture_summary = await extract_lecture_summary.send_request(lecture_files)
            all_page_summaries = []
            for file in files:
                try:
                    page_data = {}
                    page_num = file.split('_page_')[1].split('.png')[0]
                    file_path = os.path.join(root, file)
                    single_page_summary = await extract_page_summary.send_request([file_path])
                    page_data[page_num] = single_page_summary
                    all_page_summaries.append(page_data)
                except Exception as e:
                    print(f"Error processing file {file}: {e}")
            data[pdf_name] = [lecture_summary,all_page_summaries]
            return data
        except Exception as e:
            print(f"Error processing files in {root}: {e}")
            return None
    tasks = []
    for root,dirs,files in os.walk(path):
        if not dirs:
            if files:
                tasks.append(process_entire_lecture(root,files))
    results = await(asyncio.gather(*tasks))
    return results
results = await process_lecture(path)             

2024-12-30 21:03:49 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:50 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:52 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:52 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:52 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:52 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:52 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:03:53 - httpx - INFO - HTTP

Error processing file Lecture_03_13_page_5.png: 1 validation error for SinglePage
image_description
  Field required [type=missing, input_value={'summary': 'This lecture... highlighted in boxes.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/missing


2024-12-30 21:04:41 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:42 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:43 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:43 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:43 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:45 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:46 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK" [_client.py - 1773]
2024-12-30 21:04:46 - httpx - INFO - HTTP

In [164]:
import pandas as pd

# Assuming `results` is your input list of dictionaries
cleaned_data = []

for r in results:
    for lecture_id, content in r.items():
        for item in content:
            if isinstance(item, dict):  # Process only dictionaries
                # Extract basic information
                lecture_name = item.get('lecture_name', 'N/A')
                summary = item.get('summary', 'N/A')
                lecture_subtitle = item.get('lecture_subtitle', 'N/A')
                key_concepts = item.get('key_concepts', [])
                foundational_concepts = item.get('foundational_concepts', [])

                # Combine key concepts into a string
                key_concepts_text = "\n".join(
                    [f"- {concept['keyword']}: {concept['description']}" for concept in key_concepts]
                )

                # Combine foundational concepts into a string
                foundational_concepts_text = "\n".join(
                    [f"- {concept['keyword']}: {concept['description']}" for concept in foundational_concepts]
                )

                # Create the full description
                full_description = (
                    f"Lecture Name: {lecture_name}\n"
                    f"Summary: {summary}\n"
                    f"Subtitle: {lecture_subtitle}\n\n"
                    f"Key Concepts:\n{key_concepts_text}\n\n"
                    f"Foundational Concepts:\n{foundational_concepts_text}"
                )

                # Add the full data to the cleaned_data list
                cleaned_data.append({
                    'lecture_id': lecture_id,
                    'lecture_name': lecture_name,
                    'summary': summary,
                    'lecture_subtitle': lecture_subtitle,
                    'key_concepts': key_concepts,
                    'foundational_concepts': foundational_concepts,
                    'full_description': full_description
                })

# Convert the cleaned data into a DataFrame
df = pd.DataFrame(cleaned_data)

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,lecture_id,lecture_name,summary,lecture_subtitle,key_concepts,foundational_concepts,full_description
0,Lecture_02_03,Stability and Dynamics of Fluids in Acceleration,The lecture provides an in-depth analysis of t...,"Analyzing buoyancy, stability conditions, and ...","[{'keyword': 'Buoyancy', 'description': 'The f...","[{'keyword': 'Density', 'description': 'The ma...",Lecture Name: Stability and Dynamics of Fluids...
1,Lecture_02_06,Fluid Flow and Conservation Principles,The lecture covers essential principles of flu...,"Understanding Mass, Energy, and Momentum Balan...","[{'keyword': 'Mass Flow Rate', 'description': ...","[{'keyword': 'Density', 'description': 'Mass p...",Lecture Name: Fluid Flow and Conservation Prin...
2,Lecture_02_08,Fluid Dynamics: Mass and Energy Balances,The lecture presents fundamental principles of...,Exploring Mass and Energy Conservation in Flui...,"[{'keyword': 'Mass Balance', 'description': 'I...","[{'keyword': 'Conservation Laws', 'description...",Lecture Name: Fluid Dynamics: Mass and Energy ...
3,Lecture_02_13,Fluid Dynamics in Pipe Systems,This lecture covers the principles of fluid dy...,Analyzing Flow and Pressure Losses in Piping,"[{'keyword': 'Bernoulli Equation', 'descriptio...","[{'keyword': 'Fluid Properties', 'description'...",Lecture Name: Fluid Dynamics in Pipe Systems\n...
4,Lecture_02_27 (1),Velocity Triangles and Euler's Turbomachinery ...,The lecture focuses on the analysis and applic...,Analyzing Inlet and Outlet Velocity Triangles ...,"[{'keyword': 'Velocity Triangle', 'description...","[{'keyword': 'Fluid Dynamics', 'description': ...",Lecture Name: Velocity Triangles and Euler's T...


In [166]:
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv()
client = OpenAI(api_key = os.environ.get('OPENAI_API_KEY'))

def get_embedding(text_input):
    response = client.embeddings.create(
        input=text_input,
        model="text-embedding-3-large"
    )
    return response.data[0].embedding
df['embeddings'] = df['full_description'].apply(get_embedding)

2024-12-30 21:38:08 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:09 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:09 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:09 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:10 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:10 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:10 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]
2024-12-30 21:38:10 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/e

In [167]:
df

Unnamed: 0,lecture_id,lecture_name,summary,lecture_subtitle,key_concepts,foundational_concepts,full_description,embeddings
0,Lecture_02_03,Stability and Dynamics of Fluids in Acceleration,The lecture provides an in-depth analysis of t...,"Analyzing buoyancy, stability conditions, and ...","[{'keyword': 'Buoyancy', 'description': 'The f...","[{'keyword': 'Density', 'description': 'The ma...",Lecture Name: Stability and Dynamics of Fluids...,"[-0.0362975038588047, 0.02360553666949272, -0...."
1,Lecture_02_06,Fluid Flow and Conservation Principles,The lecture covers essential principles of flu...,"Understanding Mass, Energy, and Momentum Balan...","[{'keyword': 'Mass Flow Rate', 'description': ...","[{'keyword': 'Density', 'description': 'Mass p...",Lecture Name: Fluid Flow and Conservation Prin...,"[-0.03665745630860329, 0.04078352451324463, -0..."
2,Lecture_02_08,Fluid Dynamics: Mass and Energy Balances,The lecture presents fundamental principles of...,Exploring Mass and Energy Conservation in Flui...,"[{'keyword': 'Mass Balance', 'description': 'I...","[{'keyword': 'Conservation Laws', 'description...",Lecture Name: Fluid Dynamics: Mass and Energy ...,"[-0.03860820084810257, 0.028252648189663887, -..."
3,Lecture_02_13,Fluid Dynamics in Pipe Systems,This lecture covers the principles of fluid dy...,Analyzing Flow and Pressure Losses in Piping,"[{'keyword': 'Bernoulli Equation', 'descriptio...","[{'keyword': 'Fluid Properties', 'description'...",Lecture Name: Fluid Dynamics in Pipe Systems\n...,"[-0.0414079986512661, 0.03817657381296158, -0...."
4,Lecture_02_27 (1),Velocity Triangles and Euler's Turbomachinery ...,The lecture focuses on the analysis and applic...,Analyzing Inlet and Outlet Velocity Triangles ...,"[{'keyword': 'Velocity Triangle', 'description...","[{'keyword': 'Fluid Dynamics', 'description': ...",Lecture Name: Velocity Triangles and Euler's T...,"[-0.03929091989994049, 0.009836392477154732, -..."
5,Lecture_03_01 (1),Fundamentals of Pump Dynamics and Performance,The lecture focuses on the principles of pump ...,Analyzing Fluid Flow and Energy Transfer in Pumps,"[{'keyword': 'Flow Rate (Q)', 'description': '...","[{'keyword': 'Fluid Mechanics', 'description':...",Lecture Name: Fundamentals of Pump Dynamics an...,"[-0.04660307615995407, 0.032535798847675323, -..."
6,Lecture_03_08,Fundamentals of Fluid Machinery and Rocket Dyn...,The lecture delves into the principles governi...,Analyzing Centrifugal and Axial Machines with ...,"[{'keyword': 'Centrifugal Machines', 'descript...","[{'keyword': 'Fluid Dynamics', 'description': ...",Lecture Name: Fundamentals of Fluid Machinery ...,"[-0.04120573028922081, 0.02925964631140232, -0..."
7,Lecture_03_13,Differential Analysis of Fluid Flow,This lecture covers the fundamental principles...,Exploring Mass and Momentum Conservation in Fl...,"[{'keyword': 'Continuity Equation', 'descripti...","[{'keyword': 'Fluid Mechanics', 'description':...",Lecture Name: Differential Analysis of Fluid F...,"[-0.03280262276530266, 0.027675466611981392, -..."
8,Lecture_03_15,Laminar Flow in Circular Pipes,The lecture focuses on deriving the velocity p...,Understanding the velocity profile and pressur...,"[{'keyword': 'Steady-State Flow', 'description...","[{'keyword': 'Fluid Viscosity', 'description':...",Lecture Name: Laminar Flow in Circular Pipes\n...,"[-0.04906211420893669, 0.03642693907022476, -0..."


In [176]:
from uuid import uuid4
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
documents = []
for i in range(len(df)):
    d = Document(
        page_content=df.iloc[i].full_description,
        metadata={"source": df.iloc[i].lecture_id},
        id=i
    )
    documents.append(d)
print(documents)
uuids = [str(uuid4()) for _ in range(len(documents))]

from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

[Document(id='0', metadata={'source': 'Lecture_02_03'}, page_content="Lecture Name: Stability and Dynamics of Fluids in Acceleration\nSummary: The lecture provides an in-depth analysis of the stability and dynamics of fluids under various conditions of acceleration. Initial discussions focus on the stability of a floating log, highlighting the conditions for stability and the calculation of submerged volume. This involves concepts like buoyancy and the center of gravity. The lecture then transitions to the behavior of fluids in accelerating containers, discussing how pressure gradients are affected by acceleration in different directions, and how this influences fluid surface slopes. Mathematical derivations are provided to determine the slope of the fluid surface and the conditions for stability. The lecture further explores the dynamics of rotating fluids, using cylindrical coordinates to derive expressions for pressure distribution. Concepts such as the parabolic shape of rotating f

2024-12-30 21:44:12 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information. [posthog.py - 20]


In [177]:
vector_store.add_documents(documents=documents, ids=uuids)

2024-12-30 21:44:25 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]


['66d7001a-19b5-444b-b3dc-00dc0144925b',
 '759ad572-53b6-458c-a277-95916086d3a5',
 '396ad5d6-ef0f-4405-9289-687b4ebb5a0a',
 '916e8029-3455-4199-8197-da5d8926a860',
 'bb32bb3d-a66a-471c-a2cb-d98927b56850',
 '03469b52-fd37-4da1-82f5-f9f570e77679',
 'cb0abe7f-8022-47c7-8eac-45a0328ea93c',
 '7aa15e9d-9b8f-4bab-8edc-fdf473d4efbe',
 'f8539352-c789-4bc6-8eff-e6233e975c08']

In [180]:
results = vector_store.similarity_search(
    """### Concise Physics Question:

A diesel fluid flows through a pipeline with a diameter of \( 25 \, \text{mm} \) and a total length of \( 10 \, \text{m} \). The flow rate is \( 30 \, \text{liters/min} \), and the kinematic viscosity of the fluid is \( 2.5 \times 10^{-6} \, \text{m}^2/\text{s} \). 

1. **Determine the Reynolds number for the flow.**  
2. Based on the Reynolds number, explain whether the flow is laminar or turbulent.""",
    k=1,
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

2024-12-30 21:48:49 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK" [_client.py - 1026]


* Lecture Name: Fluid Dynamics in Pipe Systems
Summary: This lecture covers the principles of fluid dynamics as applied to pipe systems, focusing on flow calculations, pressure losses, and energy considerations. It applies the Bernoulli equation and concepts of momentum balance to analyze the flow of diesel through a pipeline. The lecture explores head loss due to friction and minor losses, Reynolds number for flow regime determination, and the calculation of power requirements for pumping. It also includes the analysis of net positive suction head (NPSH) to ensure cavitation avoidance, and discusses the application of the momentum equation in control volumes to calculate forces on surfaces.
Subtitle: Analyzing Flow and Pressure Losses in Piping

Key Concepts:
- Bernoulli Equation: An energy conservation equation used to relate pressures, velocities, and heights along a streamline in a fluid flow. Expressed as \( \frac{p_1}{\rho} + \frac{V_1^2}{2} + gz_1 = \frac{p_2}{\rho} + \frac{V_2^