In [6]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms
from sentence_transformers import SentenceTransformer, util
import warnings
warnings.filterwarnings("ignore")

# Install required packages first
def install_requirements():
    import subprocess
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
        'pyassimp'
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)
def look_at(pos, target, up):
    """Create look-at matrix for camera"""
    forward = np.array(target) - np.array(pos)
    forward = forward / np.linalg.norm(forward)
    
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    new_up = np.cross(right, forward)
    new_up = new_up / np.linalg.norm(new_up)
    
    mat = np.eye(4)
    mat[:3, 0] = right
    mat[:3, 1] = new_up
    mat[:3, 2] = -forward
    mat[:3, 3] = pos
    
    return mat

class EnhancedModelRenderer(ModelRenderer):
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load FBX using trimesh
        mesh = trimesh.load(mesh_path, file_type='fbx')
        if isinstance(mesh, trimesh.Scene):
            mesh = mesh.dump(concatenate=True)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        mesh_node = pyrender.Node(mesh=mesh)
        self.scene.add_node(mesh_node)

        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
        
        r.delete()
        return images
class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]
    
class QAProcessor:
    def __init__(self, explanation_text):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.knowledge_base = self.process_text(explanation_text)
        self.embeddings = self.model.encode(self.knowledge_base)
        
    def process_text(self, text):
        # Split text into meaningful chunks
        return [
            "Connect battery to DPDT terminals to start the system",
            "DC motor rotates gears to lift weight against gravity",
            "System converts electrical to mechanical energy when powered",
            "When power is off, gravity rotates gears in reverse direction",
            "DC motor acts as generator during reverse rotation",
            "Spinning coils in magnetic field generates electricity",
            "Generated electricity lights up LED",
            "System demonstrates energy conversion between electrical/mechanical"
        ]
    
    def answer_question(self, question):
        question_embed = self.model.encode(question)
        similarities = util.cos_sim(question_embed, self.embeddings)[0]
        most_similar = torch.argmax(similarities).item()
        return self.knowledge_base[most_similar]

class ComponentClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.base_model = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256*28*28, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        x = self.base_model(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

def load_component_dataset(image_dir):
    classes = ['battery', 'motor', 'gear', 'led', 'weight', 'switch']
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    image_paths = []
    labels = []
    for class_idx, class_name in enumerate(classes):
        class_dir = os.path.join(image_dir, class_name)
        for img_file in os.listdir(class_dir):
            if img_file.endswith(('.png', '.jpg')):
                image_paths.append(os.path.join(class_dir, img_file))
                labels.append(class_idx)
    
    return Model3DDataset(image_paths, labels, transform)

def train_component_classifier(dataset):
    model = ComponentClassifier(num_classes=6)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    
    for epoch in range(15):
        model.train()
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        print(f'Epoch {epoch+1} complete')
    
    return model

class IntegratedSystem:
    def __init__(self, fbx_path, component_images_dir, explanation_text):
        # Initialize subsystems
        self.renderer = EnhancedModelRenderer()
        self.renderer.setup_camera()
        
        # Load component classifier
        self.classes = ['battery', 'motor', 'gear', 'led', 'weight', 'switch']
        self.classifier = torch.jit.load('component_classifier.pt')
        self.classifier.eval()
        
        # Initialize QA system
        self.qa = QAProcessor(explanation_text)
        
        # Load 3D model
        self.rendered_views = self.renderer.render_viewpoints(
            fbx_path, 
            output_dir="current_model_views"
        )
    
    def identify_component(self, image_path):
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        
        img = Image.open(image_path).convert('RGB')
        img_tensor = transform(img).unsqueeze(0)
        
        with torch.no_grad():
            outputs = self.classifier(img_tensor)
            _, pred = torch.max(outputs, 1)
        
        return self.classes[pred.item()]
    
    def answer_question(self, question):
        return self.qa.answer_question(question)
    
    def analyze_view(self, view_path):
        component = self.identify_component(view_path)
        explanation = self.qa.answer_question(f"Explain the {component} in this system")
        return {
            'component': component,
            'explanation': explanation,
            'related_views': [v for v in self.rendered_views if component in v]
        }

def main():
    install_requirements()
    
    # Configuration
    FBX_PATH = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive/force.fbx"
    COMPONENT_IMAGES_DIR = "power_generation_components"
    EXPLANATION_TEXT = """Your full working explanation text here..."""
    
    # Step 1: Train component classifier
    print("Training component classifier...")
    component_dataset = load_component_dataset(COMPONENT_IMAGES_DIR)
    trained_model = train_component_classifier(component_dataset)
    torch.jit.save(torch.jit.script(trained_model), 'component_classifier.pt')
    
    # Step 2: Initialize integrated system
    print("Initializing integrated system...")
    system = IntegratedSystem(
        fbx_path=FBX_PATH,
        component_images_dir=COMPONENT_IMAGES_DIR,
        explanation_text=EXPLANATION_TEXT
    )
    
    # Example usage
    print("\nTESTING SYSTEM:")
    
    # Component identification test
    test_image = "path/to/test_component.jpg"
    print(f"Component identification: {system.identify_component(test_image)}")
    
    # QA test
    question = "How does the system generate electricity?"
    print(f"Q: {question}\nA: {system.answer_question(question)}")
    
    # Full analysis of a view
    view_path = system.rendered_views[0]
    analysis = system.analyze_view(view_path)
    print(f"\nView analysis for {view_path}:")
    print(f"Main component: {analysis['component']}")
    print(f"Explanation: {analysis['explanation']}")

if __name__ == "__main__":
    main()

Training component classifier...


FileNotFoundError: [Errno 2] No such file or directory: 'power_generation_components/battery'

In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms
from sentence_transformers import SentenceTransformer, util
import warnings
warnings.filterwarnings("ignore")

# Install required packages first
def install_requirements():
    import subprocess
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
        'pyassimp'
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)

def look_at(pos, target, up):
    """Create look-at matrix for camera"""
    forward = np.array(target) - np.array(pos)
    forward = forward / np.linalg.norm(forward)
    
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    new_up = np.cross(right, forward)
    new_up = new_up / np.linalg.norm(new_up)
    
    mat = np.eye(4)
    mat[:3, 0] = right
    mat[:3, 1] = new_up
    mat[:3, 2] = -forward
    mat[:3, 3] = pos
    
    return mat

class EnhancedModelRenderer(ModelRenderer):
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load FBX using trimesh
        mesh = trimesh.load(mesh_path, file_type='fbx')
        if isinstance(mesh, trimesh.Scene):
            mesh = mesh.dump(concatenate=True)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        mesh_node = pyrender.Node(mesh=mesh)
        self.scene.add_node(mesh_node)

        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
        
        r.delete()
        return images

class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        # Convert to RGB to handle alpha channels and grayscale images
        image = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

class QAProcessor:
    def __init__(self, explanation_text):
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.knowledge_base = self.process_text(explanation_text)
        self.embeddings = self.model.encode(self.knowledge_base)
        
    def process_text(self, text):
        # Split text into meaningful chunks
        return [
            "Connect battery to DPDT terminals to start the system",
            "DC motor rotates gears to lift weight against gravity",
            "System converts electrical to mechanical energy when powered",
            "When power is off, gravity rotates gears in reverse direction",
            "DC motor acts as generator during reverse rotation",
            "Spinning coils in magnetic field generates electricity",
            "Generated electricity lights up LED",
            "System demonstrates energy conversion between electrical/mechanical"
        ]
    
    def answer_question(self, question):
        question_embed = self.model.encode(question)
        similarities = util.cos_sim(question_embed, self.embeddings)[0]
        most_similar = torch.argmax(similarities).item()
        return self.knowledge_base[most_similar]

class PartClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.base_model = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(256*28*28, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        x = self.base_model(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

def load_part_dataset(image_dir):
    """Load images and convert 1-based labels to 0-based"""
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    image_paths = []
    labels = []
    for img_file in os.listdir(image_dir):
        if img_file.endswith(('.png', '.jpg')):
            # Convert 1-based to 0-based labels
            part_number = int(img_file.split(' ')[-1].split('.')[0])  # e.g., "01" → 1
            label = part_number - 1  # Convert to 0-based
            
            image_paths.append(os.path.join(image_dir, img_file))
            labels.append(label)
    
    return Model3DDataset(image_paths, labels, transform)

def train_part_classifier(dataset):
    model = PartClassifier(num_classes=len(set(dataset.labels)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    
    for epoch in range(15):
        model.train()
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        print(f'Epoch {epoch+1} complete')
    
    return model

class IntegratedSystem:
    def __init__(self, fbx_path, component_images_dir, explanation_text):
        # Initialize subsystems
        self.renderer = EnhancedModelRenderer()
        self.renderer.setup_camera()
        
        # Load part classifier
        self.classifier = torch.jit.load('part_classifier.pt')
        self.classifier.eval()
        
        # Initialize QA system
        self.qa = QAProcessor(explanation_text)
        
        # Load 3D model
        self.rendered_views = self.renderer.render_viewpoints(
            fbx_path, 
            output_dir="current_model_views"
        )
    
    def identify_part(self, image_path):
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        
        img = Image.open(image_path).convert('RGB')
        img_tensor = transform(img).unsqueeze(0)
        
        with torch.no_grad():
            outputs = self.classifier(img_tensor)
            _, pred = torch.max(outputs, 1)
        
        return f"part_{pred.item():02d}"
    
    def answer_question(self, question):
        return self.qa.answer_question(question)
    
    def analyze_view(self, view_path):
        part = self.identify_part(view_path)
        explanation = self.qa.answer_question(f"Explain the {part} in this system")
        return {
            'part': part,
            'explanation': explanation,
            'related_views': [v for v in self.rendered_views if part in v]
        }

def main():
    install_requirements()
    
    # Configuration
    FBX_PATH = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive/force.fbx"
    COMPONENT_IMAGES_DIR = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/power generation png"
    EXPLANATION_TEXT = """
Once the model has been setup as per the instruction manual, connect the battery as indicated next to the DPDT terminals.

As the supply from the battery starts, the force generated by the DC motor rotates the connected gears. This turning of the gear, in turn rotates the associated gear and pulls the weight against gravity. Electric energy is converted to mechanical energy during this process because it involves movement caused by a force generated by a DC motor .

The DC motor in the model also works as a generator here, lets see how?

When the supply from the battery is turned off, the force of gravity acts on the weight and causes the gears to rotate in another direction. The DC motor contains coils of wire and magnets, when the gear turns, it makes the coils of wire spin inside the magnetic field created by the magnets.This spinning motion induces an electric current to flow through the wires into the LED ,which glows!! Converting the mechanical energy of the gear rotation into the electrical energy through the generator. 

This way! We can harness the force of gravity to generate electricity."""
    
    # Step 1: Train part classifier
    print("Training part classifier...")
    part_dataset = load_part_dataset(COMPONENT_IMAGES_DIR)
    trained_model = train_part_classifier(part_dataset)
    torch.jit.save(torch.jit.script(trained_model), 'part_classifier.pt')
    
    # Step 2: Initialize integrated system
    print("Initializing integrated system...")
    system = IntegratedSystem(
        fbx_path=FBX_PATH,
        component_images_dir=COMPONENT_IMAGES_DIR,
        explanation_text=EXPLANATION_TEXT
    )
    
    # Example usage
    print("\nTESTING SYSTEM:")
    
    # Part identification test
    test_image = os.path.join(COMPONENT_IMAGES_DIR, "pic 01.png")
    print(f"Part identification: {system.identify_part(test_image)}")
    
    # QA test
    question = "How does the system generate electricity?"
    print(f"Q: {question}\nA: {system.answer_question(question)}")
    
    # Full analysis of a view
    view_path = system.rendered_views[0]
    analysis = system.analyze_view(view_path)
    print(f"\nView analysis for {view_path}:")
    print(f"Main part: {analysis['part']}")
    print(f"Explanation: {analysis['explanation']}")

if __name__ == "__main__":
    main()

Training part classifier...
Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Epoch 4 complete
Epoch 5 complete
Epoch 6 complete
Epoch 7 complete
Epoch 8 complete
Epoch 9 complete
Epoch 10 complete
Epoch 11 complete
Epoch 12 complete
Epoch 13 complete
Epoch 14 complete
