In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import bpy
import mathutils
import os
from PIL import Image
import json
from fbx import *

class FBXProcessor:
    def __init__(self, fbx_path):
        self.fbx_path = fbx_path
        self.manager = FbxManager.Create()
        self.scene = FbxScene.Create(self.manager, "Scene")
        
    def load_fbx(self):
        importer = FbxImporter.Create(self.manager, "")
        importer.Initialize(self.fbx_path)
        importer.Import(self.scene)
        importer.Destroy()
        return self.scene

    def extract_labels(self):
        """Extract labels and annotations from FBX file"""
        labels = {}
        root_node = self.scene.GetRootNode()
        
        def process_node(node):
            for i in range(node.GetNodeAttributeCount()):
                attr = node.GetNodeAttributeByIndex(i)
                if attr.GetAttributeType() == FbxNodeAttribute.eMarker:
                    marker_name = node.GetName()
                    position = node.LclTranslation.Get()
                    labels[marker_name] = {
                        "position": position,
                        "type": "marker"
                    }
            
            for i in range(node.GetChildCount()):
                process_node(node.GetChild(i))
                
        process_node(root_node)
        return labels

class ModelRenderer:
    def __init__(self):
        self.scene = bpy.context.scene
        
    def setup_camera(self):
        """Setup camera for rendering from different viewpoints"""
        bpy.ops.object.camera_add()
        self.camera = bpy.context.active_object
        self.scene.camera = self.camera
        
    def render_viewpoints(self, num_views=8, output_dir="renders"):
        """Render model from different viewpoints"""
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 5.0  # Distance from center
            
            # Position camera
            self.camera.location = (
                radius * np.cos(angle),
                radius * np.sin(angle),
                2.0
            )
            
            # Point camera to center
            direction = mathutils.Vector((0, 0, 0)) - self.camera.location
            rot_quat = direction.to_track_quat('-Z', 'Y')
            self.camera.rotation_euler = rot_quat.to_euler()
            
            # Render
            output_path = os.path.join(output_dir, f"view_{i}.png")
            self.scene.render.filepath = output_path
            bpy.ops.render.render(write_still=True)
            images.append(output_path)
            
        return images

class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

class PartDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PartDetectionModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')
    
    return model

def main(fbx_path, output_dir):
    # Process FBX file
    fbx_processor = FBXProcessor(fbx_path)
    scene = fbx_processor.load_fbx()
    labels = fbx_processor.extract_labels()
    
    # Render views
    renderer = ModelRenderer()
    renderer.setup_camera()
    image_paths = renderer.render_viewpoints(output_dir=output_dir)
    
    # Prepare dataset
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    dataset = Model3DDataset(image_paths, labels, transform=transform)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Train model
    model = PartDetectionModel(num_classes=len(set(labels)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    trained_model = train_model(model, train_loader, criterion, optimizer)
    
    # Save model
    torch.save(trained_model.state_dict(), os.path.join(output_dir, 'model.pth'))
    
    # Save labels
    with open(os.path.join(output_dir, 'labels.json'), 'w') as f:
        json.dump(labels, f)

if __name__ == "__main__":
    fbx_path = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive (1)/force.fbx"
    output_dir = "output"
    main(fbx_path, output_dir)

ModuleNotFoundError: No module named 'bpy'

In [2]:
%pip install bpy

[31mERROR: Could not find a version that satisfies the requirement bpy (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for bpy[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [19]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms
from fbx import *

def install_requirements():
    """
    Install required packages using pip
    """
    import subprocess
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
        'fbx'
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class FBXProcessor:
    def __init__(self, fbx_path):
        self.fbx_path = fbx_path
        self.manager = FbxManager.Create()
        self.scene = FbxScene.Create(self.manager, "Scene")
        
    def load_fbx(self):
        importer = FbxImporter.Create(self.manager, "")
        importer.Initialize(self.fbx_path)
        importer.Import(self.scene)
        importer.Destroy()
        return self.scene

    def extract_labels(self):
        """Extract labels and annotations from FBX file"""
        labels = {}
        root_node = self.scene.GetRootNode()
        
        def process_node(node):
            for i in range(node.GetNodeAttributeCount()):
                attr = node.GetNodeAttributeByIndex(i)
                if attr.GetAttributeType() == FbxNodeAttribute.eMarker:
                    marker_name = node.GetName()
                    position = node.LclTranslation.Get()
                    labels[marker_name] = {
                        "position": position,
                        "type": "marker"
                    }
            
            for i in range(node.GetChildCount()):
                process_node(node.GetChild(i))
                
        process_node(root_node)
        return labels

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)
        
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        """Render model from different viewpoints using pyrender"""
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load mesh using trimesh and convert to pyrender
        mesh = trimesh.load(mesh_path)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        mesh_node = pyrender.Node(mesh=mesh)
        self.scene.add_node(mesh_node)
        
        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
            
        r.delete()
        return images

def look_at(pos, target, up):
    """Create look-at matrix for camera"""
    forward = np.array(target) - np.array(pos)
    forward = forward / np.linalg.norm(forward)
    
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    new_up = np.cross(right, forward)
    new_up = new_up / np.linalg.norm(new_up)
    
    mat = np.eye(4)
    mat[:3, 0] = right
    mat[:3, 1] = new_up
    mat[:3, 2] = -forward
    mat[:3, 3] = pos
    
    return mat

class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

class PartDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PartDetectionModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')
    
    return model

def main(fbx_path, output_dir):
    # First, convert FBX to a format that trimesh can read (e.g., OBJ or GLB)
    # You'll need to implement this conversion using a tool like Blender or other 3D converters
    mesh_path = convert_fbx_to_mesh(fbx_path)  # You need to implement this function
    
    # Process FBX file for labels
    fbx_processor = FBXProcessor(fbx_path)
    scene = fbx_processor.load_fbx()
    labels = fbx_processor.extract_labels()
    
    # Render views
    renderer = ModelRenderer()
    renderer.setup_camera()
    image_paths = renderer.render_viewpoints(mesh_path, output_dir=output_dir)
    
    # Prepare dataset
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    dataset = Model3DDataset(image_paths, labels, transform=transform)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Train model
    model = PartDetectionModel(num_classes=len(set(labels)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    trained_model = train_model(model, train_loader, criterion, optimizer)
    
    # Save model and labels
    torch.save(trained_model.state_dict(), os.path.join(output_dir, 'model.pth'))
    with open(os.path.join(output_dir, 'labels.json'), 'w') as f:
        json.dump(labels, f)

if __name__ == "__main__":
    # Install required packages
    install_requirements()
    
    fbx_path = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive (1)/force.fbx"
    output_dir = "output"
    main(fbx_path, output_dir)

ModuleNotFoundError: No module named 'fbx'

In [18]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms
import pyassimp

def install_requirements():
    """
    Install required packages using pip
    """
    import subprocess
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
        'pyassimp'
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class ModelProcessor:
    def __init__(self, model_path):
        self.model_path = model_path
        
    def load_model(self):
        """Load 3D model using pyassimp"""
        return pyassimp.load(self.model_path)
        
    def extract_labels(self, scene):
        """Extract labels and annotations from 3D model"""
        labels = {}
        
        def process_node(node, parent_name=""):
            node_name = f"{parent_name}/{node.name}" if parent_name else node.name
            
            # Store position information
            position = node.transformation[:3, 3]  # Get translation component
            labels[node_name] = {
                "position": position.tolist(),
                "type": "node"
            }
            
            # Process child nodes
            for child in node.children:
                process_node(child, node_name)
                
        process_node(scene.rootnode)
        return labels

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)
        
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        """Render model from different viewpoints using pyrender"""
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load mesh using trimesh and convert to pyrender
        mesh = trimesh.load(mesh_path)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        mesh_node = pyrender.Node(mesh=mesh)
        self.scene.add_node(mesh_node)
        
        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
            
        r.delete()
        return images

# [Previous helper functions remain the same: look_at, Model3DDataset, PartDetectionModel, train_model]

def main(model_path, output_dir):
    # Process 3D model
    processor = ModelProcessor(model_path)
    scene = processor.load_model()
    labels = processor.extract_labels(scene)
    
    # Save the model in a format trimesh can read
    temp_mesh_path = os.path.join(output_dir, "temp_mesh.obj")
    pyassimp.export(scene, temp_mesh_path, "obj")
    
    # Render views
    renderer = ModelRenderer()
    renderer.setup_camera()
    image_paths = renderer.render_viewpoints(temp_mesh_path, output_dir=output_dir)
    
    # Clean up
    pyassimp.release(scene)
    
    # Prepare dataset
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    dataset = Model3DDataset(image_paths, labels, transform=transform)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Train model
    model = PartDetectionModel(num_classes=len(labels))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    trained_model = train_model(model, train_loader, criterion, optimizer)
    
    # Save model and labels
    torch.save(trained_model.state_dict(), os.path.join(output_dir, 'model.pth'))
    with open(os.path.join(output_dir, 'labels.json'), 'w') as f:
        json.dump(labels, f)

if __name__ == "__main__":
    # Install required packages
    install_requirements()
    
    model_path = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive (1)/force.fbx"
    output_dir = "output"
    main(model_path, output_dir)

AssimpError: assimp library not found

In [23]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms
import subprocess

def install_requirements():
    """
    Install required packages using pip
    """
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
        'fbx2gltf'
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class FBXConverter:
    def __init__(self, fbx_path):
        self.fbx_path = fbx_path
        
    def to_gltf(self, output_dir):
        """Convert FBX to GLTF format"""
        output_path = os.path.join(output_dir, 'model.gltf')
        os.makedirs(output_dir, exist_ok=True)
        
        try:
            from fbx2gltf import FBXtoGLTF
            converter = FBXtoGLTF()
            converter.convert(self.fbx_path, output_path)
        except Exception as e:
            print(f"Error converting FBX to GLTF: {e}")
            print("Attempting command line conversion...")
            
            # Fallback to command line tool if available
            try:
                subprocess.run(['FBX2glTF', '--input', self.fbx_path, '--output', output_path])
            except Exception as e:
                print(f"Command line conversion failed: {e}")
                raise
                
        return output_path

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)
        
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        """Render model from different viewpoints using pyrender"""
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load mesh using trimesh and convert to pyrender
        try:
            mesh = trimesh.load(mesh_path)
            mesh = pyrender.Mesh.from_trimesh(mesh)
            mesh_node = pyrender.Node(mesh=mesh)
            self.scene.add_node(mesh_node)
        except Exception as e:
            print(f"Error loading mesh: {e}")
            raise
        
        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
            
        r.delete()
        return images

def look_at(pos, target, up):
    """Create look-at matrix for camera"""
    forward = np.array(target) - np.array(pos)
    forward = forward / np.linalg.norm(forward)
    
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    new_up = np.cross(right, forward)
    new_up = new_up / np.linalg.norm(new_up)
    
    mat = np.eye(4)
    mat[:3, 0] = right
    mat[:3, 1] = new_up
    mat[:3, 2] = -forward
    mat[:3, 3] = pos
    
    return mat

# [Previous Dataset and Model classes remain the same]
class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

class PartDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PartDetectionModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def main(fbx_path, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    
    # Convert FBX to GLTF
    converter = FBXConverter(fbx_path)
    gltf_path = converter.to_gltf(output_dir)
    
    # Extract basic metadata from the mesh
    mesh = trimesh.load(gltf_path)
    labels = {
        "num_vertices": len(mesh.vertices),
        "num_faces": len(mesh.faces),
        "bounds": mesh.bounds.tolist()
    }
    
    # Render views
    renderer = ModelRenderer()
    renderer.setup_camera()
    image_paths = renderer.render_viewpoints(gltf_path, output_dir=os.path.join(output_dir, "renders"))
    
    # Prepare dataset
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    dataset = Model3DDataset(image_paths, labels, transform=transform)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Train model
    model = PartDetectionModel(num_classes=len(set(labels)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    trained_model = train_model(model, train_loader, criterion, optimizer)
    
    # Save model and labels
    torch.save(trained_model.state_dict(), os.path.join(output_dir, 'model.pth'))
    with open(os.path.join(output_dir, 'labels.json'), 'w') as f:
        json.dump(labels, f)

if __name__ == "__main__":
    # Install required packages
    install_requirements()
    
    fbx_path = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/Zoho WorkDrive (1)/force.fbx"
    output_dir = "output"
    main(fbx_path, output_dir)

Error converting FBX to GLTF: No module named 'fbx2gltf'
Attempting command line conversion...
Command line conversion failed: [Errno 2] No such file or directory: 'FBX2glTF'


FileNotFoundError: [Errno 2] No such file or directory: 'FBX2glTF'

In [25]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import trimesh
import pyrender
import os
from PIL import Image
import json
from torchvision import transforms

def install_requirements():
    """
    Install required packages using pip
    """
    import subprocess
    packages = [
        'trimesh',
        'pyrender',
        'torch',
        'torchvision',
        'pillow',
        'numpy',
    ]
    for package in packages:
        subprocess.check_call(['pip', 'install', package])

class ModelRenderer:
    def __init__(self):
        self.scene = pyrender.Scene()
        
    def setup_camera(self):
        """Setup camera for rendering"""
        camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
        self.camera_node = pyrender.Node(camera=camera)
        self.scene.add_node(self.camera_node)
        
    def render_viewpoints(self, mesh_path, num_views=8, output_dir="renders"):
        """Render model from different viewpoints using pyrender"""
        os.makedirs(output_dir, exist_ok=True)
        images = []
        
        # Load mesh using trimesh and convert to pyrender
        mesh = trimesh.load(mesh_path)
        mesh = pyrender.Mesh.from_trimesh(mesh)
        mesh_node = pyrender.Node(mesh=mesh)
        self.scene.add_node(mesh_node)
        
        # Add light
        light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=5.0)
        light_node = pyrender.Node(light=light)
        self.scene.add_node(light_node)
        
        r = pyrender.OffscreenRenderer(viewport_width=640, viewport_height=480)
        
        for i in range(num_views):
            angle = (2.0 * np.pi * i) / num_views
            radius = 2.0  # Distance from center
            
            # Position camera
            cam_pos = np.array([
                radius * np.cos(angle),
                radius * np.sin(angle),
                1.0
            ])
            
            # Look at center
            cam_target = np.array([0.0, 0.0, 0.0])
            cam_up = np.array([0.0, 0.0, 1.0])
            
            # Compute camera matrix
            cam_matrix = look_at(cam_pos, cam_target, cam_up)
            self.scene.set_pose(self.camera_node, cam_matrix)
            
            # Render
            color, depth = r.render(self.scene)
            image = Image.fromarray(color)
            
            output_path = os.path.join(output_dir, f"view_{i}.png")
            image.save(output_path)
            images.append(output_path)
            
        r.delete()
        return images

def look_at(pos, target, up):
    """Create look-at matrix for camera"""
    forward = np.array(target) - np.array(pos)
    forward = forward / np.linalg.norm(forward)
    
    right = np.cross(forward, up)
    right = right / np.linalg.norm(right)
    
    new_up = np.cross(right, forward)
    new_up = new_up / np.linalg.norm(new_up)
    
    mat = np.eye(4)
    mat[:3, 0] = right
    mat[:3, 1] = new_up
    mat[:3, 2] = -forward
    mat[:3, 3] = pos
    
    return mat

class Model3DDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)
        
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx]

class PartDetectionModel(nn.Module):
    def __init__(self, num_classes):
        super(PartDetectionModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')
    
    return model

def main(mesh_path, output_dir, labels):
    # Render views
    renderer = ModelRenderer()
    renderer.setup_camera()
    image_paths = renderer.render_viewpoints(mesh_path, output_dir=output_dir)
    
    # Prepare dataset
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    dataset = Model3DDataset(image_paths, labels, transform=transform)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True)
    
    # Train model
    model = PartDetectionModel(num_classes=len(set(labels)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    trained_model = train_model(model, train_loader, criterion, optimizer)
    
    # Save model and labels
    torch.save(trained_model.state_dict(), os.path.join(output_dir, 'model.pth'))
    with open(os.path.join(output_dir, 'labels.json'), 'w') as f:
        json.dump(labels, f)

if __name__ == "__main__":
    # Install required packages
    install_requirements()
    
    # Path to your 3D model file (e.g., .obj or .glb)
    mesh_path = "/Users/vyakaranamsowmya/Desktop/Bot-LLM-1/power generation.obj"  # Change this to your model path
    
    # Output directory for rendered images and model
    output_dir = "output"
    
    # Labels (you can manually define these or load them from a file)
    labels = {
        "part1": 0,
        "part2": 1,
        # Add more parts as needed
    }
    
    main(mesh_path, output_dir, labels)



TypeError: Expected a Trimesh or a list, got a <class 'trimesh.scene.scene.Scene'>

In [3]:
import os
import bpy
import json
import torch
import numpy as np
import faiss
from PIL import Image
from transformers import (
    BlipProcessor, 
    BlipForConditionalGeneration,
    CLIPProcessor, 
    CLIPModel
)
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document
from langchain.chains import RetrievalQA

# Configuration
MODEL_DIR = "model_data"
VIEWS_DIR = "rendered_views"
COMPONENTS_DIR = "components"
DESCRIPTIONS_FILE = "descriptions.json"

# Initialize AI models
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def process_fbx(fbx_path):
    """Process FBX file to generate views and components"""
    # Remove default objects
    bpy.ops.object.delete()

    # Import FBX
    bpy.ops.import_scene.fbx(filepath=fbx_path)

    # Generate views (simplified example)
    camera_positions = [
        (5, 0, 0, (0, 0, 0)),   # Front
        (0, 5, 0, (0, 0, 90)),  # Right
        (0, 0, 5, (90, 0, 0)),  # Top
        (5, 5, 5, (45, 45, 45)) # Isometric
    ]
    
    for idx, (x, y, z, rot) in enumerate(camera_positions):
        # Setup camera and render
        # (Implementation details would go here)
        pass

    # Extract components
    components = [obj for obj in bpy.context.scene.objects if obj.type == 'MESH']
    for comp in components:
        # Isolate and render component
        # (Implementation details would go here)
        pass

def describe_image(image_path):
    """Generate text description of an image"""
    image = Image.open(image_path).convert('RGB')
    inputs = blip_processor(image, return_tensors="pt")
    output = blip_model.generate(**inputs)
    return blip_processor.decode(output[0], skip_special_tokens=True)

def create_knowledge_base():
    """Create multimodal knowledge base"""
    documents = []
    
    # Load component descriptions
    with open(DESCRIPTIONS_FILE) as f:
        components = json.load(f)
    
    # Process components
    for name, desc in components.items():
        # Text description
        text = f"Component {name}: {desc}"
        documents.append(create_document(text))
        
        # Component image
        img_path = os.path.join(COMPONENTS_DIR, f"{name}.png")
        if os.path.exists(img_path):
            caption = describe_image(img_path)
            documents.append(create_document(caption))

    # Process views
    for view in os.listdir(VIEWS_DIR):
        view_path = os.path.join(VIEWS_DIR, view)
        caption = describe_image(view_path)
        documents.append(create_document(caption))

    return build_faiss_index(documents)

def create_document(content):
    """Create FAISS document with CLIP embedding"""
    inputs = clip_processor(text=[content], return_tensors="pt", padding=True)
    features = clip_model.get_text_features(**inputs).detach().numpy()
    return Document(page_content=content, embedding=features[0])

def build_faiss_index(documents):
    """Build FAISS index from documents"""
    embeddings = np.array([doc.embedding for doc in documents])
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, documents

class QAEngine:
    def __init__(self, index, documents):
        self.index = index
        self.documents = documents
        self.llm = ChatOpenAI(temperature=0)

    def query(self, question):
        # Get relevant context
        query_embed = clip_processor(text=question, return_tensors="pt")
        features = clip_model.get_text_features(**query_embed).detach().numpy()
        _, indices = self.index.search(features, 3)
        
        # Build context
        context = "\n".join(
            [self.documents[i].page_content for i in indices[0]]
        )
        
        # Generate answer
        prompt = f"""Answer based on this context:
        {context}
        
        Question: {question}
        Answer:"""
        
        return self.llm.predict(prompt)

# Usage pipeline
if __name__ == "__main__":
    # Step 1: Process FBX file
    process_fbx("Zoho WorkDrive/force.fbx")
    
    # Step 2: Create knowledge base
    index, docs = create_knowledge_base()
    
    # Step 3: Initialize QA engine
    qa_engine = QAEngine(index, docs)
    
    # Example question
    print(qa_engine.query("Explain the main functionality of this assembly"))

ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/bpy/__init__.so, 0x0002): Library not loaded: @rpath/liboslexec.dylib
  Referenced from: <66A7D7D7-E9C7-3AE9-AA4C-8CD00CEA36C9> /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/bpy/__init__.so
  Reason: tried: '/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/bpy/lib/liboslexec.dylib' (no such file), '/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/bpy/lib/liboslexec.dylib' (no such file)

In [2]:
%pip install bpy

[33mDEPRECATION: Loading egg at /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/jupyter-1.0.0-py3.11.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mCollecting bpy
  Downloading bpy-4.3.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (1.4 kB)
Collecting zstandard (from bpy)
  Downloading zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.0 kB)
Downloading bpy-4.3.0-cp311-cp311-macosx_11_0_arm64.whl (217.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.2/217.2 MB[0m [31m996.8 kB/s[0m eta [36m0:00:00[0m0:01[0m00:03[0m
[?25hDownloading zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl (633 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m633.7/633.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected pack