# Script to convert .avi to .mp4 lowers the birate and resizes it to 1024x1024 (adds black bars)

In [3]:
import os
import subprocess
from pathlib import Path

# List of full input video paths
video_files = [
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run1_cam_2.avi",
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run1_cam_3.avi",
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run2_cam_2.avi",
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run2_cam_3.avi",
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run3_cam_2.avi",
	"/Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run3_cam_3.avi"
]

# Get the directory of the running notebook
notebook_dir = Path().resolve()
output_dir = notebook_dir / "training" / "videos"

# Create output directory if it doesn't exist
output_dir.mkdir(parents=True, exist_ok=True)

# Output filename suffix
output_suffix = "_1024x1024_15fps_3mbps"

for video in video_files:
	video_path = Path(video)
	input_name = video_path.stem  # filename without extension
	output_file = output_dir / f"{input_name}{output_suffix}.mp4"

	command = [
		"ffmpeg",
		"-i", str(video_path),
		"-vf", "scale=w=1024:h=1024:force_original_aspect_ratio=decrease,pad=1024:1024:(ow-iw)/2:(oh-ih)/2:black",
		"-r", "15",				 # Reduce FPS to 15
		"-c:v", "libx264",		 
		"-b:v", "2M",			   # Set video bitrate to 2 Mbps
		"-preset", "slow",		 
		"-y",					   
		str(output_file)
	]

	print(f"Processing: {video_path} → {output_file}")
	subprocess.run(command, check=True)

print("✅ All videos processed (1024x1024, 15fps, 3Mbps) and saved in ./training/videos/")

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run1_cam_2.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run1_cam_2_1024x1024_15fps_3mbps.mp4


ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --e

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run1_cam_3.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run1_cam_3_1024x1024_15fps_3mbps.mp4


[out#0/mp4 @ 0x132f247d0] video:30182KiB audio:0KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.076143%
frame= 1856 fps= 72 q=-1.0 Lsize=   30205KiB time=00:02:03.60 bitrate=2001.9kbits/s dup=0 drop=1854 speed=4.78x    
[libx264 @ 0x132f25c70] frame I:8     Avg QP:11.99  size: 88796
[libx264 @ 0x132f25c70] frame P:468   Avg QP:15.74  size: 32013
[libx264 @ 0x132f25c70] frame B:1380  Avg QP:18.02  size: 11024
[libx264 @ 0x132f25c70] consecutive B-frames:  0.9%  0.0%  0.0% 99.1%
[libx264 @ 0x132f25c70] mb I  I16..4: 30.2% 51.6% 18.2%
[libx264 @ 0x132f25c70] mb P  I16..4:  1.8%  5.5%  0.7%  P16..4: 37.0% 18.2% 10.2%  0.0%  0.0%    skip:26.6%
[libx264 @ 0x132f25c70] mb B  I16..4:  0.4%  0.8%  0.1%  B16..8: 29.2%  7.2%  1.2%  direct:12.0%  skip:49.1%  L0:55.4% L1:35.7% BI: 8.9%
[libx264 @ 0x132f25c70] final ratefactor: 19.59
[libx264 @ 0x132f25c70] 8x8 transform intra:64.4% inter:54.7%
[libx264 @ 0x132f25c70] direct mvs  spatial:99.4% temporal:0.6%
[libx264 @ 0x1

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run2_cam_2.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run2_cam_2_1024x1024_15fps_3mbps.mp4


ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --e

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run2_cam_3.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run2_cam_3_1024x1024_15fps_3mbps.mp4


[out#0/mp4 @ 0x157e119e0] video:29796KiB audio:0KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.076146%
frame= 1833 fps= 78 q=-1.0 Lsize=   29819KiB time=00:02:02.06 bitrate=2001.2kbits/s dup=0 drop=1831 speed=5.16x    
[libx264 @ 0x157e12e50] frame I:8     Avg QP:11.97  size: 88304
[libx264 @ 0x157e12e50] frame P:462   Avg QP:15.65  size: 32067
[libx264 @ 0x157e12e50] frame B:1363  Avg QP:17.87  size: 10997
[libx264 @ 0x157e12e50] consecutive B-frames:  0.8%  0.1%  0.0% 99.1%
[libx264 @ 0x157e12e50] mb I  I16..4: 30.4% 50.7% 18.9%
[libx264 @ 0x157e12e50] mb P  I16..4:  1.9%  5.6%  0.7%  P16..4: 37.0% 18.1% 10.1%  0.0%  0.0%    skip:26.6%
[libx264 @ 0x157e12e50] mb B  I16..4:  0.4%  0.8%  0.1%  B16..8: 29.0%  7.2%  1.2%  direct:12.1%  skip:49.1%  L0:55.4% L1:35.8% BI: 8.9%
[libx264 @ 0x157e12e50] final ratefactor: 19.60
[libx264 @ 0x157e12e50] 8x8 transform intra:64.3% inter:54.8%
[libx264 @ 0x157e12e50] direct mvs  spatial:99.4% temporal:0.6%
[libx264 @ 0x1

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run3_cam_2.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run3_cam_2_1024x1024_15fps_3mbps.mp4


[out#0/mp4 @ 0x14b7246e0] video:29734KiB audio:0KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.076332%
frame= 1833 fps= 84 q=-1.0 Lsize=   29757KiB time=00:02:02.06 bitrate=1997.0kbits/s dup=0 drop=1829 speed=5.61x    
[libx264 @ 0x14b725590] frame I:8     Avg QP:11.93  size: 73847
[libx264 @ 0x14b725590] frame P:463   Avg QP:16.27  size: 29034
[libx264 @ 0x14b725590] frame B:1362  Avg QP:18.06  size: 12051
[libx264 @ 0x14b725590] consecutive B-frames:  0.9%  0.1%  0.2% 98.9%
[libx264 @ 0x14b725590] mb I  I16..4: 30.0% 57.2% 12.8%
[libx264 @ 0x14b725590] mb P  I16..4:  2.1%  9.0%  0.7%  P16..4: 37.8% 15.3%  8.2%  0.0%  0.0%    skip:26.8%
[libx264 @ 0x14b725590] mb B  I16..4:  0.5%  1.8%  0.1%  B16..8: 30.3%  7.8%  1.2%  direct:11.9%  skip:46.5%  L0:55.0% L1:36.8% BI: 8.2%
[libx264 @ 0x14b725590] final ratefactor: 19.39
[libx264 @ 0x14b725590] 8x8 transform intra:74.1% inter:61.7%
[libx264 @ 0x14b725590] direct mvs  spatial:99.4% temporal:0.6%
[libx264 @ 0x1

Processing: /Volumes/T7/Dataset_paper/Vacuum_pump/01/Full_recordings/01_run3_cam_3.avi → /Users/georgye/Documents/repos/ethz/dslab25/training/videos/01_run3_cam_3_1024x1024_15fps_3mbps.mp4


frame= 1795 fps= 81 q=23.0 size=   29184KiB time=00:01:59.53 bitrate=2000.1kbits/s dup=0 drop=1829 speed=5.39x    

✅ All videos processed (1024x1024, 15fps, 3Mbps) and saved in ./training/videos/


[out#0/mp4 @ 0x15a72f630] video:29787KiB audio:0KiB subtitle:0KiB other streams:0KiB global headers:0KiB muxing overhead: 0.076327%
frame= 1833 fps= 81 q=-1.0 Lsize=   29810KiB time=00:02:02.06 bitrate=2000.5kbits/s dup=0 drop=1829 speed=5.39x    
[libx264 @ 0x15a730470] frame I:8     Avg QP:11.99  size: 87637
[libx264 @ 0x15a730470] frame P:462   Avg QP:15.72  size: 32020
[libx264 @ 0x15a730470] frame B:1363  Avg QP:17.99  size: 11010
[libx264 @ 0x15a730470] consecutive B-frames:  0.8%  0.1%  0.0% 99.1%
[libx264 @ 0x15a730470] mb I  I16..4: 30.1% 51.8% 18.2%
[libx264 @ 0x15a730470] mb P  I16..4:  1.9%  5.7%  0.7%  P16..4: 36.9% 18.0% 10.2%  0.0%  0.0%    skip:26.6%
[libx264 @ 0x15a730470] mb B  I16..4:  0.4%  0.8%  0.1%  B16..8: 29.2%  7.2%  1.2%  direct:11.9%  skip:49.0%  L0:55.6% L1:35.5% BI: 8.9%
[libx264 @ 0x15a730470] final ratefactor: 19.61
[libx264 @ 0x15a730470] 8x8 transform intra:64.8% inter:54.9%
[libx264 @ 0x15a730470] direct mvs  spatial:99.4% temporal:0.6%
[libx264 @ 0x1

# Blender part

In [6]:
import bpy
import math
import mathutils
import random
import os


# Enable STL import addon if not already enabled
bpy.ops.preferences.addon_enable(module="io_mesh_stl")

# Clear the scene (delete all objects)
def clear_scene():
	bpy.ops.object.select_all(action='SELECT')
	bpy.ops.object.delete(use_global=False)
	# Also clear all materials and meshes if necessary
	for block in bpy.data.meshes:
		bpy.data.meshes.remove(block)
	for block in bpy.data.materials:
		bpy.data.materials.remove(block)

# Create a ground plane with a white material
def setup_ground():
	bpy.ops.mesh.primitive_plane_add(size=10, location=(0, 0, 0))
	plane = bpy.context.active_object
	mat = bpy.data.materials.new(name="GroundMaterial")
	# RGBA: white color
	mat.diffuse_color = (1.0, 1.0, 1.0, 1.0)
	plane.data.materials.append(mat)
	return plane

# Set up lights: one main area light and eight secondary point lights arranged in a circle
def setup_lights():
	lights = []
	scene = bpy.context.scene
	# Main light: an area light positioned above the center (simulating an LED panel)
	bpy.ops.object.light_add(type='AREA', location=(0, 0, 5))
	main_light = bpy.context.active_object
	main_light.data.energy = 2500  # Adjust as needed (energy units in Blender)
	main_light.data.size = 5	   # Size of the area light
	lights.append(main_light)
	
	# Eight secondary lights arranged in a circle around the object
	num_secondary = 8
	radius = 5  # Radius of the circle where lights are placed
	for i in range(num_secondary):
		angle = 2 * math.pi * i / num_secondary
		x = radius * math.cos(angle)
		y = radius * math.sin(angle)
		z = 5  # same height as the main light
		bpy.ops.object.light_add(type='POINT', location=(x, y, z))
		light = bpy.context.active_object
		light.data.energy = 400  # Lower energy for secondary lights
		lights.append(light)
	return lights

# Import the STL file and return the imported object
def import_stl(filepath):
	bpy.ops.import_mesh.stl(filepath=filepath)
	obj = bpy.context.selected_objects[0]
	return obj

# Create a camera if not already present
def setup_camera():
	bpy.ops.object.camera_add(location=(0, -7, 3))
	cam = bpy.context.active_object
	cam.data.lens = 35  # set focal length (can be adjusted)
	return cam

# Simulate a drop by placing the object high, applying random rotation, adding rigid body physics,
# and letting gravity act for a few frames. Then remove the physics and keep the final transform.
def simulate_drop(obj):
	scene = bpy.context.scene
	# Remove existing rigid body if present
	if obj.rigid_body:
		bpy.ops.rigidbody.object_remove()
	
	# Set initial position high above the ground and apply a random rotation
	obj.location = (0, 0, 5)
	obj.rotation_euler = (
		random.uniform(0, math.pi),
		random.uniform(0, math.pi),
		random.uniform(0, math.pi)
	)
	
	# Add rigid body physics
	bpy.ops.rigidbody.object_add()
	obj.rigid_body.type = 'ACTIVE'
	obj.rigid_body.use_deactivation = False  # keep simulation active
	
	# Run the simulation for a number of frames (here, 50 frames as an example)
	start_frame = scene.frame_current
	end_frame = start_frame + 50
	for frame in range(start_frame, end_frame):
		scene.frame_set(frame)
	
	# Record the final transformation after simulation
	final_loc = obj.location.copy()
	final_rot = obj.rotation_euler.copy()
	
	# Remove rigid body physics and set object to the final state
	bpy.ops.rigidbody.object_remove()
	obj.location = final_loc
	obj.rotation_euler = final_rot
	
	return final_loc, final_rot

# Render images from multiple camera positions (4 positions around the object)
def render_views(obj, cam, output_dir, drop_index):
	scene = bpy.context.scene
	num_views = 4
	for i in range(num_views):
		angle = 2 * math.pi * i / num_views
		# Place camera at a fixed radius around the object
		radius = 7
		cam_x = radius * math.cos(angle)
		cam_y = radius * math.sin(angle)
		cam_z = 3  # Adjust camera height as needed
		cam.location = (cam_x, cam_y, cam_z)
		
		# Point the camera toward the object’s center
		direction = mathutils.Vector(obj.location) - cam.location
		rot_quat = direction.to_track_quat('-Z', 'Y')
		cam.rotation_euler = rot_quat.to_euler()
		
		# Set the output filepath for this view
		filename = f"drop_{drop_index:02d}_view_{i+1}.jpg"
		scene.render.filepath = os.path.join(output_dir, filename)
		bpy.ops.render.render(write_still=True)

def main():
	# Settings – adjust these paths to your environment
	# Get the current directory where this script is located
	current_dir = os.getcwd()
	stl_filepath = os.path.join(current_dir, "training/vacuum_pump/stl/Vacuum_pump.stl")   # STL file in the same directory
	output_dir = os.path.join(current_dir, "training/vacuum_pump/generated")  # Output directory in the same directory
	if not os.path.exists(output_dir):
		os.makedirs(output_dir)
		
	# Clear the scene
	clear_scene()
	
	# Set up the ground plane and lights
	setup_ground()
	setup_lights()
	
	# Import the STL model
	obj = import_stl(stl_filepath)
	
	# Create a camera
	cam = setup_camera()
	
	# Set render resolution
	scene = bpy.context.scene
	scene.render.resolution_x = 256
	scene.render.resolution_y = 256
	
	# Number of simulated drops and camera views per drop:
	num_drops = 64  # as per paper (64 drops * 4 views = 256 images per part)
	
	for drop in range(num_drops):
		# Simulate a drop to get a realistic orientation
		simulate_drop(obj)
		# Render images from 4 camera perspectives for this drop
		render_views(obj, cam, output_dir, drop)
	
	print("Rendering complete!")

# Run the script when executed in Blender
if __name__ == "__main__":
	main()

ModuleNotFoundError: No module named 'bpy'