llvm · Groverkss · Aug 31, 2025 · Sep 28, 2025 · Sep 28, 2025 · Sep 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+build
+.cache
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "third_party/llvm-project"]
+	path = third_party/llvm-project
+	url = https://github.com/llvm/llvm-project.git
+[submodule "third_party/torch-mlir"]
+	path = third_party/torch-mlir
+	url = https://github.com/llvm/torch-mlir.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,170 @@
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+cmake_minimum_required(VERSION 3.21...3.24)
+
+# LLVM requires CMP0116 for tblgen: https://reviews.llvm.org/D101083
+# CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()`
+# New in CMake 3.20. https://cmake.org/cmake/help/latest/policy/CMP0116.html
+set(CMAKE_POLICY_DEFAULT_CMP0116 OLD)
+if(POLICY CMP0116)
+  cmake_policy(SET CMP0116 OLD)
+endif()
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+project(LIGHTHOUSE C CXX)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
+
+#-------------------------------------------------------------------------------
+# Input Dialects
+#-------------------------------------------------------------------------------
+
+option(LIGHTHOUSE_INPUT_TORCH "Builds support for compiling Torch MLIR programs" ON)
+
+#-------------------------------------------------------------------------------
+# Python
+# If building features that require Python development, find them early in
+# one invocation (some CMake versions are sensitive to resolving out of order).
+#-------------------------------------------------------------------------------
+option(LIGHTHOUSE_BUILD_PYTHON_BINDINGS "Builds the Lighthouse python bindings" ON)
+
+if (LIGHTHOUSE_BUILD_PYTHON_BINDINGS)
+  # After CMake 3.18, we are able to limit the scope of the search to just
+  # Development.Module. Searching for Development will fail in situations where
+  # the Python libraries are not available. When possible, limit to just
+  # Development.Module.
+  # See https://pybind11.readthedocs.io/en/stable/compiling.html#findpython-mode
+  #
+  # Configuring the Development.Module is flaky in multi-project setups.
+  # "Bootstrapping" by first looking for the optional Development component
+  # seems to be robust generally.
+  # See: https://reviews.llvm.org/D118148
+  # If building Python packages, we have a hard requirement on 3.9+.
+  find_package(Python3 3.9 COMPONENTS Interpreter Development)
+  find_package(Python3 3.9 COMPONENTS Interpreter Development.Module REQUIRED)
+  # Some parts of the build use FindPython instead of FindPython3. Why? No
+  # one knows, but they are different. So make sure to bootstrap this one too.
+  # Not doing this here risks them diverging, which on multi-Python systems,
+  # can be troublesome. Note that nanobind requires FindPython.
+  set(Python_EXECUTABLE "${Python3_EXECUTABLE}")
+  find_package(Python 3.9 COMPONENTS Interpreter Development.Module REQUIRED)
+  # Checked only here, where both finders ran, to avoid spurious warnings.
+  if(NOT "${Python_EXECUTABLE}" STREQUAL "${Python3_EXECUTABLE}")
+    message(WARNING "FindPython and FindPython3 found different executables. You may need to pin -DPython_EXECUTABLE and -DPython3_EXECUTABLE (${Python_EXECUTABLE} vs ${Python3_EXECUTABLE})")
+  endif()
+endif()
+
+#-------------------------------------------------------------------------------
+# MLIR/LLVM Dependency
+#-------------------------------------------------------------------------------
+
+# Both the Lighthouse and MLIR Python bindings require nanobind. We initialize
+# it here at the top level so that everything uses ours consistently.
+if(LIGHTHOUSE_BUILD_PYTHON_BINDINGS)
+  # TODO: Configure a flag to use system deps directly.
+  include(FetchContent)
+  FetchContent_Declare(
+      nanobind
+      GIT_REPOSITORY https://github.com/wjakob/nanobind.git
+      GIT_TAG        0f9ce749b257fdfe701edb3cf6f7027ba029434a # v2.4.0
+  )
+  FetchContent_MakeAvailable(nanobind)
+endif()
+
+# Get the main LLVM deps.
+# TODO: Add a flag to use installed llvm instead.
+# TODO: There are a lot of default flags that are missing here. The build
+# system was only configured enough to build mlir python bindings.
+
+# Enable MLIR Python bindings if requested.
+set(MLIR_ENABLE_BINDINGS_PYTHON OFF CACHE BOOL "")
+if (LIGHTHOUSE_BUILD_PYTHON_BINDINGS)
+  set(MLIR_ENABLE_BINDINGS_PYTHON ON)
+endif()
+# Disable MLIR attempting to configure Python dev packages. We take care of
+# that in Lighthouse as a super-project.
+set(MLIR_DISABLE_CONFIGURE_PYTHON_DEV_PACKAGES ON CACHE BOOL "" FORCE)
+
+# LLVM defaults to building all targets. We always enable targets that we need
+# as we need them, so default to none. The user can override this as needed,
+# which is fine.
+set(LLVM_TARGETS_TO_BUILD "" CACHE STRING "")
+
+# These defaults are moderately important to us, but the user *can*
+# override them (enabling some of these brings in deps that will conflict,
+# so ymmv).
+set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "")
+set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "")
+set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "")
+set(LLVM_APPEND_VC_REV OFF CACHE BOOL "")
+set(LLVM_ENABLE_IDE ON CACHE BOOL "")
+set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "")
+
+# Force LLVM to avoid dependencies, which we don't ever really want in our
+# limited builds.
+set(LLVM_ENABLE_LIBEDIT OFF CACHE BOOL "Default disable")
+set(LLVM_ENABLE_LIBXML2 OFF CACHE BOOL "Default disable")
+set(LLVM_ENABLE_TERMINFO OFF CACHE BOOL "Default disable")
+set(LLVM_ENABLE_ZLIB OFF CACHE BOOL "Default disable")
+set(LLVM_ENABLE_ZSTD OFF CACHE BOOL "Default disable")
+set(LLVM_FORCE_ENABLE_STATS ON CACHE BOOL "Default enable")
+
+# Unconditionally enable mlir.
+set(LLVM_ENABLE_PROJECTS "" CACHE STRING "")
+list(APPEND LLVM_ENABLE_PROJECTS "mlir")
+
+# Setup LLVM lib and bin directories.
+set(LLVM_LIBRARY_OUTPUT_INTDIR "${CMAKE_BINARY_DIR}/llvm-project/lib")
+set(LLVM_RUNTIME_OUTPUT_INTDIR "${CMAKE_BINARY_DIR}/llvm-project/bin")
+
+set(_BUNDLED_LLVM_CMAKE_SOURCE_SUBDIR "third_party/llvm-project/llvm")
+add_subdirectory("${_BUNDLED_LLVM_CMAKE_SOURCE_SUBDIR}" "llvm-project" EXCLUDE_FROM_ALL)
+
+# Set some CMake variables that mirror things exported in the find_package
+# world. Source of truth for these is in an installed LLVMConfig.cmake,
+# MLIRConfig.cmake, LLDConfig.cmake (etc) and in the various standalone
+# build segments of each project's top-level CMakeLists.
+set(LLVM_CMAKE_DIR "${CMAKE_BINARY_DIR}/llvm-project/lib/cmake/llvm")
+list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
+# TODO: Fix MLIR upstream so it doesn't spew into the containing project
+# binary dir. See mlir/cmake/modules/CMakeLists.txt
+# (and other LLVM sub-projects).
+set(MLIR_CMAKE_DIR "${CMAKE_BINARY_DIR}/lib/cmake/mlir")
+list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
+
+message(STATUS "LLVM_CMAKE_DIR: ${LLVM_CMAKE_DIR}")
+message(STATUS "MLIR_CMAKE_DIR: ${MLIR_CMAKE_DIR}")
+
+set(LLVM_INCLUDE_DIRS
+  ${CMAKE_SOURCE_DIR}/${_BUNDLED_LLVM_CMAKE_SOURCE_SUBDIR}/include
+  ${CMAKE_BINARY_DIR}/llvm-project/include
+)
+set(MLIR_INCLUDE_DIRS
+  ${CMAKE_SOURCE_DIR}/third_party/llvm-project/mlir/include
+  ${CMAKE_BINARY_DIR}/llvm-project/tools/mlir/include
+)
+
+include_directories(${LLVM_INCLUDE_DIRS})
+include_directories(${MLIR_INCLUDE_DIRS})
+
+#-------------------------------------------------------------------------------
+# Torch-MLIR Dependency
+#-------------------------------------------------------------------------------
+
+if (LIGHTHOUSE_INPUT_TORCH)
+  set(TORCH_MLIR_ROOT_DIR "${CMAKE_SOURCE_DIR}/third_party/torch-mlir" CACHE PATH "")
+endif()
+
+#-------------------------------------------------------------------------------
+# Top-level libraries
+#-------------------------------------------------------------------------------
+include_directories(${CMAKE_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}/include)
+add_subdirectory(lib)
+# python/ needs nanobind and MLIR's Python support, which are set up only if ON.
+if(LIGHTHOUSE_BUILD_PYTHON_BINDINGS)
+  add_subdirectory(python)
+endif()
diff --git a/include/lighthouse-c/Init.h b/include/lighthouse-c/Init.h
@@ -0,0 +1,17 @@
+#ifndef LIGHTHOUSE_INIT_C_H
+#define LIGHTHOUSE_INIT_C_H
+
+#include "mlir-c/IR.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Adds the dialects used by Lighthouse to the given dialect registry.
+MLIR_CAPI_EXPORTED void lighthouseRegisterDialects(MlirDialectRegistry registry);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LIGHTHOUSE_INIT_C_H
diff --git a/lib/CAPI/CAPI.cpp b/lib/CAPI/CAPI.cpp
@@ -0,0 +1,9 @@
+#include "lighthouse-c/Init.h"
+#include "mlir-c/Dialect/Func.h"
+
+// Inserts the dialects Lighthouse currently provides (only `func`) into `registry`.
+MLIR_CAPI_EXPORTED void lighthouseRegisterDialects(MlirDialectRegistry registry) {
+  // TODO: Probably have a function call to the C++ lib to register things
+  // here. Inserting just the `func` dialect is a placeholder for now.
+  mlirDialectHandleInsertDialect(mlirGetDialectHandle__func__(), registry);
+}
diff --git a/lib/CAPI/CMakeLists.txt b/lib/CAPI/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Based on IREE's CAPI CMakeLists.txt :
+# https://github.com/iree-org/iree/blob/39c572deb0d388eb86d78295896a6e38835ccc8c/compiler/src/iree/compiler/API/CMakeLists.txt
+set(C_LIBS
+  MLIRCAPIIR
+  MLIRCAPIDebug
+  MLIRCAPIFunc
+  MLIRCAPIInterfaces
+  MLIRCAPITransforms
+)
+
+foreach (lib ${C_LIBS})
+  list(APPEND _OBJECTS $<TARGET_OBJECTS:obj.${lib}>)
+  # GENEX_EVAL is not recursive: it evaluates one level of generator
+  # expressions. There is a way to hold it just right so that GENEX_EVAL is
+  # used at the call site, but it is really tricky and people can't be expected
+  # to get it right. In fact, at the time of this writing, some misc usage
+  # in LLVM causes a second level of generator expressions in some base
+  # libraries. For good measure, we do GENEX_EVAL four times. If you get errors
+  # that show like generator expressions are showing up in link lines, this is
+  # the culprit. Look at the export_objects_debug.txt to confirm. Then, add
+  # another level of fix upstream if you like pain.
+  list(APPEND _DEPS "$<GENEX_EVAL:$<GENEX_EVAL:$<GENEX_EVAL:$<GENEX_EVAL:$<TARGET_PROPERTY:${lib},LINK_LIBRARIES>>>>>")
+endforeach ()
+
+# Targets created below default to hidden C++ symbol visibility.
+set(CMAKE_CXX_VISIBILITY_PRESET "hidden")
+set(CMAKE_VISIBILITY_INLINES_HIDDEN "ON")
+
+add_library(Lighthouse-C SHARED CAPI.cpp)
+target_sources(Lighthouse-C PRIVATE ${_OBJECTS})
+target_link_libraries(Lighthouse-C PRIVATE ${_DEPS})
+
+# -exclude-libs,ALL stops the linker from automatically exporting symbols that
+# come from static archives. It is GNU ld / lld (ELF) syntax; Apple's ld64 and
+# MSVC's link.exe do not accept it, so it is only passed on non-Apple UNIX
+# hosts. Without this guard the link of Lighthouse-C fails outright on those
+# platforms.
+if(UNIX AND NOT APPLE)
+  target_link_options(Lighthouse-C PRIVATE "-Wl,-exclude-libs,ALL")
+endif()
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(CAPI)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
@@ -0,0 +1,60 @@
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+################################################################################
+# This directly uses the LLVM build system in order to create bundled API
+# binaries that are consistent with LLVM. Consult upstream CMake macros if you
+# don't understand what this does.
+################################################################################
+
+include(AddMLIRPython)
+
+# Specifies that all MLIR packages are co-located under lighthouse.
+# TODO: Currently, we build this under `lighthouse.`, but ideally we should
+# build it under `lighthouse.compiler.` once we have a proper separation
+# between the compiler and the runtime. The runtime bindings have no reason
+# to ship with the MLIR Python bindings.
+# TODO: Add an upstream cmake param for this vs having a global here.
+add_compile_definitions("MLIR_PYTHON_PACKAGE_PREFIX=lighthouse.")
+
+set(_PYTHON_BUILD_PREFIX "${CMAKE_BINARY_DIR}/python")
+set(_PYTHON_INSTALL_PREFIX "python_packages/lighthouse")
+
+set(CMAKE_PLATFORM_NO_VERSIONED_SONAME 1)
+
+declare_mlir_python_sources(LighthousePythonSources)
+
+# The Python bindings are monolithic and we don't have a good way for the
+# torch plugin to contribute Python sources, so we just gate it here
+# versus having more complicated indirection. May want to rethink this
+# if others need it.
+if(LIGHTHOUSE_INPUT_TORCH)
+  declare_mlir_python_sources(LighthousePythonSources.Torch.Importers
+    ADD_TO_PARENT LighthousePythonSources
+    ROOT_DIR "${TORCH_MLIR_ROOT_DIR}/python/torch_mlir"
+    SOURCES
+      extras/fx_importer.py
+      extras/onnx_importer.py
+  )
+endif()
+
+set(_SOURCE_COMPONENTS
+  # Local sources.
+  LighthousePythonSources
+
+  MLIRPythonSources.Core
+
+  # TODO: We currently include func, because that's what the torch importer
+  # needs. We can include more as we need.
+  # MLIR Dialects.
+  MLIRPythonSources.Dialects.func
+)
+
+add_mlir_python_modules(LighthousePythonModules
+  ROOT_PREFIX "${_PYTHON_BUILD_PREFIX}/lighthouse"
+  INSTALL_PREFIX "${_PYTHON_INSTALL_PREFIX}/lighthouse"
+  DECLARED_SOURCES ${_SOURCE_COMPONENTS}
+  COMMON_CAPI_LINK_LIBS
+    Lighthouse-C
+)
diff --git a/third_party/llvm-project b/third_party/llvm-project
diff --git a/third_party/torch-mlir b/third_party/torch-mlir