diff --git a/.github/workflows/cpp_tests.yaml b/.github/workflows/cpp_tests.yaml
index 6ae56c70f..5e31babc7 100644
--- a/.github/workflows/cpp_tests.yaml
+++ b/.github/workflows/cpp_tests.yaml
@@ -34,12 +34,12 @@ jobs:
           python-version: '3.12'
       - name: Update pip
         run: python -m pip install --upgrade pip
-      - name: Install dependencies
+      - name: Install torch dependencies
         run: |
           python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-      - name: Install ffmpeg and pkg-config
+      - name: Install ffmpeg, pkg-config and pybind11
         run: |
-          conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config -c conda-forge
+          conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config pybind11 -c conda-forge
           ffmpeg -version
       - name: Build and run C++ tests
         run: |
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 89e8401de..60bfbfa2e 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -38,6 +38,7 @@ jobs:
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
       build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
       smoke-test-script: packaging/fake_smoke_test.py
       package-name: torchcodec
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 71cc071c8..c156a833c 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -63,7 +63,7 @@ jobs:
       - name: Install dependencies and FFmpeg
         run: |
           python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-          conda install "ffmpeg=7.0.1" pkg-config -c conda-forge
+          conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
         run: |
diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml
index 65b06e933..53b5bfc20 100644
--- a/.github/workflows/linux_cuda_wheel.yaml
+++ b/.github/workflows/linux_cuda_wheel.yaml
@@ -48,6 +48,7 @@ jobs:
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
       build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
       smoke-test-script: packaging/fake_smoke_test.py
       package-name: torchcodec
diff --git a/.github/workflows/linux_wheel.yaml b/.github/workflows/linux_wheel.yaml
index f5e665f55..cd53219f7 100644
--- a/.github/workflows/linux_wheel.yaml
+++ b/.github/workflows/linux_wheel.yaml
@@ -49,6 +49,7 @@ jobs:
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
       build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
       smoke-test-script: packaging/fake_smoke_test.py
       package-name: torchcodec
diff --git a/.github/workflows/macos_wheel.yaml b/.github/workflows/macos_wheel.yaml
index b7cc965eb..d9472765c 100644
--- a/.github/workflows/macos_wheel.yaml
+++ b/.github/workflows/macos_wheel.yaml
@@ -49,6 +49,7 @@ jobs:
       test-infra-repository: pytorch/test-infra
       test-infra-ref: main
       build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
+      pre-script: packaging/pre_build_script.sh
       post-script: packaging/post_build_script.sh
       smoke-test-script: packaging/fake_smoke_test.py
       runner-type: macos-m1-stable
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bc3ec3bb0..d516bc272 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -20,6 +20,7 @@ test locally you will need the following dependencies:
   installation already.
 - cmake
 - pkg-config
+- pybind11
 - FFmpeg
 - PyTorch nightly
 
@@ -29,7 +30,7 @@ Start by installing the **nightly** build of PyTorch following the
 Then, the easiest way to install the rest of the dependencies is to run:
 
 ```bash
-conda install cmake pkg-config ffmpeg -c conda-forge
+conda install cmake pkg-config pybind11 ffmpeg -c conda-forge
 ```
 
 ### Clone and build
diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
new file mode 100644
index 000000000..f22244e9c
--- /dev/null
+++ b/packaging/pre_build_script.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -ex
+
+# We need to install pybind11 because we need its CMake helpers in order to
+# compile correctly on Mac. Pybind11 is actually a C++ header-only library,
+# and PyTorch actually has it included. PyTorch, however, does not have the
+# CMake helpers.
+conda install -y pybind11 -c conda-forge
diff --git a/setup.py b/setup.py
index 9120c7fe0..f16521764 100644
--- a/setup.py
+++ b/setup.py
@@ -68,7 +68,7 @@ def run(self):
         super().run()
 
     def build_extension(self, ext):
-        """Call our CMake build system to build libtorchcodec?.so"""
+        """Call our CMake build system to build libtorchcodec*.so"""
         # Setuptools was designed to build one extension (.so file) at a time,
         # calling this method for each Extension object. We're using a
         # CMake-based build where all our extensions are built together at once.
@@ -136,21 +136,27 @@ def copy_extensions_to_source(self):
         This is called by setuptools at the end of .run() during editable installs.
         """
         self.get_finalized_command("build_py")
-        extension = ""
+        extensions = []
         if sys.platform == "linux":
-            extension = "so"
+            extensions = ["so"]
         elif sys.platform == "darwin":
-            extension = "dylib"
+            # Mac has BOTH .dylib and .so as library extensions. Short version
+            # is that a .dylib is a shared library that can be both dynamically
+            # loaded and depended on by other libraries; a .so can only be a
+            # dynamically loaded module. For more, see:
+            #   https://stackoverflow.com/a/2339910
+            extensions = ["dylib", "so"]
         else:
             raise NotImplementedError(
                 "Platforms other than linux/darwin are not supported yet"
             )
 
-        for so_file in self._install_prefix.glob(f"*.{extension}"):
-            assert "libtorchcodec" in so_file.name
-            destination = Path("src/torchcodec/") / so_file.name
-            print(f"Copying {so_file} to {destination}")
-            self.copy_file(so_file, destination, level=self.verbose)
+        for ext in extensions:
+            for lib_file in self._install_prefix.glob(f"*.{ext}"):
+                assert "libtorchcodec" in lib_file.name
+                destination = Path("src/torchcodec/") / lib_file.name
+                print(f"Copying {lib_file} to {destination}")
+                self.copy_file(lib_file, destination, level=self.verbose)
 
 
 NOT_A_LICENSE_VIOLATION_VAR = "I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION"
diff --git a/src/torchcodec/_internally_replaced_utils.py b/src/torchcodec/_internally_replaced_utils.py
index 0833eb82f..a5a3ffa39 100644
--- a/src/torchcodec/_internally_replaced_utils.py
+++ b/src/torchcodec/_internally_replaced_utils.py
@@ -7,11 +7,12 @@
 import importlib
 import sys
 from pathlib import Path
+from types import ModuleType
 
 
 # Copy pasted from torchvision
 # https://github.com/pytorch/vision/blob/947ae1dc71867f28021d5bc0ff3a19c249236e2a/torchvision/_internally_replaced_utils.py#L25
-def _get_extension_path(lib_name):
+def _get_extension_path(lib_name: str) -> str:
     extension_suffixes = []
     if sys.platform == "linux":
         extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES
@@ -31,6 +32,22 @@ def _get_extension_path(lib_name):
     )
     ext_specs = extfinder.find_spec(lib_name)
     if ext_specs is None:
-        raise ImportError
+        raise ImportError(f"No spec found for {lib_name}")
+
+    if ext_specs.origin is None:
+        raise ImportError(f"Existing spec found for {lib_name} does not have an origin")
 
     return ext_specs.origin
+
+
+def _load_pybind11_module(module_name: str, library_path: str) -> ModuleType:
+    spec = importlib.util.spec_from_file_location(
+        module_name,
+        library_path,
+    )
+    if spec is None:
+        raise ImportError(
+            f"Unable to load spec for module {module_name} from path {library_path}"
+        )
+
+    return importlib.util.module_from_spec(spec)
diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.cpp b/src/torchcodec/decoders/_core/AVIOBytesContext.cpp
new file mode 100644
index 000000000..0d1e9d413
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOBytesContext.cpp
@@ -0,0 +1,70 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOBytesContext.h"
+#include <torch/types.h>
+
+namespace facebook::torchcodec {
+
+AVIOBytesContext::AVIOBytesContext(const void* data, int64_t dataSize)
+    : dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
+  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
+  TORCH_CHECK(dataSize > 0, "Video data size must be positive");
+  createAVIOContext(&read, &seek, &dataContext_);
+}
+
+// The signature of this function is defined by FFMPEG.
+int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
+  auto dataContext = static_cast<DataContext*>(opaque);
+  TORCH_CHECK(
+      dataContext->current <= dataContext->size,
+      "Tried to read outside of the buffer: current=",
+      dataContext->current,
+      ", size=",
+      dataContext->size);
+
+  int64_t numBytesRead = std::min(
+      static_cast<int64_t>(buf_size), dataContext->size - dataContext->current);
+
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      dataContext->size,
+      ", current=",
+      dataContext->current);
+
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+
+  std::memcpy(buf, dataContext->data + dataContext->current, numBytesRead);
+  dataContext->current += numBytesRead;
+  return numBytesRead;
+}
+
+// The signature of this function is defined by FFMPEG.
+int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
+  auto dataContext = static_cast<DataContext*>(opaque);
+  int64_t ret = -1;
+
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = dataContext->size;
+      break;
+    case SEEK_SET:
+      dataContext->current = offset;
+      ret = offset;
+      break;
+    default:
+      break;
+  }
+
+  return ret;
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.h b/src/torchcodec/decoders/_core/AVIOBytesContext.h
new file mode 100644
index 000000000..411866dc0
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOBytesContext.h
@@ -0,0 +1,32 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+
+namespace facebook::torchcodec {
+
+// Enables users to pass in the entire video as bytes. Our read and seek
+// functions then traverse the bytes in memory.
+class AVIOBytesContext : public AVIOContextHolder {
+ public:
+  explicit AVIOBytesContext(const void* data, int64_t dataSize);
+
+ private:
+  struct DataContext {
+    const uint8_t* data;
+    int64_t size;
+    int64_t current;
+  };
+
+  static int read(void* opaque, uint8_t* buf, int buf_size);
+  static int64_t seek(void* opaque, int64_t offset, int whence);
+
+  DataContext dataContext_;
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.cpp b/src/torchcodec/decoders/_core/AVIOContextHolder.cpp
new file mode 100644
index 000000000..1fc4f5ecf
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOContextHolder.cpp
@@ -0,0 +1,50 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+#include <torch/types.h>
+
+namespace facebook::torchcodec {
+
+void AVIOContextHolder::createAVIOContext(
+    AVIOReadFunction read,
+    AVIOSeekFunction seek,
+    void* heldData,
+    int bufferSize) {
+  TORCH_CHECK(
+      bufferSize > 0,
+      "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
+  auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
+  TORCH_CHECK(
+      buffer != nullptr,
+      "Failed to allocate buffer of size " + std::to_string(bufferSize));
+
+  avioContext_.reset(avio_alloc_context(
+      buffer,
+      bufferSize,
+      0,
+      heldData,
+      read,
+      nullptr, // write function; not supported yet
+      seek));
+
+  if (!avioContext_) {
+    av_freep(&buffer);
+    TORCH_CHECK(false, "Failed to allocate AVIOContext");
+  }
+}
+
+AVIOContextHolder::~AVIOContextHolder() {
+  if (avioContext_) {
+    av_freep(&avioContext_->buffer);
+  }
+}
+
+AVIOContext* AVIOContextHolder::getAVIOContext() {
+  return avioContext_.get();
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.h b/src/torchcodec/decoders/_core/AVIOContextHolder.h
new file mode 100644
index 000000000..26bb06f08
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOContextHolder.h
@@ -0,0 +1,65 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include "src/torchcodec/decoders/_core/FFMPEGCommon.h"
+
+namespace facebook::torchcodec {
+
+// The AVIOContextHolder serves several purposes:
+//
+//   1. It is a smart pointer for the AVIOContext. It has the logic to create
+//      a new AVIOContext and will appropriately free the AVIOContext when it
+//      goes out of scope. Note that this requires more than just having a
+//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
+//      freed.
+//   2. It is a base class for AVIOContext specializations. When specializing an
+//      AVIOContext, we need to provide four things:
+//        1. A read callback function.
+//        2. A seek callback function.
+//        3. A write callback function. (Not supported yet; it's for encoding.)
+//        4. A pointer to some context object that has the same lifetime as the
+//           AVIOContext itself. This context object holds the custom state that
+//           tracks the custom behavior of reading, seeking and writing. It is
+//           provided upon AVIOContext creation and to the read, seek and
+//           write callback functions.
+//      While it's not required, it is natural for the derived classes to make
+//      all of the above members. Derived classes need to call
+//      createAVIOContext(), ideally in their constructor.
+//  3. A generic handle for those that just need to manage having access to an
+//     AVIOContext, but aren't necessarily concerned with how it was customized:
+//     typically, the VideoDecoder.
+class AVIOContextHolder {
+ public:
+  virtual ~AVIOContextHolder();
+  AVIOContext* getAVIOContext();
+
+ protected:
+  // Make constructor protected to prevent anyone from constructing
+  // an AVIOContextHolder without deriving it. (Ordinarily this would be
+  // enforced by having pure virtual methods, but we don't have any.)
+  AVIOContextHolder() = default;
+
+  // These signatures are defined by FFmpeg.
+  using AVIOReadFunction = int (*)(void*, uint8_t*, int);
+  using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
+
+  // Deriving classes should call this function in their constructor.
+  void createAVIOContext(
+      AVIOReadFunction read,
+      AVIOSeekFunction seek,
+      void* heldData,
+      int bufferSize = defaultBufferSize);
+
+ private:
+  UniqueAVIOContext avioContext_;
+
+  // Defaults to 64 KB
+  static const int defaultBufferSize = 64 * 1024;
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp b/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp
new file mode 100644
index 000000000..60d1503ae
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp
@@ -0,0 +1,80 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOFileLikeContext.h"
+#include <torch/types.h>
+
+namespace facebook::torchcodec {
+
+AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
+    : fileLike_{UniquePyObject(new py::object(fileLike))} {
+  {
+    // TODO: Is it necessary to acquire the GIL here? Is it maybe even
+    // harmful? At the moment, this is only called from within a pybind
+    // function, and pybind guarantees we have the GIL.
+    py::gil_scoped_acquire gil;
+    TORCH_CHECK(
+        py::hasattr(fileLike, "read"),
+        "File like object must implement a read method.");
+    TORCH_CHECK(
+        py::hasattr(fileLike, "seek"),
+        "File like object must implement a seek method.");
+  }
+  createAVIOContext(&read, &seek, &fileLike_);
+}
+
+int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
+  auto fileLike = static_cast<UniquePyObject*>(opaque);
+
+  // Note that we acquire the GIL outside of the loop. This is likely more
+  // efficient than releasing and acquiring it each loop iteration.
+  py::gil_scoped_acquire gil;
+
+  int totalNumRead = 0;
+  while (totalNumRead < buf_size) {
+    int request = buf_size - totalNumRead;
+
+    // The Python method returns the actual bytes, which we access through the
+    // py::bytes wrapper. That wrapper, however, does not provide us access to
+    // the underlying data pointer, which we need for the memcpy below. So we
+    // convert the bytes to a string_view to get access to the data pointer.
+    // Because it's a view and not a copy, it should be cheap.
+    auto bytesRead = static_cast<py::bytes>((*fileLike)->attr("read")(request));
+    auto bytesView = static_cast<std::string_view>(bytesRead);
+
+    int numBytesRead = static_cast<int>(bytesView.size());
+    if (numBytesRead == 0) {
+      break;
+    }
+
+    TORCH_CHECK(
+        numBytesRead <= request,
+        "Requested up to ",
+        request,
+        " bytes but, received ",
+        numBytesRead,
+        " bytes. The given object does not conform to read protocol of file object.");
+
+    std::memcpy(buf, bytesView.data(), numBytesRead);
+    buf += numBytesRead;
+    totalNumRead += numBytesRead;
+  }
+
+  return totalNumRead == 0 ? AVERROR_EOF : totalNumRead;
+}
+
+int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) {
+  // We do not know the file size.
+  if (whence == AVSEEK_SIZE) {
+    return AVERROR(EIO);
+  }
+
+  auto fileLike = static_cast<UniquePyObject*>(opaque);
+  py::gil_scoped_acquire gil;
+  return py::cast<int64_t>((*fileLike)->attr("seek")(offset, whence));
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.h b/src/torchcodec/decoders/_core/AVIOFileLikeContext.h
new file mode 100644
index 000000000..7be07f2b6
--- /dev/null
+++ b/src/torchcodec/decoders/_core/AVIOFileLikeContext.h
@@ -0,0 +1,54 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+
+namespace py = pybind11;
+
+namespace facebook::torchcodec {
+
+// Enables users to pass in a Python file-like object. We then forward all read
+// and seek calls back up to the methods on the Python object.
+class AVIOFileLikeContext : public AVIOContextHolder {
+ public:
+  explicit AVIOFileLikeContext(py::object fileLike);
+
+ private:
+  static int read(void* opaque, uint8_t* buf, int buf_size);
+  static int64_t seek(void* opaque, int64_t offset, int whence);
+
+  // Note that we dynamically allocate the Python object because we need to
+  // strictly control when its destructor is called. We must hold the GIL
+  // when its destructor gets called, as it needs to update the reference
+  // count. It's easiest to control that when it's dynamic memory. Otherwise,
+  // we'd have to ensure whatever enclosing scope holds the object has the GIL,
+  // and that's, at least, hard. For all of the common pitfalls, see:
+  //
+  //   https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors
+  //
+  // We maintain a reference to the file-like object because the file-like
+  // object that was created on the Python side must live as long as our
+  // potential use. That is, even if there are no more references to the object
+  // on the Python side, we require that the object is still live.
+  struct PyObjectDeleter {
+    inline void operator()(py::object* obj) const {
+      if (obj) {
+        py::gil_scoped_acquire gil;
+        delete obj;
+      }
+    }
+  };
+
+  using UniquePyObject = std::unique_ptr<py::object, PyObjectDeleter>;
+  UniquePyObject fileLike_;
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/CMakeLists.txt b/src/torchcodec/decoders/_core/CMakeLists.txt
index 8e4e6c5df..f0a8568fe 100644
--- a/src/torchcodec/decoders/_core/CMakeLists.txt
+++ b/src/torchcodec/decoders/_core/CMakeLists.txt
@@ -3,62 +3,167 @@ project(TorchCodec)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
+set(PYBIND11_FINDPYTHON ON)
+find_package(pybind11 REQUIRED)
 find_package(Torch REQUIRED)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
 find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
 
-function(make_torchcodec_library library_name ffmpeg_target)
-    set(
-        sources
-        FFMPEGCommon.h
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
+
+function(make_torchcodec_sublibrary
+    library_name
+    type
+    sources
+    library_dependencies)
+
+    add_library(${library_name} ${type} ${sources})
+    set_target_properties(${library_name} PROPERTIES CXX_STANDARD 17)
+    target_include_directories(${library_name}
+        PRIVATE
+        ./../../../../
+        "${TORCH_INSTALL_PREFIX}/include"
+        ${Python3_INCLUDE_DIRS}
+    )
+
+    # Avoid adding the "lib" prefix which we already add explicitly.
+    set_target_properties(${library_name} PROPERTIES PREFIX "")
+
+    target_link_libraries(
+        ${library_name}
+        PUBLIC
+        ${library_dependencies}
+    )
+endfunction()
+
+function(make_torchcodec_libraries
+    ffmpeg_major_version
+    ffmpeg_target)
+
+    # We create three shared libraries per version of FFmpeg, where the version
+    # is denoted by N:
+    #
+    # 1. libtorchcodec_decoderN.{ext}: Base library which contains the
+    #    implementation of VideoDecoder and everything VideoDecoder needs. On
+    #    Linux, {ext} is so. On Mac, it is dylib.
+    #
+    # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom
+    #    ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so.
+    #    On Mac, it is dylib.
+    #
+    # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We
+    #    keep these separate from the PyTorch custom ops because we have to
+    #    load these libraries separately on the Python side. Depends on
+    #    libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so.
+
+    # 1. Create libtorchcodec_decoderN.{ext}.
+    set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
+    set(decoder_sources
+        AVIOContextHolder.cpp
         FFMPEGCommon.cpp
-        VideoDecoder.h
         VideoDecoder.cpp
-        VideoDecoderOps.h
-        VideoDecoderOps.cpp
-        DeviceInterface.h
     )
+
     if(ENABLE_CUDA)
-        list(APPEND sources CudaDevice.cpp)
+        list(APPEND decoder_sources CudaDevice.cpp)
     else()
-        list(APPEND sources CPUOnlyDevice.cpp)
+        list(APPEND decoder_sources CPUOnlyDevice.cpp)
     endif()
-    add_library(${library_name} SHARED ${sources})
-    set_property(TARGET ${library_name} PROPERTY CXX_STANDARD 17)
 
-    target_include_directories(
-        ${library_name}
-        PRIVATE
-        ./../../../../
-        "${TORCH_INSTALL_PREFIX}/include"
-        ${Python3_INCLUDE_DIRS}
+    set(decoder_library_dependencies
+        ${ffmpeg_target}
+        ${TORCH_LIBRARIES}
     )
 
-    set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES}
-        ${Python3_LIBRARIES})
     if(ENABLE_CUDA)
-        list(APPEND NEEDED_LIBRARIES
-            ${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
+        list(APPEND decoder_library_dependencies
+            ${CUDA_nppi_LIBRARY}
+            ${CUDA_nppicc_LIBRARY}
+        )
     endif()
-    target_link_libraries(
-        ${library_name}
+
+    make_torchcodec_sublibrary(
+        "${decoder_library_name}"
+        SHARED
+        "${decoder_sources}"
+        "${decoder_library_dependencies}"
+    )
+
+    # 2. Create libtorchcodec_custom_opsN.{ext}.
+    set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
+    set(custom_ops_sources
+        AVIOBytesContext.cpp
+        VideoDecoderOps.cpp
+    )
+    set(custom_ops_dependencies
+        ${decoder_library_name}
+        ${Python3_LIBRARIES}
+    )
+    make_torchcodec_sublibrary(
+        "${custom_ops_library_name}"
+        SHARED
+        "${custom_ops_sources}"
+        "${custom_ops_dependencies}"
+    )
+
+    # 3. Create libtorchcodec_pybind_opsN.so.
+    set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}")
+    set(pybind_ops_sources
+        AVIOFileLikeContext.cpp
+        PyBindOps.cpp
+    )
+    set(pybind_ops_dependencies
+       ${decoder_library_name}
+       pybind11::module # This library dependency makes sure we have the right
+                        # Python libraries included as well as all of the right
+                        # settings so that we can successfully load the shared
+                        # library as a Python module on Mac. If we instead use
+                        # ${Python3_LIBRARIES}, it works on Linux but not on
+                        # Mac.
+    )
+    make_torchcodec_sublibrary(
+        "${pybind_ops_library_name}"
+        MODULE # Note that this is not SHARED; otherwise we build the wrong kind
+               # of library on Mac. On Mac, SHARED becomes .dylib and MODULE becomes
+               # a .so. We want pybind11 libraries to become .so. If this is
+               # changed to SHARED, we will be able to successfully compile a
+               # .dylib, but we will not be able to successfully import that as
+               # a Python module on Mac.
+        "${pybind_ops_sources}"
+        "${pybind_ops_dependencies}"
+    )
+    # pybind11 limits the visibility of symbols in the shared library to prevent
+    # stray initialization of py::objects. The rest of the object code must
+    # match. See:
+    #   https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes
+    target_compile_options(
+        ${pybind_ops_library_name}
+        PUBLIC
+      "-fvisibility=hidden"
+    )
+    # If we don't make sure this flag is set, we run into segfaults at import
+    # time on Mac. See:
+    #    https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764
+    target_link_options(
+        ${pybind_ops_library_name}
         PUBLIC
-        ${NEEDED_LIBRARIES}
+        "-undefined dynamic_lookup"
     )
 
-    # We already set the library_name to be libtorchcodecN, so we don't want
-    # cmake to add another "lib" prefix. We do it this way because it makes it
-    # easier to find references to libtorchcodec in the code (e.g. via `git
-    # grep`)
-    set_target_properties(${library_name} PROPERTIES PREFIX "")
+    # Install all libraries.
+    set(
+        all_libraries
+        ${decoder_library_name}
+        ${custom_ops_library_name}
+        ${pybind_ops_library_name}
+    )
 
     # The install step is invoked within CMakeBuild.build_library() in
-    # setup.py and just copies the built .so files from the temp
+    # setup.py and just copies the built files from the temp
     # cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We
     # still need to manually pass "DESTINATION ..." for cmake to copy those
     # files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib.
     install(
-        TARGETS ${library_name}
+        TARGETS ${all_libraries}
         LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
     )
 endfunction()
@@ -76,12 +181,10 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
         ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
     )
 
-
-	make_torchcodec_library(libtorchcodec7 ffmpeg7)
-	make_torchcodec_library(libtorchcodec6 ffmpeg6)
-	make_torchcodec_library(libtorchcodec5 ffmpeg5)
-	make_torchcodec_library(libtorchcodec4 ffmpeg4)
-
+    make_torchcodec_libraries(7 ffmpeg7)
+    make_torchcodec_libraries(6 ffmpeg6)
+    make_torchcodec_libraries(4 ffmpeg4)
+    make_torchcodec_libraries(5 ffmpeg5)
 else()
     message(
         STATUS
@@ -121,10 +224,12 @@ else()
         )
     endif()
 
-    set(libtorchcodec_target_name libtorchcodec${ffmpeg_major_version})
-    # Make libtorchcodec_target_name available in the parent's scope, for the
-    # test's CMakeLists.txt
-    set(libtorchcodec_target_name ${libtorchcodec_target_name} PARENT_SCOPE)
+    make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV)
 
-    make_torchcodec_library(${libtorchcodec_target_name} PkgConfig::LIBAV)
+    # Expose these values upwards so that the test compilation does not need
+    # to re-figure it out. FIXME: it's not great that we just copy-paste the
+    # library names.
+    set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE)
+    set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE)
 endif()
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
index 1e3a1421f..8bb41a85d 100644
--- a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
+++ b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -126,88 +126,4 @@ SwrContext* allocateSwrContext(
   return swrContext;
 }
 
-AVIOBytesContext::AVIOBytesContext(
-    const void* data,
-    size_t dataSize,
-    size_t bufferSize)
-    : bufferData_{static_cast<const uint8_t*>(data), dataSize, 0} {
-  auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
-  TORCH_CHECK(
-      buffer != nullptr,
-      "Failed to allocate buffer of size " + std::to_string(bufferSize));
-
-  avioContext_.reset(avio_alloc_context(
-      buffer,
-      bufferSize,
-      0,
-      &bufferData_,
-      &AVIOBytesContext::read,
-      nullptr,
-      &AVIOBytesContext::seek));
-
-  if (!avioContext_) {
-    av_freep(&buffer);
-    TORCH_CHECK(false, "Failed to allocate AVIOContext");
-  }
-}
-
-AVIOBytesContext::~AVIOBytesContext() {
-  if (avioContext_) {
-    av_freep(&avioContext_->buffer);
-  }
-}
-
-AVIOContext* AVIOBytesContext::getAVIO() {
-  return avioContext_.get();
-}
-
-// The signature of this function is defined by FFMPEG.
-int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
-  auto bufferData = static_cast<AVIOBufferData*>(opaque);
-  TORCH_CHECK(
-      bufferData->current <= bufferData->size,
-      "Tried to read outside of the buffer: current=",
-      bufferData->current,
-      ", size=",
-      bufferData->size);
-
-  buf_size =
-      FFMIN(buf_size, static_cast<int>(bufferData->size - bufferData->current));
-  TORCH_CHECK(
-      buf_size >= 0,
-      "Tried to read negative bytes: buf_size=",
-      buf_size,
-      ", size=",
-      bufferData->size,
-      ", current=",
-      bufferData->current);
-
-  if (!buf_size) {
-    return AVERROR_EOF;
-  }
-  memcpy(buf, bufferData->data + bufferData->current, buf_size);
-  bufferData->current += buf_size;
-  return buf_size;
-}
-
-// The signature of this function is defined by FFMPEG.
-int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
-  auto bufferData = static_cast<AVIOBufferData*>(opaque);
-  int64_t ret = -1;
-
-  switch (whence) {
-    case AVSEEK_SIZE:
-      ret = bufferData->size;
-      break;
-    case SEEK_SET:
-      bufferData->current = offset;
-      ret = offset;
-      break;
-    default:
-      break;
-  }
-
-  return ret;
-}
-
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h
index c61ae2876..0309bf933 100644
--- a/src/torchcodec/decoders/_core/FFMPEGCommon.h
+++ b/src/torchcodec/decoders/_core/FFMPEGCommon.h
@@ -157,34 +157,4 @@ SwrContext* allocateSwrContext(
 // Returns true if sws_scale can handle unaligned data.
 bool canSwsScaleHandleUnalignedData();
 
-// A struct that holds state for reading bytes from an IO context.
-// We give this to FFMPEG and it will pass it back to us when it needs to read
-// or seek in the memory buffer.
-struct AVIOBufferData {
-  const uint8_t* data;
-  size_t size;
-  size_t current;
-};
-
-// A class that can be used as AVFormatContext's IO context. It reads from a
-// memory buffer that is passed in.
-class AVIOBytesContext {
- public:
-  AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize);
-  ~AVIOBytesContext();
-
-  // Returns the AVIOContext that can be passed to FFMPEG.
-  AVIOContext* getAVIO();
-
-  // The signature of this function is defined by FFMPEG.
-  static int read(void* opaque, uint8_t* buf, int buf_size);
-
-  // The signature of this function is defined by FFMPEG.
-  static int64_t seek(void* opaque, int64_t offset, int whence);
-
- private:
-  UniqueAVIOContext avioContext_;
-  struct AVIOBufferData bufferData_;
-};
-
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/PyBindOps.cpp b/src/torchcodec/decoders/_core/PyBindOps.cpp
new file mode 100644
index 000000000..0b0f6f177
--- /dev/null
+++ b/src/torchcodec/decoders/_core/PyBindOps.cpp
@@ -0,0 +1,45 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <cstdint>
+#include <string>
+
+#include "src/torchcodec/decoders/_core/AVIOFileLikeContext.h"
+#include "src/torchcodec/decoders/_core/VideoDecoder.h"
+
+namespace py = pybind11;
+
+namespace facebook::torchcodec {
+
+// In principle, this should be able to return a tensor. But when we try that,
+// we run into the bug reported here:
+//
+//   https://github.com/pytorch/pytorch/issues/136664
+//
+// So we instead launder the pointer through an int, and then use a conversion
+// function on the custom ops side to launder that int into a tensor.
+int64_t create_from_file_like(
+    py::object file_like,
+    std::optional<std::string_view> seek_mode) {
+  VideoDecoder::SeekMode realSeek = VideoDecoder::SeekMode::exact;
+  if (seek_mode.has_value()) {
+    realSeek = seekModeFromString(seek_mode.value());
+  }
+
+  auto avioContextHolder = std::make_unique<AVIOFileLikeContext>(file_like);
+
+  VideoDecoder* decoder =
+      new VideoDecoder(std::move(avioContextHolder), realSeek);
+  return reinterpret_cast<int64_t>(decoder);
+}
+
+PYBIND11_MODULE(decoder_core_pybind_ops, m) {
+  m.def("create_from_file_like", &create_from_file_like);
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
index 3d51b8a35..a379cc0b0 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -66,15 +66,13 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode)
   initializeDecoder();
 }
 
-VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
-    : seekMode_(seekMode) {
-  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
-
+VideoDecoder::VideoDecoder(
+    std::unique_ptr<AVIOContextHolder> context,
+    SeekMode seekMode)
+    : seekMode_(seekMode), avioContextHolder_(std::move(context)) {
   setFFmpegLogLevel();
 
-  constexpr int bufferSize = 64 * 1024;
-  ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize));
-  TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext");
+  TORCH_CHECK(avioContextHolder_, "Context holder cannot be null");
 
   // Because FFmpeg requires a reference to a pointer in the call to open, we
   // can't use a unique pointer here. Note that means we must call free if open
@@ -82,7 +80,7 @@ VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
   AVFormatContext* rawContext = avformat_alloc_context();
   TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context");
 
-  rawContext->pb = ioBytesContext_->getAVIO();
+  rawContext->pb = avioContextHolder_->getAVIOContext();
   int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr);
   if (status != 0) {
     avformat_free_context(rawContext);
@@ -2067,4 +2065,14 @@ FrameDims getHeightAndWidthFromOptionsOrAVFrame(
       videoStreamOptions.width.value_or(avFrame->width));
 }
 
+VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
+  if (seekMode == "exact") {
+    return VideoDecoder::SeekMode::exact;
+  } else if (seekMode == "approximate") {
+    return VideoDecoder::SeekMode::approximate;
+  } else {
+    TORCH_CHECK(false, "Invalid seek mode: " + std::string(seekMode));
+  }
+}
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
index 4d3e2f2ce..1f8e423be 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.h
+++ b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -12,6 +12,7 @@
 #include <ostream>
 #include <string_view>
 
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
 #include "src/torchcodec/decoders/_core/FFMPEGCommon.h"
 
 namespace facebook::torchcodec {
@@ -34,11 +35,12 @@ class VideoDecoder {
       const std::string& videoFilePath,
       SeekMode seekMode = SeekMode::exact);
 
-  // Creates a VideoDecoder from a given buffer of data. Note that the data is
-  // not owned by the VideoDecoder.
+  // Creates a VideoDecoder using the provided AVIOContext inside the
+  // AVIOContextHolder. The AVIOContextHolder is the base class, and the
+  // derived class specializes how the custom read, seek and write operations
+  // work.
   explicit VideoDecoder(
-      const void* data,
-      size_t length,
+      std::unique_ptr<AVIOContextHolder> context,
       SeekMode seekMode = SeekMode::exact);
 
   // --------------------------------------------------------------------------
@@ -501,7 +503,7 @@ class VideoDecoder {
   // Stores various internal decoding stats.
   DecodeStats decodeStats_;
   // Stores the AVIOContext for the input buffer.
-  std::unique_ptr<AVIOBytesContext> ioBytesContext_;
+  std::unique_ptr<AVIOContextHolder> avioContextHolder_;
   // Whether or not we have already scanned all streams to update the metadata.
   bool scannedAllStreams_ = false;
   // Tracks that we've already been initialized.
@@ -583,4 +585,6 @@ std::ostream& operator<<(
     std::ostream& os,
     const VideoDecoder::DecodeStats& stats);
 
+VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode);
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
index 786d3f327..bd142d70e 100644
--- a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -11,6 +11,7 @@
 #include <string>
 #include "c10/core/SymIntArrayRef.h"
 #include "c10/util/Exception.h"
+#include "src/torchcodec/decoders/_core/AVIOBytesContext.h"
 #include "src/torchcodec/decoders/_core/VideoDecoder.h"
 
 namespace facebook::torchcodec {
@@ -29,6 +30,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def("create_from_file(str filename, str? seek_mode=None) -> Tensor");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
+  m.def("_convert_to_tensor(int decoder_ptr) -> Tensor");
   m.def(
       "_add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str? device=None, str? color_conversion_library=None) -> ()");
   m.def(
@@ -63,13 +65,14 @@ TORCH_LIBRARY(torchcodec_ns, m) {
 }
 
 namespace {
+
 at::Tensor wrapDecoderPointerToTensor(
     std::unique_ptr<VideoDecoder> uniqueDecoder) {
   VideoDecoder* decoder = uniqueDecoder.release();
 
   auto deleter = [decoder](void*) { delete decoder; };
   at::Tensor tensor =
-      at::from_blob(decoder, {sizeof(VideoDecoder)}, deleter, {at::kLong});
+      at::from_blob(decoder, {sizeof(VideoDecoder*)}, deleter, {at::kLong});
   auto videoDecoder = static_cast<VideoDecoder*>(tensor.mutable_data_ptr());
   TORCH_CHECK_EQ(videoDecoder, decoder) << "videoDecoder=" << videoDecoder;
   return tensor;
@@ -100,17 +103,6 @@ OpsAudioFramesOutput makeOpsAudioFramesOutput(
       audioFrames.data,
       torch::tensor(audioFrames.ptsSeconds, torch::dtype(torch::kFloat64)));
 }
-
-VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
-  if (seekMode == "exact") {
-    return VideoDecoder::SeekMode::exact;
-  } else if (seekMode == "approximate") {
-    return VideoDecoder::SeekMode::approximate;
-  } else {
-    throw std::runtime_error("Invalid seek mode: " + std::string(seekMode));
-  }
-}
-
 } // namespace
 
 // ==============================
@@ -137,7 +129,10 @@ at::Tensor create_from_tensor(
     at::Tensor video_tensor,
     std::optional<std::string_view> seek_mode) {
   TORCH_CHECK(video_tensor.is_contiguous(), "video_tensor must be contiguous");
-  void* buffer = video_tensor.mutable_data_ptr();
+  TORCH_CHECK(
+      video_tensor.scalar_type() == torch::kUInt8,
+      "video_tensor must be kUInt8");
+  void* data = video_tensor.mutable_data_ptr();
   size_t length = video_tensor.numel();
 
   VideoDecoder::SeekMode realSeek = VideoDecoder::SeekMode::exact;
@@ -145,8 +140,16 @@ at::Tensor create_from_tensor(
     realSeek = seekModeFromString(seek_mode.value());
   }
 
+  auto contextHolder = std::make_unique<AVIOBytesContext>(data, length);
+
   std::unique_ptr<VideoDecoder> uniqueDecoder =
-      std::make_unique<VideoDecoder>(buffer, length, realSeek);
+      std::make_unique<VideoDecoder>(std::move(contextHolder), realSeek);
+  return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
+}
+
+at::Tensor _convert_to_tensor(int64_t decoder_ptr) {
+  auto decoder = reinterpret_cast<VideoDecoder*>(decoder_ptr);
+  std::unique_ptr<VideoDecoder> uniqueDecoder(decoder);
   return wrapDecoderPointerToTensor(std::move(uniqueDecoder));
 }
 
@@ -550,6 +553,7 @@ void scan_all_streams_to_update_metadata(at::Tensor& decoder) {
 TORCH_LIBRARY_IMPL(torchcodec_ns, BackendSelect, m) {
   m.impl("create_from_file", &create_from_file);
   m.impl("create_from_tensor", &create_from_tensor);
+  m.impl("_convert_to_tensor", &_convert_to_tensor);
   m.impl(
       "_get_json_ffmpeg_library_versions", &_get_json_ffmpeg_library_versions);
 }
diff --git a/src/torchcodec/decoders/_core/__init__.py b/src/torchcodec/decoders/_core/__init__.py
index 490e3d834..9de779f60 100644
--- a/src/torchcodec/decoders/_core/__init__.py
+++ b/src/torchcodec/decoders/_core/__init__.py
@@ -20,6 +20,7 @@
     add_video_stream,
     create_from_bytes,
     create_from_file,
+    create_from_file_like,
     create_from_tensor,
     get_ffmpeg_library_versions,
     get_frame_at_index,
diff --git a/src/torchcodec/decoders/_core/ops.py b/src/torchcodec/decoders/_core/ops.py
index e8efa45f2..0f0bdfe25 100644
--- a/src/torchcodec/decoders/_core/ops.py
+++ b/src/torchcodec/decoders/_core/ops.py
@@ -4,41 +4,68 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import io
 import json
 import warnings
-from typing import List, Optional, Tuple
+from types import ModuleType
+from typing import List, Optional, Tuple, Union
 
 import torch
 from torch.library import get_ctx, register_fake
 
 from torchcodec._internally_replaced_utils import (  # @manual=//pytorch/torchcodec/src:internally_replaced_utils
     _get_extension_path,
+    _load_pybind11_module,
 )
 
+_pybind_ops: Optional[ModuleType] = None
 
-def load_torchcodec_extension():
-    # Successively try to load libtorchcodec7.so, libtorchcodec6.so,
-    # libtorchcodec5.so, and libtorchcodec4.so. Each of these correspond to an
+
+def load_torchcodec_shared_libraries():
+    # Successively try to load libtorchcodec_*7.so, libtorchcodec_*6.so,
+    # libtorchcodec_*5.so, and libtorchcodec_*4.so. Each of these corresponds to an
     # ffmpeg major version. This should cover all potential ffmpeg versions
     # installed on the user's machine.
     #
     # On fbcode, _get_extension_path() is overridden and directly points to the
     # correct .so file, so this for-loop succeeds on the first iteration.
+    #
+    # Note that we use two different methods for loading shared libraries:
+    #
+    #   1. torch.ops.load_library(): For PyTorch custom ops and the C++ only
+    #      libraries the custom ops depend on. Loading libraries through PyTorch
+    #      registers the custom ops with PyTorch's runtime and the ops can be
+    #      accessed through torch.ops after loading.
+    #
+    #   2. importlib: For pybind11 modules. We load them dynamically, rather
+    #      than using a plain import statement. A plain import statement only
+    #      works when the module name and file name match exactly. Our shared
+    #      libraries do not meet that condition.
 
     exceptions = []
+    pybind_ops_module_name = "decoder_core_pybind_ops"
     for ffmpeg_major_version in (7, 6, 5, 4):
-        library_name = f"libtorchcodec{ffmpeg_major_version}"
+        decoder_library_name = f"libtorchcodec_decoder{ffmpeg_major_version}"
+        custom_ops_library_name = f"libtorchcodec_custom_ops{ffmpeg_major_version}"
+        pybind_ops_library_name = f"libtorchcodec_pybind_ops{ffmpeg_major_version}"
         try:
-            torch.ops.load_library(_get_extension_path(library_name))
+            torch.ops.load_library(_get_extension_path(decoder_library_name))
+            torch.ops.load_library(_get_extension_path(custom_ops_library_name))
+
+            pybind_ops_library_path = _get_extension_path(pybind_ops_library_name)
+            global _pybind_ops
+            _pybind_ops = _load_pybind11_module(
+                pybind_ops_module_name, pybind_ops_library_path
+            )
             return
         except Exception as e:
             # TODO: recording and reporting exceptions this way is OK for now as  it's just for debugging,
             # but we should probably handle that via a proper logging mechanism.
-            exceptions.append(e)
+            exceptions.append((ffmpeg_major_version, e))
 
     traceback = (
         "\n[start of libtorchcodec loading traceback]\n"
-        + "\n".join(str(e) for e in exceptions)
+        + "\n".join(f"FFmpeg version {v}: {str(e)}" for v, e in exceptions)
         + "\n[end of libtorchcodec loading traceback]."
     )
     raise RuntimeError(
@@ -56,7 +83,7 @@ def load_torchcodec_extension():
     )
 
 
-load_torchcodec_extension()
+load_torchcodec_shared_libraries()
 
 
 # Note: We use disallow_in_graph because PyTorch does constant propagation of
@@ -67,6 +94,9 @@ def load_torchcodec_extension():
 create_from_tensor = torch._dynamo.disallow_in_graph(
     torch.ops.torchcodec_ns.create_from_tensor.default
 )
+_convert_to_tensor = torch._dynamo.disallow_in_graph(
+    torch.ops.torchcodec_ns._convert_to_tensor.default
+)
 add_video_stream = torch.ops.torchcodec_ns.add_video_stream.default
 _add_video_stream = torch.ops.torchcodec_ns._add_video_stream.default
 add_audio_stream = torch.ops.torchcodec_ns.add_audio_stream.default
@@ -110,6 +140,13 @@ def create_from_bytes(
     return create_from_tensor(buffer, seek_mode)
 
 
+def create_from_file_like(
+    file_like: Union[io.RawIOBase, io.BufferedIOBase], seek_mode: Optional[str] = None
+) -> torch.Tensor:
+    assert _pybind_ops is not None  # set by load_torchcodec_shared_libraries()
+    return _convert_to_tensor(_pybind_ops.create_from_file_like(file_like, seek_mode))
+
+
 # ==============================
 # Abstract impl for the operators. Needed by torch.compile.
 # ==============================
@@ -125,6 +162,11 @@ def create_from_tensor_abstract(
     return torch.empty([], dtype=torch.long)
 
 
+@register_fake("torchcodec_ns::_convert_to_tensor")
+def _convert_to_tensor_abstract(decoder_ptr: int) -> torch.Tensor:
+    return torch.empty([], dtype=torch.long)
+
+
 @register_fake("torchcodec_ns::_add_video_stream")
 def _add_video_stream_abstract(
     decoder: torch.Tensor,
diff --git a/test/decoders/CMakeLists.txt b/test/decoders/CMakeLists.txt
index 3350c92c5..126dd2794 100644
--- a/test/decoders/CMakeLists.txt
+++ b/test/decoders/CMakeLists.txt
@@ -22,11 +22,13 @@ add_executable(
 )
 
 target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS})
+target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${libav_include_dirs})
 target_include_directories(VideoDecoderTest PRIVATE ../../)
 
 target_link_libraries(
   VideoDecoderTest
-  ${libtorchcodec_target_name}
+  ${libtorchcodec_library_name}
+  ${libtorchcodec_custom_ops_name}
   GTest::gtest_main
 )
 
diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp
index f7747ef6b..dabe70cd0 100644
--- a/test/decoders/VideoDecoderTest.cpp
+++ b/test/decoders/VideoDecoderTest.cpp
@@ -5,6 +5,7 @@
 // LICENSE file in the root directory of this source tree.
 
 #include "src/torchcodec/decoders/_core/VideoDecoder.h"
+#include "src/torchcodec/decoders/_core/AVIOBytesContext.h"
 
 #include <c10/util/Flags.h>
 #include <gtest/gtest.h>
@@ -48,10 +49,12 @@ class VideoDecoderTest : public testing::TestWithParam<bool> {
       std::ifstream input(filepath, std::ios::binary);
       outputStringStream << input.rdbuf();
       content_ = outputStringStream.str();
+
       void* buffer = content_.data();
-      size_t length = outputStringStream.str().length();
+      size_t length = content_.length();
+      auto contextHolder = std::make_unique<AVIOBytesContext>(buffer, length);
       return std::make_unique<VideoDecoder>(
-          buffer, length, VideoDecoder::SeekMode::approximate);
+          std::move(contextHolder), VideoDecoder::SeekMode::approximate);
     } else {
       return std::make_unique<VideoDecoder>(
           filepath, VideoDecoder::SeekMode::approximate);
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
index 264417c3f..9efb33f35 100644
--- a/test/decoders/test_ops.py
+++ b/test/decoders/test_ops.py
@@ -23,6 +23,7 @@
     add_video_stream,
     create_from_bytes,
     create_from_file,
+    create_from_file_like,
     create_from_tensor,
     get_ffmpeg_library_versions,
     get_frame_at_index,
@@ -345,7 +346,10 @@ def get_frame1_and_frame_time6(decoder):
         assert_frames_equal(frame_time6, reference_frame_time6.to(device))
 
     @pytest.mark.parametrize("device", cpu_and_cuda())
-    @pytest.mark.parametrize("create_from", ("file", "tensor", "bytes"))
+    @pytest.mark.parametrize(
+        "create_from",
+        ("file", "tensor", "bytes", "file_like_rawio", "file_like_bufferedio"),
+    )
     def test_create_decoder(self, create_from, device):
         path = str(NASA_VIDEO.path)
         if create_from == "file":
@@ -354,10 +358,18 @@ def test_create_decoder(self, create_from, device):
             arr = np.fromfile(path, dtype=np.uint8)
             video_tensor = torch.from_numpy(arr)
             decoder = create_from_tensor(video_tensor)
-        else:  # bytes
+        elif create_from == "bytes":
             with open(path, "rb") as f:
                 video_bytes = f.read()
             decoder = create_from_bytes(video_bytes)
+        elif create_from == "file_like_rawio":
+            decoder = create_from_file_like(open(path, mode="rb", buffering=0), "exact")
+        elif create_from == "file_like_bufferedio":
+            decoder = create_from_file_like(
+                open(path, mode="rb", buffering=4096), "exact"
+            )
+        else:
+            raise ValueError("Oops, double check the parametrization of this test!")
 
         add_video_stream(decoder, device=device)
         frame0, _, _ = get_next_frame(decoder)