In [112]:
import numpy as np

def _triple(value, name):
    """Expand an int to a 3-tuple, or validate a length-3 sequence (depth, height, width)."""
    if isinstance(value, (int, np.integer)):
        return (int(value),) * 3
    value = tuple(value)
    if len(value) != 3:
        raise ValueError(f"{name} must be an int or a sequence of 3 ints, got {value!r}")
    return value


def conv3d(input_data, in_channels, out_channels, kernel_size, bias = None, stride = 1, padding = 0, dilation = 1):
    """Reference 3D convolution (cross-correlation) in NumPy with random weights.

    A fresh weight tensor is drawn with ``torch.randn`` on every call and
    returned alongside the result, so the caller can feed the same weights to
    ``torch.nn.functional.conv3d`` and compare outputs.

    Args:
        input_data: array-like of shape (batch, in_channels, depth, height, width).
        in_channels: number of input channels; must equal ``input_data.shape[1]``.
        out_channels: number of output channels.
        kernel_size: int or 3-sequence (depth, height, width).
        bias: ``None`` (no bias), a scalar added to every output channel, or a
            sequence with one value per output channel.
        stride: int or 3-sequence.
        padding: int or 3-sequence; zero padding applied on both sides of each
            spatial axis.
        dilation: int or 3-sequence; spacing between kernel taps.

    Returns:
        A tuple ``(output, weight)`` where ``output`` is a float64 array of
        shape (batch, out_channels, d_out, h_out, w_out) and ``weight`` is the
        torch tensor of shape (out_channels, in_channels, kD, kH, kW) that was
        used to compute it.

    Raises:
        ValueError: if the input is not 5-D, the channel count does not match
            ``in_channels``, a size argument is not an int or 3-sequence, the
            bias cannot be broadcast to ``out_channels``, or the dilated kernel
            does not fit inside the padded input.
    """
    # Imported locally so the function also works on a fresh kernel, before the
    # later notebook cell that imports torch has run.
    import torch

    kernel_size = _triple(kernel_size, "kernel_size")
    stride = _triple(stride, "stride")
    padding = _triple(padding, "padding")
    dilation = _triple(dilation, "dilation")

    # Accumulate in float64 regardless of the input dtype.
    input_data = np.asarray(input_data, dtype=np.float64)
    if input_data.ndim != 5:
        raise ValueError(
            f"input_data must have shape (batch, channels, depth, height, width), got {input_data.shape}"
        )
    batches, channels, d_in, h_in, w_in = input_data.shape
    if channels != in_channels:
        raise ValueError(f"input_data has {channels} channels but in_channels={in_channels}")

    # One kernel per (output channel, input channel) pair, in PyTorch's layout.
    weight_tensor_torch = torch.randn(out_channels, in_channels, *kernel_size)
    weight_tensor = weight_tensor_torch.numpy().astype(np.float64)

    if bias is None:
        bias_values = np.zeros(out_channels)
    else:
        # A scalar (or length-1 sequence) is shared by every output channel.
        bias_values = np.broadcast_to(np.asarray(bias, dtype=np.float64), (out_channels,))

    # Extent of the dilated kernel along each spatial axis.
    spans = tuple(d * (k - 1) + 1 for d, k in zip(dilation, kernel_size))
    out_sizes = tuple(
        (size + 2 * pad - span) // step + 1
        for size, pad, span, step in zip((d_in, h_in, w_in), padding, spans, stride)
    )
    if min(out_sizes) <= 0:
        raise ValueError(
            f"dilated kernel {spans} does not fit into the padded input: "
            f"computed output size {out_sizes}"
        )
    d_out, h_out, w_out = out_sizes

    # Explicit zero padding removes the need for per-tap bounds checks.
    padded = np.pad(input_data, [(0, 0), (0, 0)] + [(pad, pad) for pad in padding])

    out = np.empty((batches, out_channels, d_out, h_out, w_out))
    for z_out in range(d_out):
        z0 = z_out * stride[0]
        for y_out in range(h_out):
            y0 = y_out * stride[1]
            for x_out in range(w_out):
                x0 = x_out * stride[2]
                # (batch, in_channels, kD, kH, kW) view of the dilated receptive field.
                window = padded[
                    :, :,
                    z0:z0 + spans[0]:dilation[0],
                    y0:y0 + spans[1]:dilation[1],
                    x0:x0 + spans[2]:dilation[2],
                ]
                # Contract input channels and kernel axes -> (batch, out_channels).
                out[:, :, z_out, y_out, x_out] = np.tensordot(
                    window, weight_tensor, axes=([1, 2, 3, 4], [1, 2, 3, 4])
                )

    out += bias_values.reshape(1, -1, 1, 1, 1)
    return out, weight_tensor_torch

In [113]:
import torch

# Run the NumPy implementation and PyTorch's conv3d on the same random
# 5x5x5 single-channel volume, sharing the weights returned by conv3d.
input_data = torch.randn(1, 1, 5, 5, 5)

my_conv, weight = conv3d(
    input_data.numpy(),
    in_channels=1,
    out_channels=1,
    kernel_size=4,
    bias=0.5,
    stride=1,
    padding=2,
    dilation=1,
)

torch_conv = torch.nn.functional.conv3d(
    input_data,
    weight,
    bias=torch.tensor([0.5]),
    stride=(1, 1, 1),
    padding=(2, 2, 2),
    dilation=(1, 1, 1),
)

In [116]:
# Print the PyTorch result, then the NumPy result, then whether the two
# agree within np.allclose's default tolerances.
reference_result = torch_conv.squeeze().detach().numpy()
custom_result = my_conv[0][0]

print("Результат работы исходной функции: \n" , reference_result)
print("Результат работы написанной функции: \n", custom_result)
print(np.allclose(custom_result, reference_result))

Результат работы исходной функции: 
 [[[ 2.09842443e+00 -3.57018650e-01 -3.80205989e+00  4.28845942e-01
    5.75750411e-01 -5.24891555e-01]
  [ 1.89877367e+00  1.47347689e+00 -1.31848025e+00  5.24502611e+00
    3.79734755e+00  1.59757388e+00]
  [ 1.29019701e+00 -4.01920080e+00 -4.02265978e+00 -5.39329052e+00
    4.44545221e+00  2.44352269e+00]
  [ 1.70783055e+00 -1.54565048e+00 -6.98826551e-01 -6.09273291e+00
   -3.96785617e+00  2.44271946e+00]
  [-1.89330554e+00 -2.43497327e-01  2.92845416e+00  7.57402611e+00
   -8.88401091e-01  4.12355137e+00]
  [ 1.54153311e+00 -5.87321520e-01 -4.50485802e+00 -3.79096699e+00
    3.38161612e+00 -2.18247747e+00]]

 [[-2.31419519e-01 -5.19470596e+00 -3.29496682e-01  7.62101936e+00
    2.77605128e+00 -3.67958117e+00]
  [ 1.86370587e+00 -8.92282605e-01 -5.18372536e+00  1.13405981e+01
    4.79224825e+00  1.01313293e+00]
  [ 1.98136580e+00 -2.54125977e+00  6.67743635e+00 -4.18857718e+00
    4.93766785e+00 -7.22642839e-01]
  [ 6.04501104e+00  4.82004881e+00