
Commit

Update
[ghstack-poisoned]
amjames committed Jun 18, 2024
2 parents 1c81afc + 3bd7420 commit 2fec428
Showing 185 changed files with 9,709 additions and 9,692 deletions.
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
d6015d42d9a1834bc7595c4bd6852562fb80b30b
0dab1dd97709096e8129f8a08115ee83f64f2194
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
@@ -26,3 +26,4 @@ retryable_workflows:
- windows-binary
labeler_config: labeler.yml
label_to_label_config: label_to_label.yml
mergebot: True
4 changes: 4 additions & 0 deletions .github/workflows/_win-build.yml
@@ -47,6 +47,9 @@ jobs:
timeout-minutes: 240
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
defaults:
run:
shell: bash
steps:
# Duplicated in win-test because this MUST go before a checkout
- name: Enable git symlinks on Windows and disable fsmonitor daemon
@@ -89,6 +92,7 @@ jobs:

- name: Parse ref
id: parse-ref
shell: bash
run: python3 .github/scripts/parse_ref.py

- name: Get workflow job id
4 changes: 4 additions & 0 deletions .github/workflows/_win-test.yml
@@ -41,6 +41,9 @@ jobs:
fail-fast: false
runs-on: ${{ matrix.runner }}
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
defaults:
run:
shell: bash
steps:
# Duplicated in win-build because this MUST go before a checkout
- name: Enable git symlinks on Windows and disable fsmonitor daemon
@@ -224,6 +227,7 @@

- name: Parse ref
id: parse-ref
shell: bash
run: python3 .github/scripts/parse_ref.py

- name: Uninstall PyTorch
43 changes: 0 additions & 43 deletions .lintrunner.toml
@@ -1643,15 +1643,6 @@ exclude_patterns = [
'torch/linalg/__init__.py',
'torch/monitor/__init__.py',
'torch/nested/__init__.py',
'torch/nn/__init__.py',
'torch/nn/_reduction.py',
'torch/nn/backends/__init__.py',
'torch/nn/backends/thnn.py',
'torch/nn/common_types.py',
'torch/nn/cpp.py',
'torch/nn/functional.py',
'torch/nn/grad.py',
'torch/nn/init.py',
'torch/nn/intrinsic/__init__.py',
'torch/nn/intrinsic/modules/__init__.py',
'torch/nn/intrinsic/modules/fused.py',
@@ -1668,40 +1659,6 @@ exclude_patterns = [
'torch/nn/intrinsic/quantized/modules/bn_relu.py',
'torch/nn/intrinsic/quantized/modules/conv_relu.py',
'torch/nn/intrinsic/quantized/modules/linear_relu.py',
'torch/nn/modules/__init__.py',
'torch/nn/modules/_functions.py',
'torch/nn/modules/activation.py',
'torch/nn/modules/adaptive.py',
'torch/nn/modules/batchnorm.py',
'torch/nn/modules/channelshuffle.py',
'torch/nn/modules/container.py',
'torch/nn/modules/conv.py',
'torch/nn/modules/distance.py',
'torch/nn/modules/dropout.py',
'torch/nn/modules/flatten.py',
'torch/nn/modules/fold.py',
'torch/nn/modules/instancenorm.py',
'torch/nn/modules/lazy.py',
'torch/nn/modules/linear.py',
'torch/nn/modules/loss.py',
'torch/nn/modules/module.py',
'torch/nn/modules/normalization.py',
'torch/nn/modules/padding.py',
'torch/nn/modules/pixelshuffle.py',
'torch/nn/modules/pooling.py',
'torch/nn/modules/rnn.py',
'torch/nn/modules/sparse.py',
'torch/nn/modules/transformer.py',
'torch/nn/modules/upsampling.py',
'torch/nn/modules/utils.py',
'torch/nn/parallel/__init__.py',
'torch/nn/parallel/_functions.py',
'torch/nn/parallel/comm.py',
'torch/nn/parallel/data_parallel.py',
'torch/nn/parallel/parallel_apply.py',
'torch/nn/parallel/replicate.py',
'torch/nn/parallel/scatter_gather.py',
'torch/nn/parameter.py',
'torch/nn/qat/__init__.py',
'torch/nn/qat/dynamic/__init__.py',
'torch/nn/qat/dynamic/modules/__init__.py',
5 changes: 5 additions & 0 deletions Dockerfile
@@ -77,6 +77,11 @@ RUN case ${TARGETPLATFORM} in \
esac && \
/opt/conda/bin/conda clean -ya
RUN /opt/conda/bin/pip install torchelastic
RUN IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); \
echo "Is torch compiled with cuda: ${IS_CUDA}"; \
if test "${IS_CUDA}" != "True" -a ! -z "${CUDA_VERSION}"; then \
exit 1; \
fi

FROM ${BASE_IMAGE} as official
ARG PYTORCH_VERSION
9 changes: 7 additions & 2 deletions aten/src/ATen/FunctionalInverses.cpp
@@ -303,7 +303,7 @@ Tensor FunctionalInverses::_nested_view_from_buffer_inverse(const Tensor& base,
return Tensor();
}

Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx) {
Tensor FunctionalInverses::_nested_view_from_jagged_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, const Tensor& offsets, const Tensor& dummy, const std::optional<Tensor>& lengths, int64_t ragged_idx, const c10::optional<Tensor>& min_seqlen, const c10::optional<Tensor>& max_seqlen) {
auto values = at::_nested_get_values(mutated_view);
if (inverse_return_mode != InverseReturnMode::NeverView) {
return values;
@@ -317,7 +317,12 @@ Tensor FunctionalInverses::_nested_get_values_inverse(const Tensor& base, const
auto lengths = at::_nested_get_lengths(base);
auto ragged_idx = at::_nested_get_ragged_idx(base);
auto dummy = at::_nested_get_jagged_dummy(base);
auto nt = at::_nested_view_from_jagged(mutated_view, offsets, dummy, lengths, ragged_idx);
auto min_seqlen = at::_nested_get_min_seqlen(base);
auto max_seqlen = at::_nested_get_max_seqlen(base);
auto nt = at::_nested_view_from_jagged(
mutated_view, offsets, dummy, lengths, ragged_idx,
(min_seqlen.defined() ? c10::optional<Tensor>(min_seqlen) : c10::nullopt),
(max_seqlen.defined() ? c10::optional<Tensor>(max_seqlen) : c10::nullopt));

if (inverse_return_mode != InverseReturnMode::NeverView) {
return nt;
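The inverse above now also recovers the cached min/max sequence lengths from the base and forwards them to _nested_view_from_jagged, converting each tensor to an optional only when it is defined. A minimal standalone sketch of that defined()-to-optional idiom, using std::optional and a stand-in Tensor type rather than ATen's (all names here are illustrative only):

#include <iostream>
#include <optional>

struct Tensor {
  bool has_storage = false;
  bool defined() const { return has_storage; }
};

// Mirrors the pattern in the diff: an undefined tensor maps to nullopt.
std::optional<Tensor> to_optional(const Tensor& t) {
  return t.defined() ? std::optional<Tensor>(t) : std::nullopt;
}

int main() {
  Tensor undefined_t;       // like a default-constructed tensor from a getter
  Tensor defined_t{true};
  std::cout << to_optional(undefined_t).has_value() << "\n";  // 0
  std::cout << to_optional(defined_t).has_value() << "\n";    // 1
}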
4 changes: 4 additions & 0 deletions aten/src/ATen/MapAllocator.h
@@ -55,6 +55,10 @@ class TORCH_API MapAllocator {
return base_ptr_;
}

int flags() const {
return flags_;
}

static MapAllocator* fromDataPtr(const at::DataPtr&);
static at::DataPtr makeDataPtr(
c10::string_view filename,
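The new flags() accessor exposes the mapping mode of a map-allocated storage alongside the existing fromDataPtr lookup. A hedged usage sketch — fromDataPtr and flags are declared in this header as shown; ALLOCATOR_MAPPED_SHARED is assumed to be the shared-mapping bit defined in the same header:

#include <ATen/MapAllocator.h>
#include <iostream>

// Report whether a storage's memory came from a shared mapping.
void inspect(const at::DataPtr& ptr) {
  // fromDataPtr returns nullptr when the storage was not map-allocated.
  if (at::MapAllocator* alloc = at::MapAllocator::fromDataPtr(ptr)) {
    int f = alloc->flags();  // accessor added in this commit
    std::cout << "shared mapping: "
              << bool(f & at::ALLOCATOR_MAPPED_SHARED) << "\n";
  }
}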
1 change: 1 addition & 0 deletions aten/src/ATen/cpu/vec/vec_mask.h
@@ -259,6 +259,7 @@ VEC_MASK_DEFINE_BINARY_OP_WITH_EXPR_GLOBAL(operator<, ~a& b)
VEC_MASK_DEFINE_BINARY_OP_WITH_EXPR_GLOBAL(operator==, ~(a ^ b))
VEC_MASK_DEFINE_BINARY_OP_WITH_EXPR_GLOBAL(operator>=, (a == b) | (a > b))
VEC_MASK_DEFINE_BINARY_OP_WITH_EXPR_GLOBAL(operator<=, (a == b) | (a < b))
VEC_MASK_DEFINE_BINARY_OP_WITH_EXPR_GLOBAL(operator!=, (a ^ b))

#undef VEC_MASK_DEFINE_UNARY_OP_GLOBAL
#undef VEC_MASK_DEFINE_BINARY_OP_GLOBAL
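The added operator!= simply drops the complement from operator==: for lane masks whose lanes are all-ones or all-zeros, a ^ b is set exactly where the lanes differ. A scalar sanity check of that identity in plain C++ (one byte standing in for one lane, not the vectorized types):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t T = 0xFF, F = 0x00;  // one "lane": true / false
  assert(uint8_t(~(T ^ T)) == T);    // equal lanes   -> == yields true
  assert(uint8_t(T ^ F) == T);       // unequal lanes -> != yields true
  assert(uint8_t(T ^ T) == F);       // equal lanes   -> != yields false
  return 0;
}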
76 changes: 3 additions & 73 deletions aten/src/ATen/cuda/tunable/GemmCommon.h
@@ -81,8 +81,7 @@ struct GemmParams : OpParams {
}

std::string Signature() const override {
static std::string val = c10::str(transa, transb, "_", m, "_", n, "_", k);
return val;
return c10::str(transa, transb, "_", m, "_", n, "_", k);
}

size_t GetSize(bool duplicate_inputs) const {
@@ -144,82 +143,14 @@
bool duplicate_inputs_;
};

template <typename T>
struct GemmAndBiasParams : OpParams {
std::string Signature() const override {
static std::string val = c10::str(transa, transb, "_", m, "_", n, "_", k);
return val;
}

size_t GetSize(bool duplicate_inputs) const {
size_t size = sizeof(T) * ldc * n;
if (duplicate_inputs) {
size += sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
size += sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
}
return size;
}

GemmAndBiasParams* DeepCopy(bool duplicate_inputs) const {
GemmAndBiasParams* copy = new GemmAndBiasParams;
*copy = *this;
c10::DeviceIndex device = 0;
AT_CUDA_CHECK(c10::cuda::GetDevice(&device));
size_t c_size = ldc * n * sizeof(T);
copy->c = static_cast<T*>(c10::cuda::CUDACachingAllocator::raw_alloc(c_size));
AT_CUDA_CHECK(c10::cuda::CUDACachingAllocator::memcpyAsync(
copy->c, device, c, device, c_size, getCurrentCUDAStream(device), true));
if (duplicate_inputs) {
size_t a_size = sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
size_t b_size = sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
copy->a = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(a_size));
copy->b = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(b_size));
copy->duplicate_inputs_ = true;
}
return copy;
}

// only call on object returned by DeepCopy
void Delete() {
c10::cuda::CUDACachingAllocator::raw_delete(c);
if (duplicate_inputs_) {
c10::cuda::CUDACachingAllocator::raw_delete(const_cast<T*>(a));
c10::cuda::CUDACachingAllocator::raw_delete(const_cast<T*>(b));
}
}

TuningStatus NumericalCheck(GemmAndBiasParams<T> *other) {
auto c_dtype = c10::CppTypeToScalarType<T>::value;
return detail::NumericalCheck(c_dtype, c, other->c, ldc*n) ? OK : FAIL;
}

char transa;
char transb;
int64_t m;
int64_t n;
int64_t k;
at::opmath_type<T> alpha;
const T* a;
int64_t lda;
const T* b;
int64_t ldb;
T* c;
int64_t ldc;
const T* bias;
at::cuda::blas::GEMMAndBiasActivationEpilogue activation;
private:
bool duplicate_inputs_;
};

template <typename T>
struct GemmStridedBatchedParams : OpParams {
GemmStridedBatchedParams() {
duplicate_inputs_ = false;
}

std::string Signature() const override {
static std::string val = c10::str(transa, transb, "_", m, "_", n, "_", k, "_B_", batch);
return val;
return c10::str(transa, transb, "_", m, "_", n, "_", k, "_B_", batch);
}

size_t GetSize(bool duplicate_inputs) const {
@@ -292,8 +223,7 @@ struct ScaledGemmParams : OpParams {
}

std::string Signature() const override {
static std::string val = c10::str(transa, transb, "_", m, "_", n, "_", k);
return val;
return c10::str(transa, transb, "_", m, "_", n, "_", k);
}

size_t GetSize(bool duplicate_inputs) const {
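The Signature() rewrites above fix a caching bug rather than a style nit: a function-local static is initialized once per process, so the old form froze the string built for the first params instance and returned it for every later instance, even with different shapes. A minimal repro of the pitfall with stand-in types (not the tunable-op code itself):

#include <iostream>
#include <string>

struct Params {
  int m;
  // Old pattern: the static is initialized on the first call only,
  // so every instance afterwards sees the first instance's value.
  std::string cached_signature() const {
    static std::string val = std::to_string(m);
    return val;
  }
  // Fixed pattern: recompute per call, as the diff now does.
  std::string signature() const { return std::to_string(m); }
};

int main() {
  Params p1{64}, p2{128};
  std::cout << p1.cached_signature() << "\n";  // "64"
  std::cout << p2.cached_signature() << "\n";  // still "64" -- stale
  std::cout << p2.signature() << "\n";         // "128"
}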