From 27ef3b56bb5a83e5984f66276ef7b40cf42f07a4 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Mon, 29 Jun 2020 09:28:33 +0300 Subject: [PATCH] hdfs: implement makedirs separately Clouds that have a non-noop `makedirs` should implement it in their own method. HDFS somehow slipped under the radar and called `mkdir` explicitly in its `copy` and `_upload` methods. --- dvc/remote/hdfs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dvc/remote/hdfs.py b/dvc/remote/hdfs.py index e9cfe42e8c..fc4c316452 100644 --- a/dvc/remote/hdfs.py +++ b/dvc/remote/hdfs.py @@ -1,7 +1,6 @@ import io import logging import os -import posixpath import re import subprocess from collections import deque @@ -112,10 +111,13 @@ def remove(self, path_info): with self.hdfs(path_info) as hdfs: hdfs.rm(path_info.path) + def makedirs(self, path_info): + with self.hdfs(path_info) as hdfs: + # NOTE: hdfs.mkdir creates parents by default + hdfs.mkdir(path_info.path) + def copy(self, from_info, to_info, **_kwargs): - dname = posixpath.dirname(to_info.path) with self.hdfs(to_info) as hdfs: - hdfs.mkdir(dname) # NOTE: this is how `hadoop fs -cp` works too: it copies through # your local machine. with hdfs.open(from_info.path, "rb") as from_fobj: @@ -169,7 +171,6 @@ def get_file_hash(self, path_info): def _upload(self, from_file, to_info, **_kwargs): with self.hdfs(to_info) as hdfs: - hdfs.mkdir(posixpath.dirname(to_info.path)) tmp_file = tmp_fname(to_info.path) with open(from_file, "rb") as fobj: hdfs.upload(tmp_file, fobj)