forked from readthedocs/readthedocs.org
-
Notifications
You must be signed in to change notification settings - Fork 0
/
storage.py
201 lines (160 loc) · 7.41 KB
/
storage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import logging
from pathlib import Path
from django.conf import settings
from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import FileSystemStorage
from storages.utils import get_available_overwrite_name, safe_join
log = logging.getLogger(__name__)
class BuildMediaStorageMixin:

    """
    A mixin for Storage classes needed to write build artifacts.

    This adds and modifies some functionality to Django's File Storage API.
    By default, classes mixing this in will now overwrite files by default instead
    of finding an available name.
    This mixin also adds convenience methods to copy and delete entire directories.

    The concrete storage class is expected to provide ``listdir``, ``save``
    and ``delete`` (the methods below call them on ``self``).

    See: https://docs.djangoproject.com/en/1.11/ref/files/storage
    """

    @staticmethod
    def _dirpath(path):
        """
        Make the path to end with `/`.

        It may just be Azure, but for listdir to work correctly, this is needed.

        :param path: a path-like value; it is converted with ``str()`` first
        :returns: the path as a string, guaranteed to end with ``/``
        """
        path = str(path)
        if not path.endswith('/'):
            path += '/'

        return path

    def get_available_name(self, name, max_length=None):
        """
        Overrides Django's storage to always return the passed name (overwrite).

        By default, Django will not overwrite files even if the same name is specified.
        This changes that functionality so that the default is to use the same name and overwrite
        rather than modify the path to not clobber files.

        :param name: the requested file name
        :param max_length: optional maximum length, forwarded to
            ``get_available_overwrite_name``
        """
        return get_available_overwrite_name(name, max_length=max_length)

    def delete_directory(self, path):
        """
        Delete all files under a certain path from storage.

        Many storage backends (S3, Azure storage) don't care about "directories".
        The directory effectively doesn't exist if there are no files in it.
        However, in these backends, there is no "rmdir" operation so you have to recursively
        delete all files.

        :param path: the path to the directory to remove
        :raises SuspiciousFileOperation: if ``path`` is ``''`` or ``'/'``
        """
        if path in ('', '/'):
            raise SuspiciousFileOperation('Deleting all storage cannot be right')

        log.debug('Deleting directory %s from media storage', path)
        folders, files = self.listdir(self._dirpath(path))
        for folder_name in folders:
            # Empty names are skipped: joining one would point back at ``path``.
            if folder_name:
                # Recursively delete the subdirectory
                self.delete_directory(self.join(path, folder_name))
        for filename in files:
            if filename:
                self.delete(self.join(path, filename))

    def copy_directory(self, source, destination):
        """
        Copy a directory recursively to storage.

        Entries that are neither a directory nor a regular file are ignored.

        :param source: the source path on the local disk
        :param destination: the destination path in storage
        """
        log.debug('Copying source directory %s to media storage at %s', source, destination)
        source = Path(source)
        for filepath in source.iterdir():
            sub_destination = self.join(destination, filepath.name)
            if filepath.is_dir():
                # Recursively copy the subdirectory
                self.copy_directory(filepath, sub_destination)
            elif filepath.is_file():
                with filepath.open('rb') as fd:
                    self.save(sub_destination, fd)

    def sync_directory(self, source, destination):
        """
        Sync a directory recursively to storage.

        Overwrites files in remote storage with files from ``source`` (no timestamp/hash checking).
        Removes files and folders in remote storage that are not present in ``source``.

        :param source: the source path on the local disk
        :param destination: the destination path in storage
        :raises SuspiciousFileOperation: if ``destination`` is ``''`` or ``'/'``
        """
        if destination in ('', '/'):
            raise SuspiciousFileOperation('Syncing all storage cannot be right')

        log.debug(
            'Syncing to media storage. source=%s destination=%s',
            source, destination,
        )
        source = Path(source)
        copied_files = set()
        copied_dirs = set()
        for filepath in source.iterdir():
            sub_destination = self.join(destination, filepath.name)
            if filepath.is_dir():
                # Recursively sync the subdirectory
                self.sync_directory(filepath, sub_destination)
                copied_dirs.add(filepath.name)
            elif filepath.is_file():
                with filepath.open('rb') as fd:
                    self.save(sub_destination, fd)
                copied_files.add(filepath.name)

        # Remove files that are not present in ``source``
        dest_folders, dest_files = self.listdir(self._dirpath(destination))
        for folder in dest_folders:
            # Skip empty names, as ``delete_directory`` and ``walk`` do:
            # joining an empty name yields ``destination`` itself, and deleting
            # that would wipe everything that was just uploaded.
            if folder and folder not in copied_dirs:
                self.delete_directory(self.join(destination, folder))
        for filename in dest_files:
            if filename and filename not in copied_files:
                filepath = self.join(destination, filename)
                log.debug('Deleting file from media storage. file=%s', filepath)
                self.delete(filepath)

    def join(self, directory, filepath):
        """
        Join ``filepath`` onto ``directory`` using ``storages.utils.safe_join``.

        :param directory: the base path in storage
        :param filepath: the path component to append
        """
        return safe_join(directory, filepath)

    def walk(self, top):
        """
        Walk a storage directory recursively, top-down.

        Yields one ``(path, folders, files)`` tuple per directory, starting
        with ``top`` itself, where ``folders`` and ``files`` are what
        ``listdir`` returned for that directory.

        This is a generator, so the check on ``top`` only runs once iteration
        starts.

        :param top: the storage path to start walking from
        :raises SuspiciousFileOperation: if ``top`` is ``''`` or ``'/'``
        """
        if top in ('', '/'):
            raise SuspiciousFileOperation('Iterating all storage cannot be right')

        log.debug('Walking %s in media storage', top)
        folders, files = self.listdir(self._dirpath(top))

        yield top, folders, files

        for folder_name in folders:
            if folder_name:
                # Recursively walk the subdirectory
                yield from self.walk(self.join(top, folder_name))
class BuildMediaFileSystemStorage(BuildMediaStorageMixin, FileSystemStorage):

    """Storage subclass that writes build artifacts in PRODUCTION_MEDIA_ARTIFACTS or MEDIA_ROOT."""

    def __init__(self, **kwargs):
        """
        Pick the storage root and initialize the filesystem storage with it.

        Only the ``location`` keyword is used; any other keyword argument is
        accepted but not forwarded to the parent class.
        """
        root = kwargs.pop('location', None)
        if not root:
            # Mirrors the logic of getting the production media path
            use_media_root = settings.DEFAULT_PRIVACY_LEVEL == 'public' or settings.DEBUG
            root = settings.MEDIA_ROOT if use_media_root else settings.PRODUCTION_MEDIA_ARTIFACTS

        super().__init__(root)

    def get_available_name(self, name, max_length=None):
        """
        A hack to overwrite by default with the FileSystemStorage.

        The name is resolved by the parent implementation; if a file with
        that name already exists it is deleted so the upcoming save can reuse
        the name.

        After upgrading to Django 2.2, this method can be removed
        because subclasses can set OS_OPEN_FLAGS such that FileSystemStorage._save
        will properly overwrite files.
        See: https://github.com/django/django/pull/8476
        """
        target = super().get_available_name(name, max_length=max_length)
        already_there = self.exists(target)
        if already_there:
            self.delete(target)
        return target

    def listdir(self, path):
        """
        List a directory, treating a missing one as empty.

        Returns a pair of empty lists for nonexistent directories,
        which mimics what cloud storages do.
        """
        if self.exists(path):
            return super().listdir(path)
        return [], []

    def url(self, name, *args, **kwargs):  # noqa
        """
        Override to accept extra arguments and ignore them all.

        This method helps us to bring compatibility between Azure Blob Storage
        (which does not use the HTTP method) and Amazon S3 (who requires HTTP
        method to build the signed URL).

        ``FileSystemStorage`` does not support any other argument than ``name``.
        https://docs.djangoproject.com/en/2.2/ref/files/storage/#django.core.files.storage.Storage.url
        """
        file_url = super().url(name)
        return file_url