/
utils.py
264 lines (208 loc) · 8.96 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import csv
import os
import pickle
import sys
import tempfile
import zipfile
import warnings
import imageio
import numpy
import requests
from skimage.transform import resize
import torch
import subprocess
import string
import random
from autokeras.constant import Constant
class NoImprovementError(Exception):
    """Raised when a search/training step fails to improve on the best result so far.

    Args:
        message: Human-readable description of the failed improvement.
    """
    def __init__(self, message):
        # Explicitly initialise the base Exception so str(exc), pickling and
        # exception chaining all carry the message, rather than relying on
        # BaseException.__new__ to capture the args implicitly.
        super().__init__(message)
        self.message = message
def ensure_dir(directory):
    """Create `directory` (and any missing parents) if it does not exist.

    Args:
        directory: Path of the directory to create.
    """
    # exist_ok avoids the check-then-create race of the previous
    # `if not os.path.exists(...)` guard when called concurrently.
    os.makedirs(directory, exist_ok=True)
def ensure_file_dir(path):
    """Create the parent directory of `path` if it does not exist.

    Args:
        path: File path whose containing directory should exist.
    """
    directory = os.path.dirname(path)
    # A bare filename has an empty dirname; os.makedirs('') would raise
    # FileNotFoundError, so only create when there is a directory component.
    if directory:
        os.makedirs(directory, exist_ok=True)
def has_file(path):
    """Return True when something (file or directory) exists at `path`."""
    return os.path.exists(path)
def pickle_from_file(path):
    """Load and return the object pickled at `path`.

    Args:
        path: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # The context manager guarantees the handle is closed even if
    # pickle.load raises; the original leaked the open file object.
    with open(path, 'rb') as handle:
        return pickle.load(handle)
def pickle_to_file(obj, path):
    """Pickle `obj` and write it to `path`.

    Args:
        obj: Any picklable object.
        path: Destination file path.
    """
    # The context manager flushes and closes the handle deterministically;
    # the original leaked the open file object.
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)
def get_device():
    """Return the name of the best available torch device.

    If CUDA is available, choose the CUDA device with the most free memory
    as reported by nvidia-smi, honouring CUDA_VISIBLE_DEVICES; otherwise
    fall back to the CPU.

    Returns:
        A device string such as 'cpu' or 'cuda:0'.
    """
    # TODO: could use gputil in the future
    device = 'cpu'
    if torch.cuda.is_available():
        try:
            # Expected output, one line per GPU:
            #     Free : xxxxxx MiB
            #     Free : xxxxxx MiB
            #     ....
            smi_out = subprocess.check_output('nvidia-smi -q -d Memory | grep -A4 GPU|grep Free', shell=True)
            if isinstance(smi_out, bytes):
                smi_out = smi_out.decode('utf-8')
            # (removed leftover debug print of the raw nvidia-smi output)
        except subprocess.SubprocessError:
            warnings.warn('Cuda device successfully detected. However, nvidia-smi cannot be invoked')
            return 'cpu'
        visible_devices = os.getenv('CUDA_VISIBLE_DEVICES', '').split(',')
        if len(visible_devices) == 1 and visible_devices[0] == '':
            visible_devices = []
        visible_devices = [int(x) for x in visible_devices]
        memory_available = [int(x.split()[2]) for x in smi_out.splitlines()]
        # Zero out GPUs hidden by CUDA_VISIBLE_DEVICES so they are never chosen.
        for cuda_index, _ in enumerate(memory_available):
            if cuda_index not in visible_devices and visible_devices:
                memory_available[cuda_index] = 0
        if memory_available and max(memory_available) != 0:
            device = 'cuda:' + str(memory_available.index(max(memory_available)))
    return device
def temp_folder_generator():
    """Create and return a fresh temporary directory named 'autokeras_<suffix>'.

    Uses tempfile.mkdtemp, which creates the directory atomically with an
    unguessable suffix, avoiding the predictability and create-after-check
    race of the previous hand-rolled random.choice() naming.

    Returns:
        Absolute path of the newly created directory (e.g. /tmp/autokeras_ab12cd).
    """
    return tempfile.mkdtemp(prefix='autokeras_')
def download_file(file_link, file_path):
    """Download the file at `file_link` and save it to `file_path`.

    Does nothing if `file_path` already exists. Shows a 50-character text
    progress bar when the server reports a Content-Length.

    Args:
        file_link: URL to download.
        file_path: Destination path on disk.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    if os.path.exists(file_path):
        return
    print("Downloading %s" % file_path)
    response = requests.get(file_link, stream=True)
    # Fail fast on 4xx/5xx instead of silently saving an HTML error page
    # as the downloaded file (the original wrote whatever came back).
    response.raise_for_status()
    total_length = response.headers.get('content-length')
    # Open the destination only after the request succeeded, so a failed
    # download does not leave an empty/partial file behind.
    with open(file_path, "wb") as f:
        if total_length is None:  # no content length header
            f.write(response.content)
        else:
            downloaded = 0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                downloaded += len(data)
                f.write(data)
                done = int(50 * downloaded / total_length)
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
            # Terminate the progress-bar line so later prints start cleanly.
            sys.stdout.write("\n")
def download_file_with_extract(file_link, file_path, extract_path):
    """Download `file_link` to `file_path` and unzip it into `extract_path`.

    Skips both the download and the extraction when `extract_path` already
    exists. The downloaded zip is deleted after a successful extraction.

    Args:
        file_link: URL of the zip archive to download.
        file_path: Temporary path for the downloaded archive.
        extract_path: Directory to extract the archive into.
    """
    if not os.path.exists(extract_path):
        download_file(file_link, file_path)
        print("extracting downloaded file...")
        # Context manager closes the archive handle even if extraction
        # fails; the original leaked the open ZipFile.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
        os.remove(file_path)
        print("extracted and removed downloaded zip file")
    # NOTE(review): this message is printed unconditionally (even right
    # after a fresh extraction) — preserved for output compatibility.
    print("file already extracted in the path %s" % extract_path)
def verbose_print(new_father_id, new_graph):
    """Print an ASCII table of the operations applied to the father model.

    The father model's id is shown on the middle row of the operation
    history; all other rows leave that cell blank.

    Args:
        new_father_id: Identifier of the father model.
        new_graph: Graph whose `operation_history` lists the applied operations.
    """
    widths = [24, 49]
    header = ['Father Model ID', 'Added Operation']
    line = '|'.join(str(cell).center(widths[col]) for col, cell in enumerate(header))
    print('\n+' + '-' * len(line) + '+')
    print('|' + line + '|')
    print('+' + '-' * len(line) + '+')
    history = new_graph.operation_history
    middle_index = len(history) // 2
    for position, operation in enumerate(history):
        row = [new_father_id, operation] if position == middle_index else [' ', operation]
        line = '|'.join(str(cell).center(widths[col]) for col, cell in enumerate(row))
        print('|' + line + '|')
    # Uses the last row's length, matching the original's closing border.
    print('+' + '-' * len(line) + '+')
def validate_xy(x_train, y_train):
    """Validate the types and shapes of the training data.

    Args:
        x_train: Numpy array of training inputs; must be numeric and at
            least 2-dimensional.
        y_train: Numpy array of training targets; its first dimension must
            match x_train's.

    Raises:
        ValueError: if x_train contains non-numeric data, has fewer than 2
            dimensions, or its instance count differs from y_train's.
    """
    try:
        x_train = x_train.astype('float64')
    except ValueError as error:
        # Chain the cause so the original conversion failure is visible.
        raise ValueError('x_train should only contain numerical data.') from error
    if len(x_train.shape) < 2:
        raise ValueError('x_train should at least has 2 dimensions.')
    if x_train.shape[0] != y_train.shape[0]:
        raise ValueError('x_train and y_train should have the same number of instances.')
def read_csv_file(csv_file_path):
    """Read a two-column CSV file into parallel lists of names and labels.

    The first column of each row is treated as a file name and the second
    as its label; the header row supplies the column names.

    Args:
        csv_file_path: Path to the CSV file.

    Returns:
        file_names: List of file names (first column).
        file_labels: List of their labels (second column).
    """
    with open(csv_file_path, 'r') as handle:
        reader = csv.DictReader(handle)
        columns = reader.fieldnames
        # Index fieldnames lazily inside the comprehension so an empty
        # file (fieldnames is None, no rows) still yields empty lists.
        pairs = [(record[columns[0]], record[columns[1]]) for record in reader]
    file_names = [name for name, _ in pairs]
    file_labels = [label for _, label in pairs]
    return file_names, file_labels
def read_image(img_path):
    """Load and return the image stored at `img_path` via imageio."""
    return imageio.imread(uri=img_path)
def compute_image_resize_params(data):
    """Compute the median height and width over all images in `data`.

    These values are later used to resize the images; the channel count is
    never changed. If the median area exceeds Constant.MAX_IMAGE_SIZE, both
    dimensions are scaled down proportionally. Only 2-D images (H x W x C)
    are supported.

    Args:
        data: Image data with shape N x H x W x C (possibly a ragged
            1-D object array of per-image arrays).

    Returns:
        Tuple of (median height, median width) as ints, or (None, None)
        when the elements are not H x W x C images.
    """
    # A ragged 1-D array whose elements are not 3-D cannot be resized.
    if len(data.shape) == 1 and len(data[0].shape) != 3:
        return None, None
    shapes = numpy.array([image.shape for image in data])
    median_height, median_width = numpy.median(shapes, axis=0)[:2]
    # Shrink proportionally when the median area exceeds the configured cap.
    area = median_height * median_width
    if area > Constant.MAX_IMAGE_SIZE:
        scale = numpy.sqrt(area / Constant.MAX_IMAGE_SIZE)
        median_height /= scale
        median_width /= scale
    return int(median_height), int(median_width)
def resize_image_data(data, height, width):
    """Resize every image in `data` to height x width, keeping channels.

    Supports only 2-D image data (H x W x C per image). Returns the input
    unchanged when it is None, already the requested size, or contains a
    non-3-D element.

    Args:
        data: Image data with shape N x H x W x C.
        height: Target image height.
        width: Target image width.

    Returns:
        Numpy array of resized images, or the original `data` unchanged.
    """
    if data is None:
        return data
    # Already at the requested size: nothing to do.
    if len(data.shape) == 4 and data[0].shape[0] == height and data[0].shape[1] == width:
        return data
    resized = []
    for image in data:
        # Bail out on anything that is not H x W x C, as the original does.
        if len(image.shape) != 3:
            return data
        resized.append(resize(image=image,
                              output_shape=(height, width, image.shape[-1]),
                              mode='edge',
                              preserve_range=True))
    return numpy.array(resized)
def get_system():
    """Identify the current runtime environment.

    Returns:
        Constant.SYS_GOOGLE_COLAB when running inside Google Colab,
        Constant.SYS_LINUX for posix systems (Linux, Mac or Solaris),
        Constant.SYS_WINDOWS for Windows ("nt").

    Raises:
        EnvironmentError: if the current system is not supported.
    """
    # (removed leftover debug print of os.name)
    # Colab must be checked first: it reports 'posix' underneath.
    if 'google.colab' in sys.modules:
        return Constant.SYS_GOOGLE_COLAB
    if os.name == 'posix':
        return Constant.SYS_LINUX
    if os.name == 'nt':
        return Constant.SYS_WINDOWS
    raise EnvironmentError('Unsupported environment')