/
DatasetRepository.java
101 lines (94 loc) · 3.83 KB
/
DatasetRepository.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.data;
import javax.annotation.concurrent.Immutable;
/**
* <p>
* A logical repository (storage system) of {@link Dataset}s.
* </p>
* <p>
* Implementations of {@code DatasetRepository} are storage systems that contain
* zero or more {@link Dataset}s. A repository acts as a factory, as well as a
* registry, of datasets. Users can {@link #create(String, DatasetDescriptor)} a
* new {@link Dataset} with a name and schema, or retrieve a handle to an
* existing dataset, by name, by way of the {@link #get(String)} method. While
* not expressly forbidden, most repositories are expected to support only a
* single concrete {@link Dataset} implementation.
* </p>
* <p>
* No guarantees are made as to the durability, reliability, or availability of
* the underlying storage. That is, a {@code DatasetRepository} could be on
* disk, in memory, or some combination. See the implementation class for
* details about the guarantees it provides.
* </p>
* <p>
* Implementations of {@link DatasetRepository} are immutable.
* </p>
*
* @see Dataset
* @see DatasetDescriptor
*/
@Immutable
public interface DatasetRepository {
/**
* Get the latest version of a named {@link Dataset}. If no dataset with the
* provided {@code name} exists, a {@link DatasetRepositoryException} is thrown.
*
* @param name The name of the dataset.
* @return A handle to the named dataset
* @throws DatasetRepositoryException if no dataset with the provided
* {@code name} exists
*/
Dataset get(String name);
// NOTE(review): the description below only says "an exception" for the illegal
// schema and duplicate name cases; the @throws tag suggests it is
// DatasetRepositoryException - confirm against the implementations.
/**
* Create a {@link Dataset} with the supplied {@code descriptor}. Depending on
* the underlying dataset storage, some schema types or configurations may
* not be supported. If an illegal schema is supplied, an exception will be
* thrown by the implementing class. It is illegal to create more than one
* dataset with a given name. If a duplicate name is provided, an exception is
* thrown.
*
* @param name The fully qualified dataset name
* @param descriptor A descriptor that describes the schema and other properties of the
* dataset
* @return The newly created dataset
* @throws DatasetRepositoryException if the dataset cannot be created
*/
Dataset create(String name, DatasetDescriptor descriptor);
// NOTE(review): the @return text previously read "The newly created dataset",
// which looks copied from create(); this method updates an existing dataset.
// Confirm that implementations return the dataset as updated.
/**
* Update an existing {@link Dataset} to reflect the supplied {@code descriptor}. The
* common case is updating a dataset schema. Depending on
* the underlying dataset storage, some updates may not be supported,
* such as a change in format or partition strategy.
* Any attempt to make an unsupported or incompatible update will result in an
* exception being thrown and no change being made to the dataset.
*
* @param name The fully qualified dataset name
* @param descriptor A descriptor that describes the schema and other properties of the
* dataset
* @return The updated dataset
* @throws DatasetRepositoryException if the update is unsupported or incompatible
* @since 0.3.0
*/
Dataset update(String name, DatasetDescriptor descriptor);
// NOTE(review): this is the only method documented as throwing
// DatasetReaderException; get, create and update all name
// DatasetRepositoryException. This looks like a typo - confirm which type
// implementations actually throw before changing the tag.
// NOTE(review): the description says a missing dataset raises an exception,
// while @return allows false for an unsuccessful drop; the conditions that
// yield false are not specified here - confirm against the implementations.
/**
* Drop the named {@link Dataset}. If no dataset with the
* provided {@code name} exists, a {@link DatasetReaderException} is thrown.
*
* @param name The name of the dataset.
* @return {@code true} if the dataset was successfully dropped, {@code false} otherwise
* @throws DatasetReaderException if no dataset with the provided {@code name} exists
*/
boolean drop(String name);
}