# write_api.py
# Created by: Drew
# This file implements the write routes of the validator API: ecosystem
# imports and feature weight management.
from flask import jsonify, request, Blueprint, current_app
from flask_cors import cross_origin
import pandas as pd
import pkg_resources
import uuid

from .ecosystem_importer import EcosystemImporter
from .utils import write_fixed_data, write_feature_weights
from .read_api import InvalidUsage, handle_invalid_usage

CORPORA_PATH = pkg_resources.resource_filename("validator", "ml/corpora")
bp = Blueprint("write_api", __name__, url_prefix="/")
bp.register_error_handler(InvalidUsage, handle_invalid_usage)
# Instantiate the ecosystem importer that will be used by the import route
ecosystem_importer = EcosystemImporter(
    common_vocabulary_filename=f"{CORPORA_PATH}/big.txt"
)

def update_fixed_data(df_domain_, df_innovation_, df_questions_):
    # AEW: I feel like I am sinning against nature here . . .
    # Do we need to store these in a Redis cache or db???
    # This was all well and good before we ever tried to modify things
    datasets = current_app.datasets

    # Remove any entries from the domain, innovation, and question dataframes
    # that are duplicated by the new data
    book_id = df_domain_.iloc[0]["vuid"]
    if "vuid" in datasets["domain"].columns:
        datasets["domain"] = datasets["domain"][
            datasets["domain"]["vuid"] != book_id
        ]
    if "cvuid" in datasets["innovation"].columns:
        datasets["innovation"] = datasets["innovation"][
            ~(datasets["innovation"]["cvuid"].str.startswith(book_id))
        ]
    uids = df_questions_["uid"].unique()
    if "uid" in datasets["questions"].columns:
        datasets["questions"] = datasets["questions"][
            ~(
                datasets["questions"]["uid"].isin(uids)
                & datasets["questions"]["cvuid"].str.startswith(book_id)
            )
        ]

    # Now append the new dataframes to the in-memory ones
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    datasets["domain"] = pd.concat([datasets["domain"], df_domain_], sort=False)
    datasets["innovation"] = pd.concat(
        [datasets["innovation"], df_innovation_], sort=False
    )
    datasets["questions"] = pd.concat(
        [datasets["questions"], df_questions_], sort=False
    )

    # Update qid sets - for shortcutting question lookup
    for idcol in ("uid", "qid"):
        current_app.qids[idcol] = set(datasets["questions"][idcol].values.tolist())

    # Finally, write the updated dataframes to disk and declare victory
    data_dir = current_app.config["DATA_DIR"]
    write_fixed_data(
        datasets["domain"], datasets["innovation"], datasets["questions"], data_dir
    )
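
# A minimal sketch of the dedupe convention assumed above: "cvuid" appears to
# be the book vuid joined to a page id, so str.startswith(book_id) drops
# exactly the rows belonging to the incoming book. The ids here are made up
# for illustration:
#
#   import pandas as pd
#   innovation = pd.DataFrame({"cvuid": ["book-1:page-a", "book-2:page-b"]})
#   kept = innovation[~innovation["cvuid"].str.startswith("book-1")]
#   # kept now holds only the "book-2:page-b" row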

def store_feature_weights(new_feature_weights):
    # Dedupe feature weight sets: if the incoming set matches an existing
    # one, reuse that set's id; otherwise store it under a fresh uuid
    datasets = current_app.datasets
    for fw_id, existing_feature_weights in datasets["feature_weights"].items():
        if existing_feature_weights == new_feature_weights:
            result_id = fw_id
            break
    else:
        result_id = str(uuid.uuid4())
        datasets["feature_weights"][result_id] = new_feature_weights
        data_dir = current_app.config["DATA_DIR"]
        write_feature_weights(datasets["feature_weights"], data_dir)
    return result_id
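
# Example of the for/else dedupe above (hypothetical in-memory data):
# posting identical weights twice yields the same id, and only a genuinely
# new set triggers a disk write.
#
#   datasets["feature_weights"] = {"abc": {"w1": 1.0}}
#   store_feature_weights({"w1": 1.0})  # -> "abc" (match found, loop breaks)
#   store_feature_weights({"w1": 2.0})  # -> fresh uuid4 string, then persisted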

def write_default_feature_weights_id(new_default_id):
    # Persist the new default feature weight id, skipping the disk write
    # when the id is unchanged
    datasets = current_app.datasets
    if new_default_id == datasets["feature_weights"]["default_id"]:
        return new_default_id
    datasets["feature_weights"]["default_id"] = new_default_id
    data_dir = current_app.config["DATA_DIR"]
    write_feature_weights(datasets["feature_weights"], data_dir)
    return new_default_id
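
# Note that "default_id" is stored alongside the weight sets in the same
# feature_weights dict, so the membership check in the PUT route below will
# accept the literal key "default_id" as well; callers are expected to send
# a real feature weight set id.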
@bp.route("/import", methods=["POST"])
@cross_origin(supports_credentials=True)
def import_ecosystem():
# Extract arguments for the ecosystem to import
# Either be a file location, YAML-as-string, or book_id and list of question uids
yaml_string = request.files["file"].read()
if "file" in request.files:
(
df_domain_,
df_innovation_,
df_questions_,
) = ecosystem_importer.parse_yaml_string(yaml_string)
elif request.json is not None:
yaml_filename = request.json.get("filename", None)
yaml_string = request.json.get("yaml_string", None)
book_id = request.json.get("book_id", None)
exercise_list = request.json.get("question_list", None)
if yaml_filename:
(
df_domain_,
df_innovation_,
df_questions_,
) = ecosystem_importer.parse_yaml_file(yaml_filename)
elif yaml_string:
(
df_domain_,
df_innovation_,
df_questions_,
) = ecosystem_importer.parse_yaml_string(yaml_string)
elif book_id and exercise_list:
(
df_domain_,
df_innovation_,
df_questions_,
) = ecosystem_importer.parse_content(book_id, exercise_list)
else:
return jsonify(
{
"msg": (
"Could not process input. Provide either"
" a location of a YAML file,"
" a string of YAML content,"
" or a book_id and question_list"
)
}
)
update_fixed_data(df_domain_, df_innovation_, df_questions_)
return jsonify({"msg": "Ecosystem successfully imported"})
@bp.route("/datasets/feature_weights", methods=["POST"])
@cross_origin(supports_credentials=True)
def new_feature_weights_set():
feature_weights_keys = set(current_app.config["DEFAULT_FEATURE_WEIGHTS"].keys())
if not request.is_json:
raise InvalidUsage(
"Unable to load feature weights as json file.", status_code=404
)
else:
new_feature_weights = request.json
if set(new_feature_weights.keys()) != feature_weights_keys:
raise InvalidUsage(
"Incomplete or incorrect feature weight keys", status_code=400
)
feature_weight_id = store_feature_weights(new_feature_weights)
return jsonify(
{
"msg": "Feature weights successfully imported.",
"feature_weight_set_id": feature_weight_id,
}
)
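
# Example request; the weight names must exactly match the keys of
# DEFAULT_FEATURE_WEIGHTS in the app config (the names and port below are
# hypothetical):
#
#   curl -X POST http://localhost:5000/datasets/feature_weights \
#        -H "Content-Type: application/json" \
#        -d '{"stem_word_count": 1.0, "option_word_count": 1.0}'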
@bp.route("/datasets/feature_weights/default", methods=["PUT"])
@cross_origin(supports_credentials=True)
def set_default_feature_weights_id():
datasets = current_app.datasets
if not request.is_json:
raise InvalidUsage(
"Unable to load new default id as json file.", status_code=404
)
else:
new_default_id = request.json
if new_default_id not in datasets["feature_weights"].keys():
raise InvalidUsage("Feature weight id not found.", status_code=400)
default_id = write_default_feature_weights_id(new_default_id)
return jsonify(
{
"msg": "Successfully set default feature weight id.",
"feature_weight_set_id": default_id,
}
)
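
# Example request; the body is a bare JSON string holding an id previously
# returned by the POST route above (placeholder shown):
#
#   curl -X PUT http://localhost:5000/datasets/feature_weights/default \
#        -H "Content-Type: application/json" \
#        -d '"<feature_weight_set_id>"'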