diff --git a/generate_random_predictions.py b/generate_random_predictions.py
index 429d22c..e85158f 100644
--- a/generate_random_predictions.py
+++ b/generate_random_predictions.py
@@ -32,7 +32,10 @@ def _policy(candidates):
 for _idx, _impression in enumerate(data):
     predictions = _policy(_impression["candidates"])
     predictionline = _format_predictions(predictions)
-    output.write(predictionline+"\n")
+    # The output file is written in "wb" mode, so under Python 3 the line
+    # (together with its trailing newline) must be encoded to bytes first.
+    output.write((predictionline + "\n").encode())
+
     if _idx % 500 == 0:
         print("Processed {} impressions...".format(_idx))
 
diff --git a/utils.py b/utils.py
index a8ef59f..ec8190a 100644
--- a/utils.py
+++ b/utils.py
@@ -7,6 +7,8 @@ def extract_impression_id(line, assert_first_line=False):
     """
         Extracts the impression_id from a line
     """
+    if isinstance(line, bytes):  # accept lines read from a binary-mode file
+        line = line.decode()
     return line[:line.index("|")].strip()
 
 def extract_cost_propensity(line):
@@ -18,6 +20,8 @@ def extract_cost_propensity(line):
 
         line: `string`
     """
+    if isinstance(line, bytes):  # accept lines read from a binary-mode file
+        line = line.decode()
     line_items = line.split("|")
     assert len(line_items) == 4
     cost = float(line_items[1].replace("l ","").strip())
@@ -27,6 +31,8 @@ def extract_cost_propensity(line):
     return cost, propensity
 
 def extract_features(line, debug=False):
+    if isinstance(line, bytes):  # accept lines read from a binary-mode file
+        line = line.decode()
     features_index = line.index("|f ")
     feature_string = line[features_index:].replace("|f ","")
     feature_set = feature_string.split()