In [52]:
import re
import pickle
import json
import datetime

In [53]:
def replace_punctuation_with_space(input_string):
    """Return ``input_string`` with every ``.``, ``!``, ``-`` and ``_`` replaced by a space."""
    punctuation = re.compile(r'[\.\!\-\_]')
    return punctuation.sub(' ', input_string)


def split_string(input_string, chunk_size):
    """Cut ``input_string`` into consecutive slices of ``chunk_size`` items.

    The last slice holds whatever remains and may be shorter than
    ``chunk_size``. An empty input yields an empty list.
    """
    chunks = []
    for start in range(0, len(input_string), chunk_size):
        chunks.append(input_string[start:start + chunk_size])
    return chunks

def clean_text(input_string):
    """Normalise ``input_string`` and cut it into 3-character tokens.

    Characters handled by ``replace_punctuation_with_space`` become spaces,
    the text is lower-cased, every space is removed, and what is left is
    split into consecutive chunks of three characters (the last chunk may be
    shorter).
    """
    # NOTE(review): no call to clean_text is visible in this part of the
    # notebook; presumably the pickled vectorizer relies on it — confirm
    # before changing or removing it.
    spaced = replace_punctuation_with_space(input_string)
    compact = spaced.lower().replace(" ", "")
    return split_string(compact, 3)

In [54]:
# Restore the trained model, its vectorizer and the label encoder from disk.
# pickle.load can execute code embedded in the file, so these artifacts must
# come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


loaded_model = _load_pickle('model_autoparse/model.pkl')
loaded_vectorizer = _load_pickle('model_autoparse/vectorizer.pkl')
loaded_label_encoder = _load_pickle('model_autoparse/label_encoder.pkl')

In [55]:
def predict(input_keyword):
    """Classify a single keyword and return its label.

    The keyword is vectorised with ``loaded_vectorizer``, passed to
    ``loaded_model.predict``, and the first predicted value is used as an
    index into ``loaded_label_encoder.classes_``.
    """
    features = loaded_vectorizer.transform([input_keyword])
    class_index = loaded_model.predict(features)[0]
    return loaded_label_encoder.classes_[class_index]

In [56]:
# Spot-check the classifier on a weekday abbreviation (recorded output: 'opening_hours').
predict("Fri")

'opening_hours'

In [57]:
def transform_dict(input_dict):
    """Wrap every node of a nested structure with its type and a predicted label.

    Each node becomes ``{"data_type": ..., "prediction": ..., "value": ...}``:

    * dict -> ``data_type`` is ``"dict"`` and ``value`` maps each original key
      to its transformed child, labelled by that key.
    * list -> ``data_type`` is ``"list"`` and ``value`` is a list of the
      transformed items; items are labelled with the key of the list itself.
    * anything else -> ``data_type`` is the value's type name and ``value``
      is the value unchanged.

    ``prediction`` is ``predict(key)`` for the key the node was reached by;
    the top-level node uses the key ``"root"``.

    Args:
        input_dict: The structure to annotate. The top level may be any
            value, not only a dict.

    Returns:
        The annotated structure described above. The input is not modified.
    """
    # predict() vectorises the key and runs the model on every call, while the
    # same key recurs for each item of a list and for every record that shares
    # the same fields, so each distinct key is classified once per call.
    predictions = {}

    def predict_once(key):
        if key not in predictions:
            predictions[key] = predict(key)
        return predictions[key]

    def recursive_transform(data, key):
        prediction = predict_once(key)
        if isinstance(data, dict):
            return {
                "data_type": "dict",
                "prediction": prediction,
                "value": {
                    sub_key: recursive_transform(value, sub_key)
                    for sub_key, value in data.items()
                },
            }
        if isinstance(data, list):
            return {
                "data_type": "list",
                "prediction": prediction,
                # List items have no key of their own; they inherit the list's key.
                "value": [recursive_transform(item, key) for item in data],
            }
        return {"data_type": type(data).__name__, "prediction": prediction, "value": data}

    return recursive_transform(input_dict, "root")

In [58]:
# Hand-written sample payload used to exercise transform_dict: a nested
# "response" holding a list of stores plus a few scalar fields (str, int, bool).
# NOTE(review): the "Tus" and "Thi" weekday keys look like misspellings of
# "Tue"/"Thu" — confirm whether they are intentional test inputs.
data_sample = {
    "response": {
        "stores": [
            {
                "addr": "Moscow, Smolnaya, 1",
                "phoneNumber": "(967) 2874621",
                "latitude": "47.3872",
                "longitude": "12.8474",
                "workHours": {
                    "Mon": "8:00-18:00",
                    "Tus": "8:00-18:00",
                    "Wed": "8:00-18:00",
                    "Thi": "8:00-18:00",
                    "Fri": "8:00-18:00",
                    "Sat": "8:00-18:00",
                    "Sun": "8:00-18:00",
                } 
            },
        ],
        "userdata": "session-uuid",
        "token": 13726,
        "newuser": True
    }
}

# Annotate the sample payload and save the result next to the notebook.
transformed_json = transform_dict(data_sample)

# Whole-second Unix timestamp, used to keep successive output files apart.
filestamp = str(int(datetime.datetime.now().timestamp()))

# Serialise before opening the file so that a failure in json.dumps does not
# leave an empty "transformed-*.json" behind.
serialized_json = json.dumps(transformed_json, indent="\t")

with open(f"transformed-{filestamp}.json", 'w') as file:
    # write(), not writelines(): the payload is a single string, and
    # writelines() would iterate over it character by character.
    file.write(serialized_json)

In [59]:
def json_transform(input_json):
    """Annotate ``input_json`` with ``transform_dict`` and save the result as JSON.

    The output is written, tab-indented, to ``transformed-<timestamp>.json``
    in the current working directory, where ``<timestamp>`` is the current
    Unix time in whole seconds. Two calls within the same second therefore
    target the same file, and the later call overwrites the earlier one.

    Args:
        input_json: The structure to annotate; passed unchanged to
            ``transform_dict``.

    Returns:
        None.

    Raises:
        TypeError: If the transformed structure contains a value that
            ``json.dumps`` cannot serialise. No file is created in that case.
    """
    transformed_json = transform_dict(input_json)

    # Serialise before opening the file so that a serialisation error does not
    # leave an empty output file behind.
    serialized_json = json.dumps(transformed_json, indent="\t")

    filestamp = str(int(datetime.datetime.now().timestamp()))

    with open(f"transformed-{filestamp}.json", 'w') as file:
        # write(), not writelines(): the payload is a single string, and
        # writelines() would iterate over it character by character.
        file.write(serialized_json)

In [79]:
import requests

In [89]:
# Alternative endpoint kept for reference:
# res_json = requests.get("https://stockist.co/api/v1/u15743/locations/all").json()
url = "https://www.avia.ch/tankstellenfinder/jsonpassthru.php"
headers = {}

# requests waits indefinitely unless a timeout is given, which would hang the
# notebook on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Fail on HTTP error statuses instead of parsing and saving an error response.
response.raise_for_status()
res_json = response.json()
json_transform(res_json)