In [51]:
import json
import redis
import time

In [71]:
data = json.load(open('large-file.json'))
r = redis.StrictRedis(host='localhost', port=6379, db=0)

## Сохранение json файла в базу данных в виде строки

In [104]:
def flatten_dict(d, parent_key='', sep=':'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

In [105]:
def set_elem2redis(elem):
    fd = flatten_dict(elem)
    k = fd['id']

    for key, value in fd.items():
        if key == 'id':
            continue
        r.set(f"{k}:{key}", f"{value}")

In [106]:
start = time.time()
for elem in data:
    set_elem2redis(elem)
end = time.time()
print(f"Time: {end - start}")

Time: 54.35731482505798


## Сохранение json файла в базу данных в виде hset

In [100]:
def set_elem2redis_hset(elem):
    fd = flatten_dict(elem)
    k = fd.pop('id')
    for key, value in fd.items():
        fd[key] = str(value)
    r.hset(f"{k}", mapping=fd)

In [101]:
start = time.time()
for elem in data:
    set_elem2redis_hset(elem)
end = time.time()
print(f"Time: {end - start}")

Time: 3.844680070877075


## Сохранение json файла в базу данных в виде zset

In [98]:
start = time.time()
ids_sorted = "ids_sorted"
for elem in data:
    r.zadd(ids_sorted, {elem['actor']['login'] : elem['actor']['id']})
end = time.time()
print(f"Time: {end - start}")

Time: 1.419557809829712


## Сохранение json файла в базу данных в виде list

In [99]:
start = time.time()
ids_list = "ids"
for elem in data:
    r.lpush(ids_list, elem['id'])
end = time.time()
print(f"Time: {end - start}")

Time: 1.7995710372924805


## Тестирование времени чтения

In [107]:
start = time.time()
for elem in data:
    r.hget(elem['id'], 'actor:id')
end = time.time()
print(f"Time: {end - start}")

Time: 1.6342508792877197


In [108]:
start = time.time()
for elem in data:
    r.get(f"{elem['id']}:actor:id")
end = time.time()
print(f"Time: {end - start}")

Time: 1.423008918762207


In [112]:
start = time.time()
for elem in data:
    r.zscore(ids_sorted, elem['actor']['login'])
end = time.time()
print(f"Time: {end - start}")

Time: 1.3145198822021484


In [110]:
start = time.time()
for elem in data:
    r.lpop(ids_list)
end = time.time()
print(f"Time: {end - start}")

Time: 1.4005730152130127
