<a href="https://colab.research.google.com/github/saivarshitnune/NLP_Projects/blob/Varshith/embedding_helpers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import asyncio
import json
from asyncio import create_task
from functools import wraps
from typing import List, Callable, Optional, Awaitable
from fastapi_utils.tasks import repeat_every
from starlette.concurrency import run_in_threadpool
from api.initializer import logger_instance, model, redis_db_instance, firestore_db_instance
from api.main.utility.helpers.keywords_helper import extract_keywords
from api.main.config import settings

# Module-level logger used by the helpers in this file, named after this module.
logger = logger_instance.get_logger(__name__)

def extract_embeddings(phrases) -> List:
    """
    Encode the given phrases with the shared model and return the result as a list.

    :params phrases: input passed unchanged to ``model.encode``
    :return the encoder output converted with ``.tolist()``
    """
    encoded_phrases = model.encode(phrases)
    return encoded_phrases.tolist()

async def store_question_embeddings(df_phrases, mid, master_question_dictionary, is_update=False) -> bool:
    """
    Build the master question entry for ``mid``, insert it into redis and, when
    the insert succeeds, write the UUID mapping and the "Trained" status to firestore.

    :params df_phrases: dataframe iterated with ``iterrows()``; every row is read
        for its 'phrases', 'extracted_embeddings' and 'uuid' columns
    :params mid: master question id; used as the redis key and stored as "MID"
    :params master_question_dictionary: dictionary providing 'question',
        'question_digest' and 'uuids' (which holds the 'uuid' and
        'attribute_digest' collections that are paired element by element)
    :params is_update: currently unused; kept so existing callers keep working
    :return True when the redis insert succeeded and firestore was updated,
        False when the redis insert did not succeed (firestore is left untouched)
    """
    phrase_entries = []

    # Build one entry per phrase row: the phrase, its Rake keywords, its embedding and UUID.
    for _, phrase_row in df_phrases.iterrows():
        keywords_rake = await extract_keywords(phrase_row['phrases'], method="Rake")

        phrase_entry = {
            'question': phrase_row['phrases'],
            'keywords_rake': json.dumps(keywords_rake),
            'phraseEmbedding': json.dumps(phrase_row['extracted_embeddings']),
            'uuid': phrase_row['uuid']
        }

        print(phrase_entry['question'], phrase_entry['uuid'])
        phrase_entries.append(phrase_entry)

    uuid_details = master_question_dictionary['uuids']
    list_of_uuid = uuid_details['uuid']

    # Nested values are JSON-encoded so the master entry only carries flat string fields.
    master_question_entry = {
        'mid': mid,
        'question': master_question_dictionary['question'],
        'questionDigest': master_question_dictionary['question_digest'],
        'uuid': json.dumps(list_of_uuid),
        'phrases': json.dumps(phrase_entries)
    }

    # Insert the question with phrases and embeddings into redis
    is_inserted = redis_db_instance.insert_embedding(mid, master_question_entry)

    if not is_inserted:
        # Nothing was stored, so skip the firestore writes and report the failure.
        logger.info('MID Error %s', mid)
        return False

    logger.info('MID Inserted %s', mid)

    # Pair every UUID with its attribute digest. The caller's dictionary is only
    # read here, never modified.
    # NOTE(review): a corrupted statement that used to sit here also referenced
    # 'phrasing_digest' - confirm whether it should be part of this mapping.
    uuid_attribute = dict(zip(list_of_uuid, uuid_details['attribute_digest']))

    one_to_many_mapping = {
        "SimilarFAQS": uuid_attribute,
        "Question": master_question_dictionary['question'],
        "MID": mid,
        "QuestionDigest": master_question_dictionary['question_digest']
    }

    # Add MID in One-to-Many Mapping, stored under the question digest
    await firestore_db_instance.insert_doc_into_collection(
        settings.FAQS_UUID_MAPPING_COLLECTION_NAME,
        master_question_dictionary['question_digest'],
        one_to_many_mapping
    )

    # Change Question status to trained for the documents matched on "UUID".
    update_question_status = {"Status": 'Trained'}
    await firestore_db_instance.update_field_document_new(
        settings.ALL_FAQS_COLLECTION_NAME, "UUID", "==", list_of_uuid,
        update_question_status
    )

    return True

def delete_mid(key) -> bool:
    """
    Ask ``redis_db_instance`` to delete the given MID key and log the outcome.

    :params key: MID to delete
    :return True in both cases (key deleted, or key reported as not present)
    """
    if redis_db_instance.delete_key(key):
        logger.info('Deleted MID %s', key)
    else:
        logger.info('MID %s is not present in database', key)

    # Both outcomes are reported to the caller as success.
    return True

# Strong references to the background tasks started by the decorator below.
# The event loop itself only keeps weak references to tasks, so a task that
# nothing else references can be garbage-collected before it is done.
_repeating_tasks = set()

def test_function(*, seconds: float, wait_first: bool = False):
    """
    Return a decorator that turns ``func`` into a coroutine function which,
    when awaited, starts a background task calling ``func`` every ``seconds``.

    :params seconds: pause, in seconds, between the end of one call and the next
    :params wait_first: when True, wait ``seconds`` before the first call
    :return decorator producing an async wrapper; awaiting the wrapper schedules
        the repeating task and returns None without waiting for it
    """
    def decorator(func: Callable[[], Optional[Awaitable[None]]]):
        is_coroutine = asyncio.iscoroutinefunction(func)

        @wraps(func)
        async def wrapped():
            async def loop():
                if wait_first:
                    await asyncio.sleep(seconds)
                while True:
                    try:
                        if is_coroutine:
                            await func()
                        else:
                            # Plain functions run in the threadpool so they do
                            # not block the event loop.
                            await run_in_threadpool(func)
                    except Exception as e:
                        # Log and carry on so one failed run does not end the schedule.
                        logger.error(str(e))
                    await asyncio.sleep(seconds)

            task = create_task(loop())
            # Keep the task alive while it runs; drop the reference once it is done.
            _repeating_tasks.add(task)
            task.add_done_callback(_repeating_tasks.discard)

        return wrapped

    return decorator

# Example usage of the ``repeat_every`` decorator imported from fastapi_utils.
# NOTE: this definition rebinds the module-level name ``test_function``, which
# the decorator factory defined earlier in this file also uses.
@repeat_every(seconds=24 * 60 * 60)  # 24 hours
def test_function():
    """Placeholder task: performs no work and returns None."""