In [26]:
from constituent_treelib import ConstituentTree, Language

# Define the language that should be considered
language = Language.English

# Construct the necessary NLP pipeline by downloading and installing the required
# models (benepar and spaCy). This has to happen BEFORE any text is parsed:
# previously `nlp` was called ahead of this assignment, so the cell only ran when
# an `nlp` object happened to be left over from an earlier cell and failed with a
# NameError on a fresh kernel.
nlp = ConstituentTree.create_pipeline(language, download_models=True)

# Define the text to analyse and split it into sentences with the pipeline
text_3 = 'The client wants to automatize some indoor routines of his house, where he lives with his family. In other words, internal house processes should be automatized, e.g. temperature and light regulation indoors. The basic idea is to maintain the stable conditions indoors with respect to the conditions outdoors. Thus possible fluctuations of outdoor conditions, like temperature, humidity, air quality, etc should be smoothed by the indoors smart home system. As different family members can have different perception of comfort conditions, the system should be able to adjust itself to different comfort patterns. The indoor system should also be able to identify the activities and adjust the conditions accordingly, e.g. adjust the light intensity based on activity.'
doc_3 = nlp(text_3)

for sentence in doc_3.sents:
    sent = str(sentence)

    # Instantiate a ConstituentTree object and pass it both the sentence and the NLP pipeline
    tree = ConstituentTree(sent, nlp)

    # Finally, extract all phrases (called with avoid_nested_phrases enabled)
    phrases = tree.extract_all_phrases(avoid_nested_phrases=True)

    # One line per sentence: a dict printed after the '!!!' marker
    print('!!!', phrases)

[nltk_data] Downloading package benepar_en3 to
[nltk_data]     C:\Users\Artem\anaconda3\share\nltk_data...
[nltk_data]   Package benepar_en3 is already up-to-date!
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


!!! {'SBAR': ['where he lives with his family'], 'VP': ['wants to automatize some indoor routines of his house , where he lives with his family'], 'NP': ['some indoor routines of his house , where he lives with his family', 'The client'], 'S': ['The client wants to automatize some indoor routines of his house , where he lives with his family .'], 'PP': ['of his house , where he lives with his family']}
!!! {'VP': ['should be automatized , e.g. temperature and light regulation indoors'], 'NP': ['temperature and light regulation indoors', 'internal house processes', 'other words'], 'S': ['In other words , internal house processes should be automatized , e.g. temperature and light regulation indoors .'], 'PP': ['In other words']}
!!! {'VP': ['is to maintain the stable conditions indoors with respect to the conditions outdoors'], 'NP': ['respect to the conditions outdoors', 'the stable conditions indoors', 'the stable conditions', 'The basic idea'], 'S': ['The basic idea is to maintain the

In [68]:
# Free-text descriptions of the available services, one string per service.
# The list order matters: the matching loop below enumerates this list starting at 1,
# so position N here is reported as "service N".
# NOTE(review): the triple-quoted literals keep their line breaks and the leading
# indentation of the continuation lines, so that whitespace is part of the text
# handed to `nlp`. Presumably unintended -- confirm before normalising, since it
# may change the similarity scores.
service_description = ["""The main focus of the service is the assurance of security and safety for the smart home 
                        users and their property both indoors and on the home adjoining area. In the case of unexpected 
                        behaviour detection, users and responsible authorities will be informed. Another possible 
                        application use-case is children safety assurance, e.g. if a small child enters the kitchen, 
                        the cooker will be blocked or if it is in use, other family members are informed.""",
                       """Mobile Phones, positioning sensors and presence sensors enable the required functionality to 
                       identify the location of a certain smart home user within the smart home. This service can be used in 
                        conjunction with other services to enable extended functionality.""", 
                       """One of the most important tasks of the Smart Home is to monitor and adjust the environment 
                       conditions indoors. Environment conditions include, for instance, temperature and humidity regulation, 
                       light intensity regulation etc.""", 
                       """One of the tasks is measurement and monitoring of vital functions, such as body temperature, 
                       blood pressure, heartbeat rate. Some of these parameters can be unique depending on the age, person, 
                       chronical diseases, etc. Therefore, it should be a personal profile indicating what can be considered 
                       as “normal” depending on the activity. In the case, of anomalous values, an alarm can be sent to other 
                       family members or to the responsible authority.""", 
                       """The entertainment service is responsible for management of all possible entertainment sources, 
                       like sound and video systems all over the house.""", 
                       """This service covers all possible tasks related to the professional occupation. This can include, 
                       for instance, personal electronic assistant that can remind the user about the upcoming meeting and 
                       activities related to work. Another application area is the synchronisation between the working place 
                       at home and in the office."""]

# Short requirement statements to be matched against the service descriptions.
# As above, the list order defines the 1-based requirement number used in the output.
requirements_list = ['automation of house routines', 
                     'indoor conditions (such as temperature and light conditions) should be regulated', 
                     'outdoor conditions have to be considered by indoor condition regulation',
                     'system should consider different family members’ preferences', 
                     'identification of activities indoors and adjustment/adoption of conditions accordingly']

# Parse every service description exactly once. The original loop re-ran the
# pipeline on each description for every requirement (and on the requirement for
# every description), although the parsed documents never change.
description_docs = [nlp(text) for text in service_description]

# similarities_batch[i] holds one record per service for requirement i + 1. Each record is
# ['Requirement: ', <requirement no.>, 'is similar to service: ', <service no.>, 'by', <score>]
# with both numbers 1-based and <score> being the value returned by Doc.similarity().
similarities_batch = []

for idxr, requirement in enumerate(requirements_list, start=1):

    # The requirement is the same for the whole inner loop, so parse it up front.
    requirement_tok = nlp(requirement)
    requirement_mapping = []

    for idxd, description_tok in enumerate(description_docs, start=1):
        similarity_coeff = requirement_tok.similarity(description_tok)

        requirement_mapping.append(['Requirement: ', idxr, "is similar to service: ", idxd, 'by', similarity_coeff])

    similarities_batch.append(requirement_mapping)

print(similarities_batch)
        
# For every requirement, report the single service with the highest similarity score.
# Each entry of mapping_result is a two-element list of human-readable strings.
mapping_result = []

for batch in similarities_batch:
    if not batch:
        # No service was scored for this requirement, so there is nothing to report
        # (the sort-and-slice version silently produced no entry here as well).
        continue

    # Index 5 of a record is the similarity coefficient. max() returns the first
    # record among equal scores, which is the same record the earlier stable
    # descending sort followed by [:1] selected.
    match = max(batch, key=lambda record: record[5])

    # Index 1 is the requirement number, index 3 the service number.
    mapping_result.append(['Requirement: ' + str(match[1]),
                           'has the highest contextual match with Service: ' + str(match[3])])

print(mapping_result)

[[['Requirement: ', 1, 'is similar to service: ', 1, 'by', 0.21151270506336967], ['Requirement: ', 1, 'is similar to service: ', 2, 'by', 0.16506544297746054], ['Requirement: ', 1, 'is similar to service: ', 3, 'by', 0.2874164212086428], ['Requirement: ', 1, 'is similar to service: ', 4, 'by', 0.22959451281470417], ['Requirement: ', 1, 'is similar to service: ', 5, 'by', 0.17075757948171916], ['Requirement: ', 1, 'is similar to service: ', 6, 'by', 0.18658239823084358]], [['Requirement: ', 2, 'is similar to service: ', 1, 'by', 0.07206381476410427], ['Requirement: ', 2, 'is similar to service: ', 2, 'by', 0.046298687126649245], ['Requirement: ', 2, 'is similar to service: ', 3, 'by', 0.4227933308145428], ['Requirement: ', 2, 'is similar to service: ', 4, 'by', 0.07602966738386788], ['Requirement: ', 2, 'is similar to service: ', 5, 'by', -0.00035703924445026425], ['Requirement: ', 2, 'is similar to service: ', 6, 'by', 0.002049796069070245]], [['Requirement: ', 3, 'is similar to servic