In [1]:
import json
import pandas as pd
import re
import urllib.request

In [2]:
def get_optimal_clusters(dr):
    """Select, for every item of a data request, the best matching NRRT clusters.

    For each key of ``dr`` other than ``'UOA'`` the NRRT document of that item
    is downloaded.  A cluster qualifies when every one of its ranking features
    (``rf``) of type ATTR, MEAS or LINK is listed under the same section of the
    item's entry in the DR.  Among the qualifying clusters of an item, those
    with the highest number of ranking features are kept.

    Parameters
    ----------
    dr : dict
        Data request.  ``dr['UOA']`` is skipped; every other key maps an item
        name to a dict that may contain ``'ATTR'``, ``'MEAS'`` and ``'LINK'``
        lists of requested feature names.

    Returns
    -------
    dict
        ``{item_name: [NRRT_ID, ...]}`` with the ids (as ``int``) of the
        best matching clusters; the list is empty when no cluster qualifies.

    Raises
    ------
    urllib.error.URLError
        If an NRRT document cannot be downloaded.
    """
    dictionary_best_matched_clusters = {}
    for key in dr:
        if key == 'UOA':
            continue

        # load the NRRT associated with the item, e.g. https://onesto.world/NRRT/{Item}
        with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/NRRT/{key}') as url:
            nrrt = json.loads(url.read().decode())
        item_dr = dr.get(key)  # e.g. the 'Car' section of the DR

        # The selection must start empty for every item: sharing one list
        # across items let clusters of earlier items leak into later results.
        selected_clusters = []
        for cluster in nrrt[key]:
            # group the ranking features of the cluster by feature type
            rf_by_type = {'ATTR': [], 'MEAS': [], 'LINK': []}
            nuber_of_rf = 0
            for rf in cluster.get('rf') or []:
                if rf['feature_type'] in rf_by_type:
                    rf_by_type[rf['feature_type']].append(rf['feature_name'])
                # every ranking feature counts, whatever its type
                nuber_of_rf += 1

            # Every ranking feature must be requested in the DR.  A section
            # missing from the DR is treated as empty, so a cluster that ranks
            # on e.g. LINK features is rejected when the DR requests no LINK.
            cluster_is_covered = all(
                all(name in item_dr.get(feature_type, []) for name in names)
                for feature_type, names in rf_by_type.items()
            )
            if cluster_is_covered:
                # (cluster_id, nuber_of_rf), e.g. (1223, 4): the id is used to
                # retrieve the nodes, the count to pick the richest clusters
                selected_clusters.append((int(cluster.get('NRRT_ID')), nuber_of_rf))

        # keep only the clusters with the maximum number of ranking features
        max_available_rf = max((count for _, count in selected_clusters), default=0)
        dictionary_best_matched_clusters[key] = [
            cluster_id
            for cluster_id, count in selected_clusters
            if count == max_available_rf
        ]

    return dictionary_best_matched_clusters



In [3]:
# Download the sample data request (document DR/1 of the onesto repository),
# parse it as JSON into `dr`, and show it.
with urllib.request.urlopen('https://raw.githubusercontent.com/mister-one/onesto/master/DR/1') as url:
    dr = json.loads(url.read().decode())
print(dr)

{'UOA': 'Car', 'Car': {'ATTR': ['model_name', 'color'], 'MEAS': ['max_speed', 'consumption'], 'LINK': ['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']}, 'Company': {'ATTR': ['name', 'website'], 'MEAS': ['revenue'], 'LINK': ['(Company)-[LOCATED]->(Country)']}, 'Country': {'ATTR': ['name', 'capital'], 'MEAS': ['size']}, 'Person': {'ATTR': ['name', 'surname'], 'MEAS': ['weight', 'height']}}


In [4]:
# Run the cluster selection on the data request held in `dr`; the returned dict is the cell output.
get_optimal_clusters(dr)

{'Car': [1], 'Company': [1], 'Country': [1], 'Person': [1]}

In [66]:
'''
start from uoa
get_optimal_clusters for the UOA
for each cluster and for each instance in the cluster get the ABM
look at the abm
case_1 (IT HAS a relationship that matches in the ABM)
if in the ABM we can find all the ATTR, MEAS & LINK [True,True,True] then we request the link
    IF LINK is DIRECT
        look at the landing instance 
            retrieve the ABM
                check if it has all the ATTR & MEAS
                    IF there is a link in the dr check if it also has the link OK
                    IF there is a link in the dr but no link then check if this instance is in the optimal_cluster of the NRRT
    [https://gooe.com/Review/2,{'relationship':'(Company)-[LOCATED]->(Country)','Instances':['a','b']},]
    and for each link
        if the link is best rated 
            look at the landing cluster NRRT/id
        if the link is direct
'''
def f(dr):
    """Inspect the instances of the best matching clusters of the DR's unit of analysis.

    Work in progress: the function only prints diagnostics and returns
    ``None``; the handling of the instances reached through a "best rated"
    link is still a placeholder.

    For the item named by ``dr['UOA']`` the NRRT and ABM documents are
    downloaded.  For every instance ranked in a cluster selected by
    ``get_optimal_clusters`` the matching ABM entry is looked up (by
    ``ABM_ID``) and compared with the DR: a section (ATTR, MEAS, LINK) is
    satisfied when every name requested in the DR is offered by the ABM,
    the same direction ``check_attr_meas`` uses.  When ATTR, MEAS and LINK
    are all satisfied, the links of the instance are fetched with
    ``request_link`` and each requested link whose landing instance starts
    with ``NRRT`` is resolved to the cluster it points to.

    Parameters
    ----------
    dr : dict
        Data request with a ``'UOA'`` key naming the unit of analysis and one
        entry per item holding optional ``'ATTR'``, ``'MEAS'`` and ``'LINK'``
        lists.

    Returns
    -------
    None
    """
    dictionary_best_matched_clusters = get_optimal_clusters(dr)
    for key in dr:
        if key != dr.get('UOA'):
            continue

        # load the NRRT and the ABM associated with the UOA, e.g. https://onesto.world/NRRT/{Item}
        with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/NRRT/{key}') as url:
            nrrt = json.loads(url.read().decode())
        with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/ABM/{key}') as url:
            abm = json.loads(url.read().decode())[key]
        requested = dr.get(key)

        for cluster in nrrt[key]:
            if int(cluster['NRRT_ID']) not in dictionary_best_matched_clusters[key]:
                continue
            for instance in cluster['instance_ranking']:
                print(instance)
                # the ABM id is the last path segment of the instance_abm address
                abm_id = re.findall(r"([^/]+$)", instance.get('instance_abm'))[0]
                print(abm_id)
                for abm_document in abm:
                    if int(abm_document['ABM_ID']) != int(abm_id):
                        continue

                    ATTR = []
                    MEAS = []
                    LINK = []
                    for x in abm_document['ATTR']:
                        ATTR.append(x.get('attribute_name'))
                    for x in abm_document['MEAS']:
                        MEAS.append(x.get('measure_name'))
                    for x in abm_document['LINK']:
                        LINK.append(x.get('relationship'))

                    # One result per section present in the DR, keyed by name
                    # so a missing section cannot shift the others.  The test
                    # is "everything requested is offered by the ABM".
                    offered = {'ATTR': ATTR, 'MEAS': MEAS, 'LINK': LINK}
                    checks = {}
                    for x in requested:
                        if x in offered:
                            checks[x] = all(name in offered[x] for name in requested[x])
                    true_false = list(checks.values())

                    # A section absent from the DR requests nothing: ATTR/MEAS
                    # are then trivially satisfied and there is no link to follow.
                    has_attr_meas = checks.get('ATTR', True) and checks.get('MEAS', True)
                    has_link = checks.get('LINK', False)

                    if has_attr_meas:
                        if has_link:
                            # Start of the option: has link, either Direct or Best Rated
                            # link_dr are the links requested in the DR for the UOA
                            # Eg. ['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']
                            link_dr = requested['LINK']
                            # retrieved_link = [{'relationship': '(Company)-[PRODUCED]->(Car)', 'landing_instance': 'NRRT/Car/1'}, ...]
                            retrieved_link = request_link(instance['instance_address'])
                            for link in retrieved_link:
                                # only the links requested in the DR are followed
                                if link['relationship'] not in link_dr:
                                    continue
                                # landing_instance is either one address or a list of them
                                landing_instances = link['landing_instance']
                                if isinstance(landing_instances, str):
                                    landing_instances = [landing_instances]
                                for landing_instance in landing_instances:
                                    # an address starting with NRRT is a BEST RATED link
                                    if not re.search(r"^NRRT", landing_instance):
                                        continue
                                    # 'NRRT/<item>/<cluster id>': item_ tells where the link is going
                                    item_ = re.findall(r"(?<=\/).+?(?=\/)", landing_instance)[0]
                                    cluster_id_ = re.findall(r"[^/]*$", landing_instance)[0]
                                    # load the NRRT of the landing item
                                    with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/NRRT/{item_}') as url:
                                        nrrt_item = json.loads(url.read().decode())
                                    # retrieve the specific cluster of the landing item by its id
                                    for cluster_ in nrrt_item[item_]:
                                        if int(cluster_['NRRT_ID']) != int(cluster_id_):
                                            continue
                                        for instance_ in cluster_['instance_ranking']:
                                            # TODO: check that the instance has all the requested features
                                            pass # ___edit___
                        else:
                            print('false') ########################## TEST
                            # Start of the option: has no link in the ABM
                        print('hell')
                    print(true_false)
                    print(ATTR,MEAS,LINK)
                    print(requested.get('LINK'))
                    print(x,key)
                        
                        
                        
                        

In [39]:
# Exercise f on the sample data request; f prints its intermediate state as it goes.
f(dr) 

{'instance_address': 'https://gooe.com/Review/1', 'instance_abm': 'https://onesto.world/abm/Review/1', 'trust_score': 0.1, 'frequency_score': 1}
1
[True, False, False]
['model_name', 'color'] ['m_1', 'm_2'] ['(Review)-[WRITTEN_BY]->(User)', '(Review)-[ABOUT]->(Book)']
['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']
LINK Car
{'instance_address': 'https://gooe.com/Review/2', 'instance_abm': 'https://onesto.world/abm/Review/1', 'trust_score': 0.1, 'frequency_score': 1}
1
[True, False, False]
['model_name', 'color'] ['m_1', 'm_2'] ['(Review)-[WRITTEN_BY]->(User)', '(Review)-[ABOUT]->(Book)']
['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']
LINK Car


In [165]:
# link = request_link(instance_address['instance_address']))
def request_link(instance_address):
    """Download the JSON document at ``instance_address`` and return its ``'LINK'`` entry.

    Raises ``KeyError`` when the document has no ``'LINK'`` key.
    """
    with urllib.request.urlopen(instance_address) as response:
        payload = response.read().decode()
    document = json.loads(payload)
    return document['LINK']

In [50]:
# Fetch the LINK entry of a sample Company instance document.
b = request_link('https://raw.githubusercontent.com/mister-one/onesto/master/NODE_ONESTO/INSTANCE/Company/1')

# Hand-written sample of an instance_ranking entry (address, ABM address and two scores).
a = {'instance_address': 'https://gooe.com/Review/1', 'instance_abm': 'https://onesto.world/abm/Review/1', 'trust_score': 0.1, 'frequency_score': 1}

In [57]:
# Show the fetched links one per line, then the whole list.
# Relies on `b` assigned in an earlier cell.
for x in b:
    print(x)
print(b)


{'relationship': '(Company)-[PRODUCED]->(Car)', 'landing_instance': 'NRRT/Car/1'}
{'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}
[{'relationship': '(Company)-[PRODUCED]->(Car)', 'landing_instance': 'NRRT/Car/1'}, {'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}]


In [16]:
# Show the data request again for reference.
print(dr)

{'UOA': 'Car', 'Car': {'ATTR': ['model_name', 'color'], 'MEAS': ['max_speed', 'consumption'], 'LINK': ['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']}, 'Company': {'ATTR': ['name', 'website'], 'MEAS': ['revenue'], 'LINK': ['(Company)-[LOCATED]->(Country)']}, 'Country': {'ATTR': ['name', 'capital'], 'MEAS': ['size']}, 'Person': {'ATTR': ['name', 'surname'], 'MEAS': ['weight', 'height']}}


In [18]:
# Print the value stored under every key of the DR (the 'UOA' entry is a plain string,
# the others are per-item dicts).
for x in dr:
    print(dr[x])

Car
{'ATTR': ['model_name', 'color'], 'MEAS': ['max_speed', 'consumption'], 'LINK': ['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']}
{'ATTR': ['name', 'website'], 'MEAS': ['revenue'], 'LINK': ['(Company)-[LOCATED]->(Country)']}
{'ATTR': ['name', 'capital'], 'MEAS': ['size']}
{'ATTR': ['name', 'surname'], 'MEAS': ['weight', 'height']}


In [58]:
# Sample of a single link stored under a named key.
c ={'link_1': {'relationship': '(Company)-[PRODUCED]->(Car)', 'landing_instance': 'NRRT/Car/1'}}
# NOTE(review): `nrrt` is not assigned at notebook level before this cell (the next cell
# creates it), so this line raises NameError on a fresh top-to-bottom run.
print(nrrt)

NameError: name 'nrrt' is not defined

In [59]:
# Download the NRRT document of the item named by `key` into the notebook-level `nrrt`.
key = 'Car'
with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/NRRT/{key}') as url:
    nrrt = json.loads(url.read().decode()) # parse the NRRT document of `key` as JSON

In [65]:
# List the ranked instances of the entry at position 1 of the item's cluster list.
for x in nrrt[key][1]['instance_ranking']:
    print(x)

{'instance_address': 'https://gooe.com/Review/4', 'instance_abm': 'https://onesto.world/abm/Review/1', 'trust_score': 0.1, 'frequency_score': 1}
{'instance_address': 'https://gooe.com/Review/5', 'instance_abm': 'https://onesto.world/abm/Review/2', 'trust_score': 0.1, 'frequency_score': 1}


In [68]:
def function(dr,item):
    """Placeholder, not implemented yet: the body is only a plan and the call returns None."""
    # item = e.g. Car
    # dr = the data request dict
    # for the item
        # load the NRRT
        # For each instance in the cluster check if they satisfy the requirements of the DR
        # If true select the instance
    pass
        
# Hand-written sample of an instance_ranking entry.
a= {
          "instance_address": "https://gooe.com/Review/1",
          "instance_abm": "https://onesto.world/abm/Review/1",
          "trust_score": 0.1,
          "frequency_score": 1
        }

# select the right instances in the cluster

In [124]:
def check_attr_meas(item,dr,abm_id):
    """Tell whether an ABM entry offers every attribute and measure requested in the DR.

    Parameters
    ----------
    item : str
        Item name, e.g. ``'Car'``; selects both the DR section and the ABM
        document to download.
    dr : dict
        Data request; ``dr[item]`` may hold ``'ATTR'`` and ``'MEAS'`` lists of
        requested names.  A missing list requests nothing.
    abm_id : int
        1-based position of the entry inside the ABM document of ``item``.

    Returns
    -------
    bool
        ``True`` when all requested attributes and all requested measures are
        present in the ABM entry, ``False`` otherwise.
    """
    requested = dr.get(item)
    requetsed_attr = requested.get('ATTR', [])
    requetsed_meas = requested.get('MEAS', [])

    abm_address = f'https://raw.githubusercontent.com/mister-one/onesto/master/ABM/{item}'
    with urllib.request.urlopen(abm_address) as url:
        instance_abm = (json.loads(url.read().decode()))[item][abm_id-1]
    abm_ATTR = [x.get('attribute_name') for x in instance_abm['ATTR']]
    abm_MEAS = [x.get('measure_name') for x in instance_abm['MEAS']]

    # check if all the attributes and measures requested are in the ABM
    has_all_attr = all(name in abm_ATTR for name in requetsed_attr)
    has_all_meas = all(name in abm_MEAS for name in requetsed_meas)
    # Plain `and` instead of `== True & ... == True`: `&` binds tighter than
    # `==`, so the old expression only worked by accident of chained
    # comparison.  The result is always a bool, never an implicit None.
    return has_all_attr and has_all_meas
    
    

In [113]:
# Attributes requested for the Car item of the DR.
dr.get('Car')['ATTR']

['model_name', 'color']

In [125]:
# Check the first ABM entry of Car against the attributes and measures requested in the DR.
check_attr_meas('Car',dr,1)

True

In [109]:
# Minimal demonstration of the subset test used by check_attr_meas:
# every requested value must be found among the available ones.
requetsed_attr = [1,2]
abm_ATTR = [1,2,3]
all(item in abm_ATTR for item in requetsed_attr)

True

In [385]:
def check_link(item,item_cluster_id,dr,abm_id,instance_address):
    """Resolve the links requested in the DR for one instance.

    A requested link that the instance's ABM entry lists is resolved from the
    links of the instance itself (fetched with ``request_link``).  A requested
    link the ABM does not list is looked up in the ``link_ranking`` of the
    instance's NRRT cluster: every ranked link whose ``origin`` is the item
    named in the last bracket of the requested relationship is taken.

    Parameters
    ----------
    item : str
        Item being analysed, e.g. ``'Car'``.
    item_cluster_id : int
        Index, inside the NRRT document of ``item``, of the cluster the
        instance belongs to.
    dr : dict
        Data request; ``dr[item]['LINK']`` lists the requested relationships.
    abm_id : int
        1-based position of the instance's entry in the ABM document.
    instance_address : str
        Address of the instance document, e.g. https://onesto.world/Car/12414.

    Returns
    -------
    list of dict
        One ``{'relationship': ..., 'landing_instance': ...}`` dict per
        resolved link.
    """
    requetsed_link = dr.get(item)['LINK']
    print('----requetsed_link---')
    print(requetsed_link)
    print('--------')

    abm_address = f'https://raw.githubusercontent.com/mister-one/onesto/master/ABM/{item}'
    with urllib.request.urlopen(abm_address) as url:
        instance_abm = (json.loads(url.read().decode()))[item][abm_id-1]
    abm_LINK = [x.get('relationship') for x in instance_abm['LINK']]
    print('----abm_LINK----')
    print(abm_LINK)
    print('--------')

    # the instance is only contacted when its ABM lists at least one requested link
    available_link = []
    if any(link in abm_LINK for link in requetsed_link):
        available_link = request_link(instance_address)
    print('----available_link----')
    print(available_link)
    print('--------')

    matched_landing_instances = []
    missing_relationships = []
    for link in requetsed_link:
        if link in abm_LINK:
            matched_landing_instances.extend(
                x for x in available_link if x['relationship'] == link
            )
        else:
            missing_relationships.append({'relationship': f'{link}', 'landing_instance':[]})
    print('---matched landing instances--')
    print(matched_landing_instances)
    print('-----')
    print('---missing_relationships--')
    print(missing_relationships)

    # The link ranking of the cluster is the same for every missing
    # relationship, so it is downloaded once, and only when it is needed.
    item_nrrt_cluster = []
    if missing_relationships:
        with urllib.request.urlopen(f'https://raw.githubusercontent.com/mister-one/onesto/master/NRRT/{item}') as url:
            item_nrrt_cluster = json.loads(url.read().decode())[item][item_cluster_id]['link_ranking']

    # for the missing relationships look in the link ranking of the cluster
    # for links whose origin is the item in the last bracket
    for relat in missing_relationships:
        # (s) is the landing item that we are missing, i.e. the content of the
        # last bracket: 'Person' for '(Car)<-[OWNED]-(Person)'
        s = re.findall(r"\(([^()]+)\)(?=[^()]*$)", relat['relationship'])[0]
        print('s is the item linked that we need to find')
        print(s)
        print('-------------')
        print(item_nrrt_cluster)
        for x in item_nrrt_cluster:
            # compare against the missing item itself rather than a fixed
            # sample relationship, so every missing link can be resolved
            if x.get('origin') == s:
                # TODO: check that each incoming instance satisfies the DR
                matched_landing_instances.append({'relationship': x['relationship'], 'landing_instance':x['incoming_instance_address']})

    return matched_landing_instances

        
        

In [386]:


# Exercise check_link on the first Car instance, using cluster index 1 and ABM entry 1.
check_link('Car',1,dr,1,'https://raw.githubusercontent.com/mister-one/onesto/master/NODE_ONESTO/INSTANCE/Car/1')



----requetsed_link---
['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']
--------
----abm_LINK----
['(Car)<-[PRODUCED]-(Company)', '(Car)<-[LOCATED]-(Country)']
--------
----available_link----
[{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}, {'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}]
--------
---matched landing instances--
[{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}]
-----
---missing_relationships--
[{'relationship': '(Car)<-[OWNED]-(Person)', 'landing_instance': []}]
s is the item linked that we need to find
Person
-------------
[{'origin': 'Allert', 'relationship': '(Allert)-[ABB]->(Car)', 'incoming_instance_address': ['https://hello.com/Alert/123', 'https://hello.com/Alert/123', 'https://hello.com/Alert/123', 'https://hello.com/Alert/123']}, {'origin': 'Allert', 'relationship': '(Allert)-[NEAR]-(Car)', 'incoming_inst

In [173]:
# Sample link list with a single string as every landing_instance.
a = [{'relationship': '(Company)-[PRODUCED]->(Car)', 'landing_instance': 'NRRT/Car/1'}, {'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}]

In [177]:
# Print the landing instance of every sample link whose relationship equals `b`,
# and 'hello' for the others.  Relies on `a` assigned in an earlier cell.
b = '(Company)-[PRODUCED]->(Car)'
for x in a:
    if x['relationship'] == b:
        print(x['landing_instance'])
    else:
        print('hello')

NRRT/Car/1
hello


In [225]:
# NOTE(review): this rebinds the builtin name `dict` at notebook level, so later cells
# that call dict(...) would get this mapping instead of the type.
dict = {'key1':'geeks', 'key2':'for'}  
print("Current Dict is: ", dict)  
    
# using the subscript notation  
# Dictionary_Name[New_Key_Name] = New_Key_Value  
  
dict['key3'] = 'Geeks'
dict['key4'] = 'is'
dict['key5'] = 'portal'
dict['key6'] = 'Computer'
print("Updated Dict is: ", dict) 

Current Dict is:  {'key1': 'geeks', 'key2': 'for'}
Updated Dict is:  {'key1': 'geeks', 'key2': 'for', 'key3': 'Geeks', 'key4': 'is', 'key5': 'portal', 'key6': 'Computer'}


In [234]:
# Keys of the mapping bound to the name `dict` in the previous cell (not the builtin type).
dict.keys()

dict_keys(['key1', 'key2', 'key3', 'key4', 'key5', 'key6'])

In [235]:
# Sample link list where landing_instance is a list for the first link and a single string for the second.
a = [{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}, {'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}]

In [236]:
# Show each sample link on its own line.
for x in a:
    print(x)

{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}
{'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}


In [258]:
# Fixtures for the link-matching experiment in the next cell.
# Links requested in the DR:
requetsed_link = ['(Car)<-[PRODUCED]-(Company)', '(Car)<-[OWNED]-(Person)']

# Links listed by the ABM; the OWNED entry starts with '(Ca)' and therefore does not equal
# the requested one -- presumably on purpose, to produce an unmatched link (TODO confirm).
abm_LINK =['(Car)<-[PRODUCED]-(Company)', '(Ca)<-[OWNED]-(Person)', '(Car)<-[LOCATED]-(Country)']

# Links returned by the instance itself:
available_link = [{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}, {'relationship': '(Company)-[LOCATED]->(Country)', 'landing_instance': 'NRRT/Car/1'}]

In [259]:
# Split the requested links into those resolved from the instance's own links
# (`landing_instances`) and those the ABM does not list (`unmateched`).
# The single-letter prints only trace which branch was taken.
landing_instances = []
unmateched = []
for link in requetsed_link:
        print('c')
        if link in abm_LINK:
            # the ABM lists the link: collect the instance links with the same relationship
            for x in available_link:
                if x['relationship'] == link:
                    landing_instances.append(x)
                    print('b')
                else:
                    print('a')
                    print(link)
                    print(landing_instances)
        else:
            # the ABM does not list the link: remember it with an empty landing list
            unmateched.append({'relationship': f'{link}', 'landing_instance':[]})
print(unmateched)

c
b
a
(Car)<-[PRODUCED]-(Company)
[{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}]
c
[{'relationship': '(Car)<-[OWNED]-(Person)', 'landing_instance': []}]


In [299]:

a = ' test a(bb) ci (cccc)'
# Take the last parenthesised group of `a` and strip its two surrounding brackets.
b = re.findall("\([^()]+\)(?=[^()]*$)",a)[0][1:-1]

In [300]:
# Show the text extracted from the last bracket.
print(b)

cccc


In [292]:
# Length of the last parenthesised group of `a` minus one, i.e. the end index used when
# slicing off its closing bracket.  Relies on `a` assigned in an earlier cell.
len(re.findall("\([^()]+\)(?=[^()]*$)",a)[0])-1

5

In [296]:
# str.split takes "()" as one literal two-character separator; `txt` never contains
# "(" immediately followed by ")", so the result is a one-element list with the whole text.
txt = "hello, (my) name is Peter, I am 26 years old"

x = txt.split("()")

print(x)

['hello, (my) name is Peter, I am 26 years old']


In [297]:
# The pattern matches a parenthesised group that is followed by no further bracket
# up to the end of the string, i.e. the last group.
a = ' test a(bb) ci (cccc)'
b = re.findall("\([^()]+\)(?=[^()]*$)",a)

In [298]:
# Show the list of matches (the last bracketed group, brackets included).
print(b)

['(cccc)']


In [306]:
# For each sample missing relationship, print the item named in its last bracket
# (the same expression check_link uses to compute `s`).
a = [{'relationship': '(Car)<-[OWNED]-(Person)', 'landing_instance': []},{'relationship': '(Car)<-[OWNED]-(Person)', 'landing_instance': []}]
for x in a:
    s = x['relationship']
    print(re.findall("\([^()]+\)(?=[^()]*$)",s)[0][1:(len(re.findall("\([^()]+\)(?=[^()]*$)",s)[0])-1)])

(Car)<-[OWNED]-(Person)
Person
(Car)<-[OWNED]-(Person)
Person


In [388]:
# Sample of resolved links: one taken from the instance itself and one taken from a link ranking;
# print only the relationship of each.
a = [{'relationship': '(Car)<-[PRODUCED]-(Company)', 'landing_instance': ['NRRT/Engine/2', 'NRRT/Engine/3']}, {'relationship': '(Allert)-[NEAR]-(Car)', 'landing_instance': ['https://hello.com/Alert/123', 'https://hello.com/Alert/123', 'https://hello.com/Alert/123', 'https://hello.com/Alert/123']}]
for x in a:
    print(x['relationship'])

(Car)<-[PRODUCED]-(Company)
(Allert)-[NEAR]-(Car)
