In [1]:
import pickle

import pandas as pd
import numpy as np
#import graphlab as tc
import turicreate as tc
from tqdm.notebook import tqdm
import turicreate.aggregate as agg

In [2]:
def f_create_network(data, gamma):
    """Greedily group apks whose Jaccard similarity is at least ``1 - gamma``.

    A Jaccard item-similarity recommender is fitted on ``data`` with
    ``'function'`` as the user column and ``'apk'`` as the item column.
    Apks that have at least one neighbour scoring ``>= 1 - gamma`` are then
    taken one at a time, in arbitrary (set) order.  Each picked apk becomes a
    key of the result and claims every such neighbour that has not been
    claimed or picked before; claimed apks are never picked as keys.
    Every apk that ends up neither picked nor claimed is finally added as a
    key with an empty list.

    Parameters
    ----------
    data
        Table with ``'function'`` and ``'apk'`` columns; it is passed straight
        to ``tc.item_similarity_recommender.create``.
    gamma
        Distance threshold; a pair is linked when its similarity score is
        ``>= 1 - gamma``.

    Returns
    -------
    dict
        Maps each key apk to the list of apks grouped under it (possibly
        empty).
    """
    apks = data['apk'].unique()
    k = apks.shape[0]
    sim_recom = tc.item_similarity_recommender.create(data,
                                                      user_id='function',
                                                      item_id='apk',
                                                      similarity_type='jaccard',
                                                      only_top_k=k, verbose=False)
    itms = sim_recom.get_similar_items(apks, k=k)
    # gw only holds apks with at least one neighbour at or above the
    # threshold; apks without such a neighbour are added at the end.
    gw = itms[itms['score'] >= 1 - gamma].groupby(
        key_column_names='apk',
        operations={'sims': agg.DISTINCT('similar')})

    # Read the grouped rows once. The groupby makes 'apk' unique, so a plain
    # dict lookup replaces filtering the whole SFrame on every iteration.
    sims_by_apk = dict(zip(gw['apk'], gw['sims']))

    ws = set(sims_by_apk)
    net = dict()
    already_added = set()
    while len(ws) > 0:
        w = ws.pop()

        # Neighbours of w that no earlier group has claimed.
        simp = set(sims_by_apk[w]) - already_added

        net[w] = list(simp)
        already_added.update(simp)
        already_added.add(w)

        # Claimed apks must not become keys of their own group.
        ws = ws - simp

    # add solitary nodes & not-aggregating nodes
    if len(already_added) > 0:
        nds = apks.filter_by(list(already_added), exclude=True)
    else:
        nds = apks

    for n in nds:
        net[n] = []

    return net

In [4]:
# Load the encoded function table and drop its 'fcount' column in one step.
mw = tc.load_sframe('../binarydata/funcs-encoded').remove_column('fcount', inplace=True)

In [5]:
# Array of values later matched against the 'apk' column to select test rows.
test_apns = np.load(file='../res/test-tc-1000.npy')

In [6]:
# Load the previously saved networks from disk.
net_file = '../res/9003-tc-jaccard-votingnets.pickle'
# WARNING: pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted run of this project.
with open(net_file, 'rb') as f:
    nets = pickle.load(f)

In [7]:
# Take the entry stored under key 0.0 and unpack its two parts.
# `ref` is used below as a mapping whose keys are apk values.
# NOTE(review): the key is presumably a gamma threshold -- confirm.
ref, mer = nets[0.0]

In [10]:
# Keep only the rows of mw whose apk is a key of `ref`.
# The keys are materialised as a list because a plain list is a documented
# input type for SFrame.filter_by, whereas a dict view is not.
dds = mw.filter_by(values=list(ref.keys()), column_name='apk')

In [15]:
# Rows of mw whose apk appears in the test array.
tts = mw.filter_by(test_apns, 'apk')

In [12]:
# Fit a Jaccard item-similarity recommender on the reference rows,
# keeping at most k similar items per apk.
k = 1000
sim_recom = tc.item_similarity_recommender.create(
    dds,
    user_id='function',
    item_id='apk',
    similarity_type='jaccard',
    only_top_k=k,
    verbose=False,
)

In [16]:
# Score every row of the test subset with the recommender fitted on dds.
preds = sim_recom.predict(tts)

In [17]:
# Display the predicted scores for the test rows.
preds

dtype: float
Rows: 3540071
[0.24805590386192003, 0.3312299860866743, 0.0, 0.15946469704310098, 0.27580193110874723, 0.17563376497866504, 0.0, 0.7718316104964934, 0.14815204085842257, 0.0, 0.277665376663208, 0.14829435315359465, 0.30063122639924417, 0.21498892933218178, 0.7718316104964934, 0.2186064122922378, 0.3312671161317206, 0.6268826273904331, 0.0, 0.15847103794415793, 0.0, 0.08293630463627581, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10194328014240708, 0.0, 0.44885062336921694, 0.040303671244278694, 0.2597382295162972, 0.0, 0.22999340295791626, 0.0, 0.005476392431737669, 0.08148811165278642, 0.14125619754565888, 0.46017616987228394, 0.0759665963439206, 0.0, 0.7816644064103714, 0.20437913571715985, 0.1558559122404836, 0.0030576510145931663, 0.7580290678618611, 0.0, 0.21955888072649637, 0.0, 0.2063053173690357, 0.471559683318998, 0.0, 0.7942552974029463, 0.0, 0.5369575983948178, 0.14458289691670376, 0.7249521190921465, 0.1759323957804087, 0.14817800231882045, 0.5681439788654598, 0.0, 0.2096879290

In [20]:
# Spot check: score the single row at index 4 of tts on its own; the value
# shown should equal the entry at index 4 of preds.
sim_recom.predict(tts[4])

dtype: float
Rows: 1
[0.27580193110874723]

In [22]:
# Score every row of mw that belongs to apk 4227.
# NOTE(review): 4227 is a hard-coded apk id; why this apk was chosen is not
# stated here -- confirm.
sim_recom.predict(mw[mw['apk']==4227])

dtype: float
Rows: 18516
[0.24805590386192003, 0.27090784931971024, 0.5270429154237112, 0.028318852846130425, 0.5270429154237112, 0.37883143804290076, 0.21037665196249442, 0.22858889999790727, 0.5270429154237112, 0.2741177305348517, 0.1993709589363238, 0.10766640582153102, 0.16128995571986282, 0.16744171902537347, 0.28320428891615435, 0.07200670116364984, 0.4092070900875589, 0.20954668313419264, 0.5270429154237112, 0.09538969340597749, 0.25282739187506115, 0.5270429154237112, 0.27031389291469865, 0.39438218162173316, 0.2688381314277649, 0.07082994438197515, 0.5270429154237112, 0.07183114196255, 0.5270429154237112, 0.09561769129255208, 0.5270429154237112, 0.31768674002243924, 0.5270429154237112, 0.5270429154237112, 0.05962337746610955, 0.24828219413757324, 0.25520707088068495, 0.046019640093144305, 0.06946651347271808, 0.2964508142322302, 0.08787228085182525, 0.11464953550270625, 0.25873138046846156, 0.5270429154237112, 0.2760086269445822, 0.23746347242622937, 0.055303460426544875, 0.06