In [15]:
#access to api stuff
import json
import statistics
from collections import Counter
from load_data_funcs import loadAllCommunityMembers, callApi
import random
import itertools

In [16]:
# Load data for a uniform random sample of VK users, used as a model of society as a whole
def get_uniform_vkids(random_ids_count = 200000, max_id = 46800000):
    """Draw distinct user ids uniformly at random from 1..max_id (inclusive).

    Args:
        random_ids_count: number of distinct ids to return.
        max_id: largest id that may be drawn. Defaults to the ceiling that was
            previously hard-coded in this function.

    Returns:
        A list of `random_ids_count` distinct ints in [1, max_id]; an empty
        list when `random_ids_count` is zero or negative. The order of the
        returned ids carries no meaning.

    Raises:
        ValueError: if more distinct ids are requested than exist in the
            range. (A rejection loop over a set would never terminate then.)
    """
    if random_ids_count <= 0:
        return []
    if random_ids_count > max_id:
        raise ValueError(
            "cannot draw {} distinct ids from the range 1..{}".format(random_ids_count, max_id))
    # random.sample draws without replacement and does not materialize the range.
    return random.sample(range(1, max_id + 1), random_ids_count)

# Draw the baseline sample of user ids (default sample size of get_uniform_vkids).
random_ids = get_uniform_vkids()

# Load the records for the sampled ids; group-intersection loading is switched off here.
# rand_users_info is read later by summary_data_for_plotting as the baseline city distribution.
# NOTE(review): the meaning of the "random" label and the record schema come from
# load_data_funcs — confirm there.
rand_users_info = loadAllCommunityMembers("random", user_ids = random_ids, load_groups_intersections = False)

In [17]:
# Load the member records of two of the communities that are summarized below.
# NOTE(review): presumably the argument is the community's VK screen name and this
# performs network calls — confirm in load_data_funcs.
teamnavalny_funs_info = loadAllCommunityMembers("teamnavalny")
putin_funs_info = loadAllCommunityMembers("putin_z")

In [18]:
# Load the member records of the third community that is summarized below.
# NOTE(review): presumably the argument is the community's VK screen name — confirm in load_data_funcs.
anatolijsharij_info = loadAllCommunityMembers("anatolijsharij")

In [19]:
def get_filed(name, users_info, filt = None ):
    """Collect one value per user record, optionally filtered.

    Args:
        name: either a key to look up in each record, or a callable that is
            applied to the record to produce the value.
        users_info: iterable of user records (dict-like).
        filt: optional predicate; records for which it is falsy are skipped.

    Returns:
        A list of extracted values, in input order. When `name` is a key,
        records that do not contain it are skipped *before* `filt` is
        consulted, so `filt` may safely index that key.
    """
    use_extractor = callable(name)
    collected = []
    for user in users_info:
        # Key mode: silently drop records that lack the requested key.
        if not use_extractor and name not in user:
            continue
        # The filter is only consulted for records that survived the key check.
        if filt and not filt(user):
            continue
        collected.append(name(user) if use_extractor else user[name])
    return collected
    
def normalithe_batches(batches, factor = 1):
    """Rescale (key, count) pairs so that the values sum to `factor`.

    Args:
        batches: iterable of (key, count) pairs. It is materialized once, so
            one-shot iterators and generators are handled correctly (iterating
            the input twice would silently yield an empty result for them).
        factor: the total the rescaled values add up to (1 gives fractions,
            100 gives percentages).

    Returns:
        A list of (key, factor * count / total) pairs in input order; an empty
        list for empty input.

    Raises:
        ZeroDivisionError: if the input is non-empty but the counts sum to zero.
    """
    pairs = list(batches)
    total_count = sum(count for _key, count in pairs)
    return [(key, factor * float(count) / total_count) for key, count in pairs]
    
def normalithe_batches_according_to_gd(batches, gd_batches):
    """Weight each batch by how over-represented its key is versus a baseline.

    Each value in `batches` is divided by the normalized share of the same key
    in `gd_batches`, and the resulting ratios are normalized to sum to 1.

    Args:
        batches: iterable of (key, value) pairs to be weighted.
        gd_batches: iterable of (key, count) pairs describing the baseline
            distribution.

    Returns:
        A list of (key, weight) pairs in input order. Keys that are missing
        from the baseline, or whose baseline share is zero, are left out: their
        ratio is undefined, and indexing the baseline directly would abort the
        whole computation with a KeyError / ZeroDivisionError.
    """
    # list() keeps one-shot iterables safe regardless of how the helper iterates.
    key_to_prob_gd_batches = dict(normalithe_batches(list(gd_batches)))
    relative = []
    for key, prob in batches:
        baseline_prob = key_to_prob_gd_batches.get(key)
        if not baseline_prob:
            # No usable baseline for this key -> no meaningful ratio.
            continue
        relative.append((key, prob / baseline_prob))
    return normalithe_batches(relative)
    
    
def save_data_into_json(data, name):
    """Write `data` as JSON to ./json/<name>.js, relative to the working directory.

    The file is opened in 'w' mode, so an existing file of that name is
    overwritten. The ./json directory is not created here.
    """
    target_path = './json/{}.js'.format(name)
    with open(target_path, 'w') as out_file:
        json.dump(data, out_file)

In [20]:
# Bounds of the age window tested by is_POI (both ends inclusive there).
AGE_APPER_LIMIT = 50
AGE_LOWER_LIMIT = 14


def is_POI(uinf):
    """Return True when the record's 'age' lies in [AGE_LOWER_LIMIT, AGE_APPER_LIMIT].

    The record must contain an 'age' key; a KeyError is raised otherwise.
    """
    age = uinf['age']
    return AGE_LOWER_LIMIT <= age <= AGE_APPER_LIMIT

In [49]:
def _top_interest_names(field, user_infos, top_n = 10, keep = None):
    """Return the `top_n` most frequent entries of a list-valued profile field.

    Assumes every value stored under `field` is an iterable of entries
    (the values are flattened) — TODO confirm against the loader.
    `keep`, when given, is a predicate used to drop entries before counting.
    """
    entries = itertools.chain.from_iterable(get_filed(field, user_infos))
    if keep is not None:
        entries = filter(keep, entries)
    return [entry for entry, _count in Counter(entries).most_common(top_n)]


def _count_users(user_infos, predicate):
    """Count the records for which `predicate` is truthy."""
    return sum(1 for user in user_infos if predicate(user))


def _safe_ratio(numerator, denominator):
    """Divide, returning 0.0 instead of raising when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def summary_data_for_plotting(user_infos, baseline_user_infos = None):
    """Build the dictionary of chart-ready summaries for one set of user records.

    Args:
        user_infos: sized, re-iterable collection of user records (dict-like).
            Every record must have 'hidden' and 'deactivated' keys; 'age',
            'city', 'sex', 'groups_of_interest', 'books', 'movies', 'music'
            and 'tv' are used when present.
        baseline_user_infos: records describing the reference population used
            to rank cities. When omitted, the notebook-level `rand_users_info`
            is used, so existing one-argument calls behave as before.

    Returns:
        A dict with the keys 'ages_data', 'cites_data', 'sex_data',
        'groups_data', 'popular_books', 'popular_movies', 'popular_music',
        'popular_tv', 'live_persant', 'hiden_count' and 'deactivated_count'.
        Tabular entries start with a header row (e.g. ["age", "count"]).
        When no record passes the age filter, 'mean', 'median' and 'mode'
        are None instead of raising.

    Raises:
        KeyError: if a record lacks 'hidden' or 'deactivated'.
    """
    if baseline_user_infos is None:
        # Backward-compatible default: the random sample loaded earlier in the notebook.
        baseline_user_infos = rand_users_info

    data = {}

    # Age statistics, restricted to the records accepted by is_POI.
    ages = get_filed('age', user_infos, filt = is_POI)
    ages_counter = Counter(ages)
    data['ages_data'] = {
        'mean': statistics.mean(ages) if ages else None,
        'median': statistics.median(ages) if ages else None,
        # First most-common value: what statistics.mode returns on Python >= 3.8,
        # without the StatisticsError older versions raise for multimodal data.
        'mode': ages_counter.most_common(1)[0][0] if ages else None,
        'batches': [["age", "count"]] + list(ages_counter.items()),
    }

    # Cities: raw top-15 counts plus a rank relative to the baseline population.
    city_counts = Counter(get_filed('city', user_infos)).most_common(15)
    baseline_city_counts = Counter(get_filed('city', baseline_user_infos)).most_common()
    city_weights = normalithe_batches_according_to_gd(city_counts, baseline_city_counts)
    city_rank = [(city, weight * 100) for city, weight in city_weights]
    data['cites_data'] = {
        'city_count': [["city", "count"]] + city_counts,
        'city_rank': [["city", "rank"]] + sorted(city_rank, key = lambda pair: pair[1], reverse = True),
    }

    # Sex split and per-sex age histograms.
    sexes = Counter(get_filed('sex', user_infos)).most_common()
    # NOTE(review): this filter uses a strict upper bound and no lower bound,
    # unlike is_POI (inclusive, with AGE_LOWER_LIMIT) — confirm that is intended.
    sexes_ages = get_filed(
        lambda u: (u['age'], u['sex']),
        user_infos,
        filt = lambda u: 'age' in u and 'sex' in u and u['age'] < AGE_APPER_LIMIT)
    male_ages = list(Counter(age for age, sex in sexes_ages if sex == 'male').items())
    female_ages = list(Counter(age for age, sex in sexes_ages if sex == 'female').items())
    data['sex_data'] = {
        'sexes': [["sex", "count"]] + sexes,
        'male_ages': [["age", "count"]] + male_ages,
        'female_ages': [["age", "count"]] + female_ages,
    }

    # Groups of interest: share of each group (in percent), labelled by display name.
    groups_batches = Counter()
    for user in user_infos:
        if 'groups_of_interest' in user:
            groups_batches.update(user['groups_of_interest'])

    group_ratings = []
    if groups_batches:
        # Only query the API when there is something to resolve; an empty
        # group_ids list cannot contribute any rows.
        api_groups = callApi('groups.getById', {'group_ids': ','.join(groups_batches.keys())})['response']
        screen_name_to_name = {group['screen_name']: group['name'] for group in api_groups}
        group_ratings = [
            # Fall back to the key itself if the API did not return that group.
            (screen_name_to_name.get(key, key), share)
            for key, share in normalithe_batches(groups_batches.items(), 100)
        ]
    data['groups_data'] = {
        'group_rating': [["group", "rating"]] + sorted(group_ratings, key = lambda pair: pair[0]),
    }

    # Top-10 lists of declared interests.
    data['popular_books'] = _top_interest_names('books', user_infos)
    # Movie entries of three characters or fewer are ignored.
    data['popular_movies'] = _top_interest_names('movies', user_infos, keep = lambda title: len(title) > 3)
    data['popular_music'] = _top_interest_names('music', user_infos)
    data['popular_tv'] = _top_interest_names('tv', user_infos)

    # Account status shares.
    total = len(user_infos)
    live_count = _count_users(user_infos, lambda u: not u['hidden'] and not u['deactivated'])
    hiden_count = _count_users(user_infos, lambda u: bool(u['hidden']) and not u['deactivated'])
    deactivated_count = _count_users(user_infos, lambda u: bool(u['deactivated']))
    # NOTE(review): the formulas below are kept exactly as they were. The 'live'
    # table pairs live with (total - live), while the other two pair the subset
    # with the full total — confirm which convention the charts expect.
    data['live_persant'] = [
        ['', 'count'],
        ['live', _safe_ratio(live_count, total + live_count)],
        ['total', _safe_ratio(total - live_count, total + live_count)],
    ]
    data['hiden_count'] = [
        ['', 'count'],
        ['hiden', _safe_ratio(hiden_count, total + hiden_count)],
        ['total', _safe_ratio(total, total + hiden_count)],
    ]
    data['deactivated_count'] = [
        ['', 'count'],
        ['deactivated', _safe_ratio(deactivated_count, total + deactivated_count)],
        ['total', _safe_ratio(total, total + deactivated_count)],
    ]

    return data

In [50]:
# Summarize the teamnavalny member records and write the result to ./json/teamnavalny.js.
data = summary_data_for_plotting(teamnavalny_funs_info)
save_data_into_json(data, 'teamnavalny')

In [51]:
# Summarize the anatolijsharij member records and write the result to ./json/anatolijsharij.js.
# Note: `data` is rebound here, replacing the previous cell's summary.
data = summary_data_for_plotting(anatolijsharij_info)
save_data_into_json(data, 'anatolijsharij')


In [52]:

# Summarize the putin_z member records and write the result to ./json/putin_z.js.
# Note: `data` is rebound here, replacing the previous cell's summary.
data = summary_data_for_plotting(putin_funs_info)
save_data_into_json(data, 'putin_z')