In [1]:
import pandas as pd
import numpy as np

import ipywidgets as widgets
from ipywidgets import AppLayout
import IPython.display as pyDis

from context import algorithms
from algorithms.clustering.similarityCommunityDetection import SimilarityCommunityDetection
from algorithms.clustering.explainedCommunitiesDetection import ExplainedCommunitiesDetection
from algorithms.visualization.gephiVisualization import GephiVisualization

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def transport_pollution_values(transport_polution):
    """Encode a transport pollution label as an integer.

    'High' -> 2, 'Medium' -> 1, 'Low' -> 0. Any other value yields None.
    """
    encoding = (('High', 2), ('Medium', 1), ('Low', 0))
    for label, code in encoding:
        if transport_polution == label:
            return code
    # Unknown labels fall through without a numeric code
    return None
    
def transport_type_values(transport_type):
    """Encode a transport type label: 1 for 'Private', 0 for anything else."""
    return 1 if transport_type == 'Private' else 0
    
def booleans_values(value):
    """Encode a value by its truthiness: 1 when truthy, 0 when falsy."""
    return 1 if value else 0

In [3]:
# Load the grouped user profiles and replace the answer columns with their
# integer encodings (each column is encoded independently of the others).
data_df = pd.read_csv('../../data/MNCN/user_profiles_grouped.csv').assign(**{
    'Transport Pollution': lambda df: df['Transport Pollution'].apply(transport_pollution_values),
    'Transport Type': lambda df: df['Transport Type'].apply(transport_type_values),
    'Reduce Consumption': lambda df: df['Reduce Consumption'].apply(booleans_values),
    'Change Transport': lambda df: df['Change Transport'].apply(booleans_values),
    'Recycle': lambda df: df['Recycle'].apply(booleans_values),
})

# Selectable attributes and their human-readable answer labels.
# Only the keys are used by the visible code (as the options of the selector).
attributes = {
    'Transport Pollution': ['Low (Walking, Bike)', 'Medium (Metro, Bus)', 'High (Car)'],
    'Transport Type': ['Public (Bus, Metro)', 'Private (Car, Bike, ...)'],
    'Reduce Consumption': ['No', 'Yes'],
    'Change Transport': ['No', 'Yes'],
    'Recycle': ['No', 'Yes'],
}

# Module-level placeholder; the functions in this notebook work on their own local `data`.
data = None

In [4]:
def search_communities(data, percentage):
    """Detect communities, print their explanation and send them to Gephi.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns used for the detection and for the cosine similarity.
        Assumed to share its index with the module-level ``data_df``
        (it is filtered with index labels taken from ``data_df``) -- TODO confirm.
    percentage : float
        Forwarded as ``percentage`` to ``search_all_communities`` and
        ``get_community``.

    Side effects
    ------------
    * Prints one summary per community with more than one member, plus the
      number of users left in single-member communities.
    * Overwrites the ``community`` column of the module-level ``data_df``.
    * Loads the users and their similarity matrix into the Gephi workspace
      ``'mncn_2'``.
    """
    print('Percentage', percentage)

    # Apply algorithm to detect communities
    community_detection = ExplainedCommunitiesDetection(data, SimilarityCommunityDetection, 'cosine')
    n_communities, users_communities = community_detection.search_all_communities(
        answer_binary=False, percentage=percentage)

    # Explain communities; single-member communities are only counted
    users_without_community = []
    for c in range(n_communities):
        community_data = community_detection.get_community(c, percentage=percentage)
        members = community_data['members']

        if len(members) > 1:
            print('---------------------')
            print('COMMUNITY -', community_data['name'])
            print('\t- N. Members:', len(members))
            print('\t- Properties:')

            for k in community_data['properties'].keys():
                print('\t\t-', k, community_data['properties'][k])
        else:
            users_without_community.extend(members)

    print('---------------------')
    print('N. USERS WITHOUT COMMUNITY -', len(users_without_community))

    # Add the detected community of every user to the data.
    # NOTE(review): relies on the dict values being ordered like the rows of
    # data_df -- confirm against search_all_communities.
    data_df['community'] = list(users_communities.values())

    # Find the users that belong to communities with fewer than 2 users
    community_sizes = data_df.groupby(by='community').count()['UserId']
    groups_to_filter = community_sizes[community_sizes < 2].index
    users_out = data_df[data_df['community'].isin(groups_to_filter)].index.values

    # Draw in Gephi
    gv = GephiVisualization(workspace='mncn_2')

    # Prepare users and distances. The same users are dropped from both, so
    # that row i of `users` and row/column i of `distances` describe the same
    # user (previously only `data` was filtered, leaving the two misaligned).
    user_columns = ['UserId', 'School', 'Grade', 'Type', 'Zone', 'community']
    users = data_df.loc[~data_df.index.isin(users_out), user_columns].values
    data = data[~data.index.isin(users_out)]
    distances = cosine_similarity(data)

    gv.load_community(users, distances, users_properties=['School', 'Grade', 'Type', 'Zone', 'community'])

In [5]:
def btn_event(obj):
    """Handle a click on the search button.

    Reads the attributes selected in ``questions_wid`` and the value of
    ``percentage_wid``, restricts ``data_df`` to the selected columns and runs
    ``search_communities`` on the result.

    Parameters
    ----------
    obj
        Argument passed by the click callback; not used.
    """
    attrs = list(questions_wid.value)

    # With nothing selected the pipeline would receive a frame without any
    # column, so report it and stop instead of running the detection.
    if not attrs:
        print('Select at least one attribute')
        return

    # Filter the dataset down to the selected columns
    data = data_df[attrs]

    percentage = percentage_wid.value
    search_communities(data, percentage)

In [6]:
# Build the user interface

# Multi-selection list with the attributes that can be used for the detection.
# `description` / `disabled` are the keyword names the widget recognises; the
# previous misspellings (`descriptions`, `disable`) were never applied.
questions_wid = widgets.SelectMultiple(
    options=list(attributes.keys()),
    description='Select attributes',
    disabled=False
)

# Slider whose value is passed as `percentage` to the community search
percentage_wid = widgets.FloatSlider(
    value=1.0,
    min=0.0,
    max=1.0,
    step=0.01,
    description='Min % common answers'
)

button = widgets.Button(
    description = 'Search'
)

# Run the search every time the button is clicked
button.on_click(btn_event)

# `header` was misspelled as `hader`; the header slot is intentionally left empty
AppLayout(header=None, left_sidebar=questions_wid, center=None, right_sidebar=percentage_wid, footer=button)

AppLayout(children=(Button(description='Search', layout=Layout(grid_area='footer'), style=ButtonStyle()), Sele…

Percentage 1.0
---------------------
COMMUNITY - 0
	- N. Members: 28
	- Properties:
		- Transport Pollution 0
		- Transport Type 1
---------------------
COMMUNITY - 1
	- N. Members: 2
	- Properties:
		- Transport Pollution 0
		- Transport Type 1
		- Reduce Consumption 0
		- Change Transport 0
		- Recycle 0
---------------------
COMMUNITY - 2
	- N. Members: 30
	- Properties:
		- Reduce Consumption 1
---------------------
N. USERS WITHOUT COMMUNITY - 1
