In [1]:
import pandas as pd
import numpy as np

import ipywidgets as widgets
from ipywidgets import AppLayout
import IPython.display as pyDis

from context import algorithms
from algorithms.community_detection.similarityCommunityDetection import SimilarityCommunityDetection
from algorithms.community_detection.explainedCommunitiesDetection import ExplainedCommunitiesDetection
from algorithms.visualization.gephiVisualization import GephiVisualization

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the HECHT implicit-values table and preview its first rows.
implicit_values_path = '../../data/HECHT/implicit_values.csv'
hecht_implicit_df = pd.read_csv(implicit_values_path)
hecht_implicit_df.head()

Unnamed: 0,userId,LQAOTMuseum1OpenBeleifsConflict,LQAOTMuseum1OpenBeleifsImportant,LQAOTMuseum1OpenBeliefTest,LQAOTMuseum1OpenCategorizePeople,LQAOTMuseum1OpenConfuse,LQAOTMuseum1OpenConsider,LQAOTMuseum1OpenDifferentContext,LQAOTMuseum1OpenHateBeliefs,LQAOTMuseum1OpenOneWay,...,LQRHMSMuseum1HistUnderstandWorld,LQRHMSMuseum1HistUnderstandWorldNeg,LQRHMSPrepHistDifferentiate,LQRHMSPrepHistIdentity,LQRHMSPrepHistPatrioticValue,LQRHMSPrepHistPersonalKnowledge,LQRHMSPrepHistUnderstandNews,LQRHMSPrepHistUnderstandValues,LQRHMSPrepHistUnderstandWorld,LQRHMSPrepHistUnderstandWorldNeg
0,NR111300,6.0,6.0,1.0,1.0,1.0,6.0,6.0,1.0,6.0,...,3.0,3.0,6.0,3,1,2.0,5.0,1.0,4.0,3.0
1,NR111352,4.0,5.0,4.0,2.0,2.0,5.0,5.0,5.0,2.0,...,5.0,3.0,6.0,6,6,5.0,3.0,3.0,4.0,2.0
2,NR112800,4.0,3.0,4.0,2.0,1.0,6.0,5.0,5.0,2.0,...,5.0,2.0,6.0,6,6,5.0,3.0,2.0,5.0,1.0
3,NR115421,4.0,4.0,4.0,3.0,3.0,6.0,4.0,3.0,3.0,...,5.0,5.0,2.0,2,2,2.0,2.0,1.0,3.0,4.0
4,NR116510,6.0,6.0,1.0,3.0,3.0,4.0,4.0,6.0,3.0,...,5.0,2.0,5.0,5,6,5.0,6.0,4.0,6.0,3.0


In [3]:
# Load the HECHT demographic table and preview its first rows.
demographic_path = '../../data/HECHT/demographic.csv'
hecht_demographic = pd.read_csv(demographic_path)
hecht_demographic.head()

Unnamed: 0,userId,DemographicEducationType,DemographicGender,DemographicGrade,DemographicIdentity,DemographicPolitics,DemographicReligous
0,NR111300,GJ,M,11,J,VR,S
1,NR111352,GJ,M,11,J,R,S
2,NR112800,GJ,M,11,J,R,S
3,NR115421,GJ,F,11,J,R,S
4,NR116510,GJ,F,11,J,VR,M


In [4]:
# Build the table used to compute the user-to-user similarity matrix from the
# first block of user data (Museum-Openness): keep 'userId' plus every column
# whose name contains '1Open'.
open_cols = [col for col in hecht_implicit_df.columns if '1Open' in col]
clean_cols = ['userId', *open_cols]
hecht_open_df = hecht_implicit_df[clean_cols]

# Persist the subset without the index column, then preview it.
hecht_open_df.to_csv('../../data/HECHT/open.csv', index=False)
hecht_open_df.head()

Unnamed: 0,userId,LQAOTMuseum1OpenBeleifsConflict,LQAOTMuseum1OpenBeleifsImportant,LQAOTMuseum1OpenBeliefTest,LQAOTMuseum1OpenCategorizePeople,LQAOTMuseum1OpenConfuse,LQAOTMuseum1OpenConsider,LQAOTMuseum1OpenDifferentContext,LQAOTMuseum1OpenHateBeliefs,LQAOTMuseum1OpenOneWay,LQAOTMuseum1OpenOthers,LQAOTMuseum1OpenReligiousLeaders,LQAOTMuseum1OpenValuesMore,LQAOTMuseum1OpenValuesPossible
0,NR111300,6.0,6.0,1.0,1.0,1.0,6.0,6.0,1.0,6.0,6.0,1.0,5.0,1.0
1,NR111352,4.0,5.0,4.0,2.0,2.0,5.0,5.0,5.0,2.0,3.0,1.0,5.0,3.0
2,NR112800,4.0,3.0,4.0,2.0,1.0,6.0,5.0,5.0,2.0,5.0,1.0,5.0,4.0
3,NR115421,4.0,4.0,4.0,3.0,3.0,6.0,4.0,3.0,3.0,5.0,4.0,4.0,4.0
4,NR116510,6.0,6.0,1.0,3.0,3.0,4.0,4.0,6.0,3.0,5.0,6.0,4.0,5.0


In [5]:
# INPUT:
#   - data: dataframe from which the similarities are computed
#   - percentage: percentage of users with a common answer
data = hecht_open_df[open_cols]
percentage = 0.7

# Configure the detector with the answers table, the similarity-based
# detection class and the 'euclidean' metric name, then run the search.
community_detection = ExplainedCommunitiesDetection(
    data,
    SimilarityCommunityDetection,
    'euclidean',
)
search_result = community_detection.search_all_communities(
    answer_binary=False,
    percentage=percentage,
)
n_communities, users_communities = search_result

In [6]:
# Explain the communities: print a summary for every community with more than
# one member, and collect the members of the remaining ones separately.
users_without_community = []

for community_id in range(n_communities):
    community_data = community_detection.get_community(community_id, percentage=percentage)
    members = community_data['members']

    # A community with at most one member is only counted, not described.
    if len(members) <= 1:
        users_without_community.extend(members)
        continue

    properties = community_data['properties']

    print('---------------------')
    print('COMMUNITY -', community_data['name'])
    print('\t- N. Members:', len(members))
    print('\t- Properties:')
    for prop_name in properties.keys():
        print('\t\t-', prop_name, properties[prop_name])


print('---------------------')
print('N. USERS WITHOUT COMMUNITY -', len(users_without_community))

---------------------
COMMUNITY - 0
	- N. Members: 2
	- Properties:
		- LQAOTMuseum1OpenBeleifsConflict 1.0
		- LQAOTMuseum1OpenDifferentContext 6.0
		- LQAOTMuseum1OpenOneWay 1.0
		- LQAOTMuseum1OpenReligiousLeaders 1.0
---------------------
COMMUNITY - 1
	- N. Members: 2
	- Properties:
		- LQAOTMuseum1OpenBeleifsConflict 6.0
		- LQAOTMuseum1OpenDifferentContext 4.0
		- LQAOTMuseum1OpenValuesMore 4.0
---------------------
COMMUNITY - 2
	- N. Members: 2
	- Properties:
		- LQAOTMuseum1OpenCategorizePeople 5.0
		- LQAOTMuseum1OpenConfuse 1.0
		- LQAOTMuseum1OpenDifferentContext 6.0
		- LQAOTMuseum1OpenHateBeliefs 6.0
		- LQAOTMuseum1OpenOneWay 2.0
		- LQAOTMuseum1OpenReligiousLeaders 1.0
---------------------
COMMUNITY - 3
	- N. Members: 2
	- Properties:
		- LQAOTMuseum1OpenBeleifsConflict 5.0
		- LQAOTMuseum1OpenBeliefTest 5.0
		- LQAOTMuseum1OpenConfuse 5.0
		- LQAOTMuseum1OpenOthers 6.0
---------------------
COMMUNITY - 4
	- N. Members: 2
	- Properties:
		- LQAOTMuseum1OpenConfuse 1.0

In [7]:
# Add the community data to the demographic table and save the result.
# NOTE(review): this relies on users_communities yielding exactly one value per
# row of hecht_demographic, in row order — confirm against search_all_communities.
community_labels = users_communities.values()
hecht_demographic['community'] = community_labels

communities_csv_path = '../../data/HECHT/open_communities.csv'
hecht_demographic.to_csv(communities_csv_path, index=False)

In [8]:
# Filter out the communities with fewer than 2 users.
# Count the non-null userId entries per community ...
community_sizes = hecht_demographic.groupby(by='community')['userId'].count()
# ... flag the communities below the threshold and collect their labels ...
filter_groups = community_sizes < 2
groups_to_filter = filter_groups[filter_groups].index.values
# ... and keep the index labels of the rows that belong to those communities.
in_small_group = hecht_demographic['community'].isin(groups_to_filter)
users_out = hecht_demographic.loc[in_small_group].index.values

In [10]:
# Prepare the user rows and the pairwise similarity matrix.
#
# The rows listed in `users_out` (members of communities with fewer than 2
# users) are removed from BOTH tables. Previously only `data` was filtered, so
# `users` kept every row while `distances` covered only the remaining ones and
# the two no longer lined up row by row.
# NOTE(review): like the original filter on `data`, this assumes `data` and
# `hecht_demographic` share the same row index — confirm.
keep_users = ~hecht_demographic.index.isin(users_out)
users = hecht_demographic[keep_users].values

data = data[~data.index.isin(users_out)]

# Despite the variable name, this holds cosine *similarities* between users.
distances = cosine_similarity(data)

# Every remaining user must have exactly one row in the similarity matrix.
assert len(users) == distances.shape[0], 'users and distances are misaligned'

In [14]:
# Persist the user-to-user similarity matrix as CSV, without the index column.
forces_df = pd.DataFrame(distances)
forces_df.to_csv('../../data/HECHT/forces_open.csv', index=False)

In [9]:
# Draw in Gephi: create the project's Gephi helper for the 'hecht_open' workspace.
# NOTE(review): the recorded run of this cell ended in ConnectionRefusedError,
# so the constructor apparently needs a reachable Gephi endpoint — make sure
# one is running before executing this cell.
gephi_workspace = 'hecht_open'
gv = GephiVisualization(workspace=gephi_workspace)

ConnectionRefusedError: [Errno 61] Connection refused

In [None]:
# Split off the demographic data (TODO: this cell has no code yet)


In [None]:
# Prepare the user rows and the pairwise similarity matrix.
#
# The rows listed in `users_out` (members of communities with fewer than 2
# users) are removed from BOTH tables. Previously only `data` was filtered, so
# `users` kept every row while `distances` covered only the remaining ones and
# the two no longer lined up row by row.
# NOTE(review): like the original filter on `data`, this assumes `data` and
# `hecht_demographic` share the same row index — confirm.
keep_users = ~hecht_demographic.index.isin(users_out)
users = hecht_demographic[keep_users].values

data = data[~data.index.isin(users_out)]

# Despite the variable name, this holds cosine *similarities* between users.
distances = cosine_similarity(data)

# Every remaining user must have exactly one row in the similarity matrix.
assert len(users) == distances.shape[0], 'users and distances are misaligned'

In [None]:
# Hand the user rows and their similarity matrix to the Gephi helper; every
# column name after the first one is passed as a user property.
user_property_names = list(hecht_demographic.columns[1:])
gv.load_community(users, distances, users_properties=user_property_names)

In [None]:
# Show the column names of the demographic table as a plain list.
hecht_demographic.columns.tolist()

In [None]:
# Show the demographic table as a raw array (all columns, original order).
hecht_demographic.values