In [None]:
import json
import logging
import os
import re
import time
from collections import Counter, defaultdict

import community
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import tweepy
from fastprogress import master_bar, progress_bar
from pymongo import MongoClient

#Logger
# Records are written to a log file (through basicConfig) and echoed to the
# console through one extra StreamHandler on the root logger.
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
CONSOLE_HANDLER_NAME = 'notebook-console'


def attach_console_handler(logger, formatter, level=logging.INFO):
    """Attach the notebook's console handler to ``logger`` at most once.

    Re-running a notebook cell that calls ``addHandler`` unconditionally adds
    a new handler on every run, so each record is printed several times. The
    handler is therefore tagged with ``CONSOLE_HANDLER_NAME`` and reused when
    it is already attached.

    Args:
        logger: the ``logging.Logger`` that receives the handler.
        formatter: the ``logging.Formatter`` used for console output.
        level: minimum level emitted on the console.

    Returns:
        The console ``logging.StreamHandler`` attached to ``logger``.
    """
    for existing in logger.handlers:
        if existing.get_name() == CONSOLE_HANDLER_NAME:
            return existing

    handler = logging.StreamHandler()
    handler.set_name(CONSOLE_HANDLER_NAME)
    handler.setLevel(level)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    return handler


# basicConfig does nothing when the root logger already has handlers, so this
# call is safe to repeat when the cell is re-run.
logging.basicConfig(filename='Anàlisi-GetUsersCommunity.log', format=LOG_FORMAT, datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)
formatter = logging.Formatter(LOG_FORMAT)
ch = attach_console_handler(logging.getLogger(), formatter)

In [None]:
##############
# PARAMETERS #
##############

# Credentials are taken from the environment so that real values never have to
# be written into (and committed with) the notebook. When the variables are
# not set, the original 'XXX' placeholders are used.
client = MongoClient(
    username=os.environ.get('MONGO_USERNAME', 'XXX'),
    password=os.environ.get('MONGO_PASSWORD', 'XXX'),
)

# Database and collection that hold the tweets used to build the graph.
DatabaseName = "Hashtags"
CollectionName = "Campanya"

In [None]:
##################
# BUILDING GRAPH #
##################

def _interaction_target(tweet):
    """Return the screen name of the user that ``tweet`` interacts with.

    Precedence is retweet, then quote, then reply. Returns ``None`` when the
    tweet is none of these or when the target user is not available.
    """
    if 'retweeted_status' in tweet:
        return tweet['retweeted_status']['user']['screen_name']
    if tweet.get('is_quote_status'):
        # NOTE(review): a quote without 'quoted_status' yields no edge at all,
        # even if the tweet is also a reply. This mirrors the original branch
        # order - confirm that it is intended.
        if 'quoted_status' in tweet:
            return tweet['quoted_status']['user']['screen_name']
        return None
    if tweet.get('in_reply_to_status_id') is not None:
        # Presumably this can be null even when the status id is set; a null
        # target is skipped by the caller instead of being passed to add_edge.
        return tweet.get('in_reply_to_screen_name')
    return None


def _add_interaction(graph, source, target):
    """Add 1.0 to the weight of edge source -> target, creating it if needed."""
    if graph.has_edge(source, target):
        graph[source][target]['weight'] += 1.0
    else:
        graph.add_edge(source, target, weight=1.0)


G = nx.DiGraph()

db = client[DatabaseName]

tweets = db[CollectionName].find(no_cursor_timeout=True, batch_size=1000000)

# The cursor is opened with no_cursor_timeout=True, so it has to be closed
# explicitly even when processing a tweet raises.
try:
    for result in tweets:
        uid = result['user']['screen_name']
        # Every author becomes a node, including those with no interactions.
        G.add_node(uid)

        target = _interaction_target(result)
        if target is not None:
            _add_interaction(G, uid, target)
finally:
    tweets.close()

print("Nombre de nodes: {}".format(G.number_of_nodes()))
print("Nombre d'arestes: {}".format(G.number_of_edges()))

#nx.write_graphml(G, FILE_NAME)

In [None]:
#########################################
# GET USER COMMUNITY SORTED BY INDEGREE #
#########################################

# Louvain community detection is randomized. A fixed seed keeps the community
# ids stable between runs, which the hard-coded id -> name legend defined
# further down in this notebook depends on. If the seed or the data change,
# the legend has to be re-checked by hand.
LOUVAIN_SEED = 42

# NOTE(review): when both (u, v) and (v, u) exist, to_undirected() presumably
# keeps the weight of only one direction rather than their sum - verify that
# this is acceptable for the partition.
part = community.best_partition(G.to_undirected(), random_state=LOUVAIN_SEED)

community_sizes = Counter(part.values())
print("S'han detectat {} comunitats: ".format(len(community_sizes)))
# Adjust this number to keep only the communities that hold more than 1% of the nodes.
comunitats_mes_grans = community_sizes.most_common(15)
print("Les comunitats més grans són: {}".format(comunitats_mes_grans))

# Share of all nodes in each of the largest communities, as a percentage string.
n = G.number_of_nodes()
a = {k: str(round(float(v/n*100),2))+"%" for k, v in comunitats_mes_grans}
print("Les comunitats més grans tenen les següents proporcions: {}".format(a))


indeg = G.in_degree(weight='weight')
sorted_indeg = sorted(indeg, key=lambda i: i[1], reverse=True)

# Despite its name, indeg_dict maps user -> community id. The weighted
# in-degree only determines the insertion order (highest first).
indeg_dict = {user: part[user] for user, _ in sorted_indeg}

print("User communnity sorted :")
print(indeg_dict)

# Disabled experiment kept for reference: repeats the partition ten times and
# collects the community id that each user receives in every run.
'''
############################################
# GET USER COMMUNITY (MULTIPLE ITERATIONS) #
############################################

user_community_dict = defaultdict(list)

for i in range(0,10):
    part = community.best_partition(G.to_undirected())
    #print(part)
    for key,value in part.items():
        #print(user + ": " + str(comm))
        user_community_dict[key].append(value)
        
print(user_community_dict)
'''
print("")

In [None]:
############################################
# MANUAL INSPECTION OF TOP IN-DEGREE USERS #
############################################

# Print every user assigned to community 12, in the order stored in indeg_dict.
for user, community_id in filter(lambda entry: entry[1] == 12, indeg_dict.items()):
    print(user + ": " + str(community_id))

In [None]:
#####################################
# CHANGE COMMUNITY NUMBER FOR NAME  #
#####################################

# Community id -> label. Only one legend is active at a time.
#MADRID
community_legend = {
    1: "PODEMOS",
    2: "VOX",
    0: "PSOE",
    4: "PP",
    7: "MAS_MAD",
    3: "CS",
}

#ANDALUSIA
#community_legend = {7:"VOX", 2:"POR_AND", 1:"PSOE", 5:"PP", 9:"ADELANTE_AND", 6:"CS"}

# Users whose community id is not in the legend are labelled "NONE".
final_community_dict = {
    user: community_legend.get(community_id, "NONE")
    for user, community_id in indeg_dict.items()
}

print("Final community dict:")
print(final_community_dict)

In [None]:
new_collection = db['Users']

# Screen names with no matching document in the 'Users' collection. Because
# upsert=False, those updates change nothing and would otherwise go unnoticed.
unmatched_users = []

for key,value in progress_bar(final_community_dict.items()):
    try:
        update_result = new_collection.update_one(
            {'screen_name': key},
            {'$set': {'community': value}},
            upsert=False,
        )
    except Exception:
        # Best effort: log the failing user with the traceback and continue
        # with the remaining users.
        logging.exception("Fatal exception inserting users in MongoDB (screen_name=%r)", key)
    else:
        if update_result.matched_count == 0:
            unmatched_users.append(key)

if unmatched_users:
    logging.warning("%d users were not found in the Users collection and were not updated", len(unmatched_users))