In [None]:
import json
import logging
import os
import re
import time
from collections import Counter
from collections.abc import MutableMapping

import community
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import tweepy
from fastprogress import master_bar, progress_bar
from pymongo import MongoClient


# Logger: INFO-and-above records go to a log file (via basicConfig) and are
# also echoed through an extra stream handler attached to the root logger.
# The file format carries an explicit date format; the stream formatter does not.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logging.basicConfig(
    filename='Anàlisi-SupraCommunities.log',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
)

ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)
logging.getLogger().addHandler(ch)

In [None]:
##############
# PARAMETERS #
##############

# MongoDB credentials are taken from the MONGO_USERNAME / MONGO_PASSWORD
# environment variables so real secrets never have to be written into the
# notebook. The original 'XXX' placeholders are kept as fallbacks, so the cell
# behaves exactly as before when the variables are not set.
# No host is passed, so PyMongo's default connection target applies.
client = MongoClient(
    username=os.environ.get('MONGO_USERNAME', 'XXX'),
    password=os.environ.get('MONGO_PASSWORD', 'XXX'),
)

# Database and collection that every later cell reads from and writes to.
DatabaseName = "Hashtags"
TweetCollectionName = "Campanya-Interactions"

db = client[DatabaseName]
tweetCollection = db[TweetCollectionName]

In [None]:
###########################
# ADD SUP_COMMUNITY FIELD #
###########################

def load_tweets(tweet_collection):
    """Fetch the community labels of every document in the collection.

    Runs a single-stage aggregation that projects ``_id``,
    ``ACTIVE_community`` and ``PASSIVE_community`` and materialises the whole
    result in memory. Progress messages are printed to stdout along the way.

    Keyword arguments:
    tweet_collection -- object exposing ``aggregate(pipeline, allowDiskUse=...)``
                        (in this notebook, a MongoDB collection)

    Returns a list with one item per document yielded by the aggregation.
    """
    wanted_fields = ('_id', 'ACTIVE_community', 'PASSIVE_community')
    projection_stage = {'$project': {field: True for field in wanted_fields}}

    print("Query", end=" ")
    cursor = tweet_collection.aggregate([projection_stage], allowDiskUse=True)
    print("OK; List", end=" ")
    documents = list(cursor)
    print("OK; Total users:", len(documents))
    return documents

# Load the party-level community labels of every document (one dict each).
tweets = load_tweets(tweetCollection)

# Kept defined even when the collection is empty and the loop never runs.
SUPRA_COMMUNITY_ACTIVE = ""
SUPRA_COMMUNITY_PASSIVE = ""

# Party-level communities grouped into the three supra-communities.
independentist = ['JXCAT', 'ERC', 'CUP']
constitutionalist = ['CS', 'PP', 'VOX']
federalist = ['PSC', 'COMUNS']


def _supra_community(community_label):
    """Map a party-level community label to its supra-community code.

    Returns "IND", "CONST" or "FED" when the label belongs to the matching
    list above, and "?" for anything else (including ``None``).
    """
    if community_label in independentist:
        return "IND"
    if community_label in constitutionalist:
        return "CONST"
    if community_label in federalist:
        return "FED"
    return "?"


for result in progress_bar(tweets):

    # $project leaves out fields a document does not have, so indexing with
    # result['...'] would raise KeyError and abort the loop half-way through
    # the collection. .get() yields None instead, which maps to "?".
    SUPRA_COMMUNITY_ACTIVE = _supra_community(result.get('ACTIVE_community'))
    SUPRA_COMMUNITY_PASSIVE = _supra_community(result.get('PASSIVE_community'))

    # One update per document; a failure is logged and the loop moves on to
    # the next document (best-effort, as in the original).
    try:
        tweetCollection.update_one(
            {'_id': result['_id']},
            {
                '$set': {
                    'SUP_community_active': SUPRA_COMMUNITY_ACTIVE,
                    'SUP_community_passive': SUPRA_COMMUNITY_PASSIVE,
                }
            },
            upsert=False,
        )
    except Exception as e:
        logging.error(e)
        logging.error("Fatal exception inserting users in MongoDB")

In [None]:
###########################
# TWEETS BY SUP_COMMUNITY #
###########################

def load_tweets(collection):
    """Count the collection's documents per active supra-community.

    Runs a two-stage aggregation: keep only ``SUP_community_active``, then
    group on that field and sum 1 per document. Progress messages are printed
    to stdout along the way.

    Note: ``load_tweets`` is defined in more than one cell of this notebook;
    this definition is the one in effect once this cell has run.

    Keyword arguments:
    collection -- MongoDB Tweets' Collection

    Returns a list of the aggregation results, each carrying the grouping key
    under ``_id`` and the number of documents under ``count``.
    """
    keep_active_only = {'$project': {'SUP_community_active': True}}
    count_per_active = {
        '$group': {
            '_id': {'SUP_community_active': '$SUP_community_active'},
            'count': {'$sum': 1},
        }
    }

    print("Query", end=" ")
    cursor = collection.aggregate([keep_active_only, count_per_active], allowDiskUse=True)
    print("OK; List", end=" ")
    grouped = list(cursor)
    print("OK; Total combinations:", len(grouped))
    return grouped

# Per-supra-community counts, as returned by the aggregation.
tweets = load_tweets(tweetCollection)
tweets_df = pd.DataFrame(tweets)

# Expand the nested `_id` grouping key into its own column(s) and put the
# matching counts next to it.
group_keys = pd.json_normalize(tweets_df['_id'])
tweets_df2 = pd.concat([group_keys, tweets_df['count']], axis=1)

# Grand total over all groups.
totalTweets = tweets_df2['count'].sum()
print(f"TOTAL TWEETS: {totalTweets}")

# Largest groups first, printed without the index column.
ranked_counts = tweets_df2.sort_values('count', ascending=False)
print(ranked_counts.to_string(index=False))

In [None]:
########################################
# TWEETS BY SUP_COMMUNITY INTERACTIONS #
########################################

def load_tweets(collection):
    """Count the collection's documents per (active, passive) supra-community pair.

    Runs a two-stage aggregation: keep only ``SUP_community_active`` and
    ``SUP_community_passive``, then group on the pair and sum 1 per document.
    Progress messages are printed to stdout along the way.

    Note: ``load_tweets`` is defined in more than one cell of this notebook;
    this definition is the one in effect once this cell has run.

    Keyword arguments:
    collection -- MongoDB Tweets' Collection

    Returns a list of the aggregation results, each carrying the grouping key
    under ``_id`` and the number of documents under ``count``.
    """
    keep_supra_fields = {
        '$project': {
            'SUP_community_active': True,
            'SUP_community_passive': True,
        }
    }
    pair_key = {
        'SUP_community_active': '$SUP_community_active',
        'SUP_community_passive': '$SUP_community_passive',
    }
    count_per_pair = {'$group': {'_id': pair_key, 'count': {'$sum': 1}}}

    print("Query", end=" ")
    cursor = collection.aggregate([keep_supra_fields, count_per_pair], allowDiskUse=True)
    print("OK; List", end=" ")
    grouped = list(cursor)
    print("OK; Total combinations:", len(grouped))
    return grouped

# Counts per (active, passive) supra-community pair, as returned by the aggregation.
tweets = load_tweets(tweetCollection)
tweets_df = pd.DataFrame(tweets)

# Expand the nested `_id` grouping key into one column per key field and put
# the matching counts next to them.
pair_keys = pd.json_normalize(tweets_df['_id'])
tweets_df2 = pd.concat([pair_keys, tweets_df['count']], axis=1)

# Grand total over all pairs.
totalTweets = tweets_df2['count'].sum()
print(f"TOTAL TWEETS: {totalTweets}")

# Most frequent pairs first, printed without the index column.
ranked_pairs = tweets_df2.sort_values('count', ascending=False)
print(ranked_pairs.to_string(index=False))