# Get SocialVec embeddings
This code fetches the SocialVec embeddings for a given list of Twitter users.

In [None]:
#config related imports
import numpy as np
import pandas as pd
import tomli
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from gensim.models import Word2Vec
from tqdm import tqdm
from tqdm import trange
import time
import ast

#for logging
import logging
from importlib import reload
import datetime


# Configurations

In [None]:
# Read the project settings from the TOML file (tomli requires binary mode).
with open("../config.toml", "rb") as f:
    config = tomli.load(f)

# Additional Configs:
# When the flag is on, only the first `users_limitation` users are processed.
limit_amount_of_users = True
users_limitation = 30

#Setup logging
# Presumably reload() is here so that basicConfig takes effect again when the
# cell is re-run in the same kernel -- TODO confirm.
reload(logging)

# Timestamp used to give every run its own log file.
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

logging.basicConfig(
    filename=f"logs/SocialVec Collector_{now}.log",
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.DEBUG,
)


## Load SocialVec and list of popular users

In [None]:

# Load the Word2Vec model from the path configured under [SocialVec] model_path.
SocialVec = Word2Vec.load(config["SocialVec"]["model_path"])
# All keys of the model's vocabulary, in index order.
# NOTE(review): presumably these are Twitter user IDs stored as strings -- confirm.
SocialVec_popular_list = SocialVec.wv.index_to_key

# Twitter connection
For each relevant user, collect the list of popular users they follow.

In [None]:
# Open Twitter API for the cases that users do not exist in SocialVec yet.
# All four credentials live in the [twitter_api] table of the config file.
twitter_credentials = config["twitter_api"]

# Authorize with the consumer (application) key and secret.
auth = tweepy.OAuthHandler(
    twitter_credentials["consumer_key"],
    twitter_credentials["consumer_secret"],
)

# Attach the user's access token and secret.
auth.set_access_token(
    twitter_credentials["access_token"],
    twitter_credentials["access_token_secret"],
)

# Build the API client. Automatic rate-limit waiting is off; the collection
# loop further down sleeps and retries by itself.
api = tweepy.API(auth, wait_on_rate_limit=False)

## Auxiliary Function

In [None]:
def twitter_sleep():
    """Pause for 15 minutes while displaying a tqdm progress bar.

    The wait is split into 0.1-second ticks so the bar keeps advancing
    during the pause.
    """
    total_ticks = 15 * 600  # 600 ticks of 0.1 s make up one minute
    progress = trange(total_ticks,
                      desc = f"sleep for {total_ticks/600} minutes")
    for _ in progress:
        time.sleep(0.1)

## Experimental - collect users from Tweet IDs (not fully working yet)

In [None]:
# Experimental: look up every distinct tweet ID from the CSV and print the
# status object returned by the API.
tweets_df = pd.read_csv('incivility-sage.csv')

for tid in tweets_df.id_str.unique():
    try:
        print(api.get_status(tid))
    except tweepy.errors.TweepyException as e:
        # Only HTTPException (and its subclasses) carries `api_messages`;
        # other TweepyException instances do not, so reading the attribute
        # directly would raise AttributeError inside this handler.
        # Fall back to the exception itself when there are no API messages.
        print(getattr(e, "api_messages", None) or e)

## Get list of users from file
We use `users_limitation` to cap the number of users we collect during exploration.

In [None]:
# Read the responses file and keep one row per distinct screen name;
# the screen name is the only column needed from it.
users_df = (
    pd.read_csv('../Data/datasets/twitter-incivility/candidate_tweet_responses.csv')
    [['screen_name']]
    .drop_duplicates()
)

# Optionally keep only the first `users_limitation` users.
if limit_amount_of_users:
    users_df = users_df.head(users_limitation)

# Index by screen name so later cells can address rows by user.
users_df = users_df.set_index('screen_name')

# Get SocialVec vectors per user
## Get list of friends per user

In [None]:
# For every user, fetch the IDs of the accounts they follow and store them
# (as the string form of a sorted list) in the 'friends_list' column.
# Rate-limit errors trigger a sleep and a retry; any other API error is
# logged and the user is skipped.
max_attempts = 10

# Iterate over a snapshot of the index (the screen names): the loop adds
# columns to users_df, so it should not iterate the frame itself.
for user in list(users_df.index):

    for attempt in range(max_attempts):
        print (f"user#:{user}")
        try:
            # Keep the try body minimal: only the API call can raise here.
            friends = sorted(api.get_friend_ids(screen_name=user))

        except tweepy.errors.TweepyException as e:
            # `api_messages` and `response` exist only on HTTPException and
            # its subclasses; read them defensively so a non-HTTP failure
            # does not raise AttributeError inside the handler.
            api_messages = getattr(e, "api_messages", None) or []
            rate_limited = (
                isinstance(e, tweepy.errors.TooManyRequests)
                or 'Rate limit exceeded' in api_messages
            )

            if rate_limited:
                # Attempts still available after the one that just failed.
                retries_left = max_attempts - attempt - 1
                print(f"Rate limit exceeded. Wait and retry {retries_left} more times...")
                twitter_sleep()
                continue #retry

            # We failed for a reason other than the rate limit: give up on this user.
            print (f"error: {api_messages or e}")
            logging.debug(f"user#:{user} Error{getattr(e, 'response', e)}")
            break

        # We succeeded
        users_df.at[user, 'friends_list'] = str(friends)
        logging.debug(f"Completed user:{user}")
        users_df.at[user, 'friends_available'] = "Yes"
        break
    #completed all attempts

In [None]:
# Write users_df (indexed by screen name, with the friends columns filled in
# by the loop above) to a CSV file in the current working directory.
users_df.to_csv("users_with_friends.csv")

In [None]:
def get_popular_list(row):
    """Return the row's friend IDs as the string form of a list of strings.

    Args:
        row: Mapping-like object whose ``'friends_list'`` entry holds the
            textual representation of a Python list (e.g. ``"[1, 2, 3]"``).

    Returns:
        The ``str()`` of a list in which every ID has been converted to a
        string, e.g. ``"['1', '2', '3']"``.
    """
    # The list is stored as text, so parse it back into a Python list first.
    parsed_ids = ast.literal_eval(row['friends_list'])
    # Convert every ID to a string, then serialize the whole list.
    return str(list(map(str, parsed_ids)))

In [None]:
def friends_to_SocialVec( row ):
    """Compute a user's embedding as the mean vector of the friends known to SocialVec.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose
            ``'friends_list'`` entry holds the textual representation of a
            list of friend IDs.

    Returns:
        The string form of the averaged embedding (a list of floats), or
        ``None`` when the row has no stored friends list or when none of the
        friends appears in the SocialVec vocabulary.
    """
    raw_friends = row['friends_list']
    # Users whose friends could not be fetched have no string here
    # (typically NaN); there is nothing to embed for them.
    if not isinstance(raw_friends, str):
        return None

    #transform users ids from int list to string list
    friend_ids = [str(item) for item in ast.literal_eval(raw_friends)]

    #keep only popular users in the list:
    # key_to_index is a dict over the same vocabulary as index_to_key, so each
    # membership test is O(1) instead of a scan over the whole key list.
    known_keys = SocialVec.wv.key_to_index
    popular_list = [i for i in friend_ids if i in known_keys]

    # Without a single known friend there is no vector to average.
    if not popular_list:
        return None

    user_embedding = np.mean(SocialVec.wv[popular_list], axis=0)

    return str(user_embedding.tolist())

In [None]:
# Take the row at position 3 of users_df as a Series and transpose it back
# into a one-row DataFrame.
# NOTE(review): looks like a manual spot check on one user -- confirm.
df = pd.DataFrame(users_df.iloc[3]).T

In [None]:
# Run friends_to_SocialVec on every row (axis=1) and store the result in the
# 'temp' column.
df['temp'] = df.apply(friends_to_SocialVec, axis=1)