# Check user connections on Twitter

This notebook is used to check whether users that are connected based on similarity are also connected on Twitter.

### Handle imports

In [None]:
import os

import pandas as pd
import numpy as np
import tweepy
from tqdm import tqdm
from pymongo import MongoClient

### Load Twitter API secrets

In [None]:
# Load the dotenv IPython extension, then run its magic so the values that
# the next cell reads with os.getenv are available as environment variables.
# NOTE(review): presumably reads a `.env` file near the notebook — confirm the path.
%load_ext dotenv
%dotenv

In [None]:
# Twitter API credentials keyed by lower-case name. Each value is read from
# the environment variable with the upper-case form of the key and is None
# when that variable is not set.
_credential_names = (
    "bearer_token",
    "api_key",
    "api_secret",
    "access_token",
    "access_secret",
)
twitter_api = {name: os.environ.get(name.upper()) for name in _credential_names}

### Load Twitter data from parquet file

This data should be created with one of the notebooks that handle the coordination calculation

In [None]:
# Read the stored user-pair similarities, order them from highest to lowest
# weight, keep only pairs with a weight of at least 0.95 and renumber the rows.
similarity = (
    pd.read_parquet("./parquet_saves/XXXX.snappy")
    .sort_values("Weight", ascending=False)
    .loc[lambda frame: frame["Weight"] >= 0.95]
    .reset_index(drop=True)
)

### Transform the user_screen_names to twitter ids

User screen names can easily be changed on Twitter while the user id stays the same

In [None]:
source_uuid = "8273444c-abdd-4410-829a-970846ebd00e"
connection = MongoClient("mongodb://localhost:27017/")
db = connection.textclustDB
texts = db[f"texts_{source_uuid}"]

# Resolve every distinct screen name once instead of once per row: a user
# that appears in many pairs would otherwise trigger the same query repeatedly.
screen_names = pd.unique(similarity[["User1", "User2"]].to_numpy().ravel())

user_ids = {}
for screen_name in tqdm(screen_names, total=len(screen_names)):
    user_doc = texts.find_one(
        {"specific.user.screen_name": screen_name},
        projection={"_id": 0, "id": "$specific.user.id"},
    )
    # find_one returns None when no document matches; fail with a message
    # that names the user instead of an opaque error on the subscript below.
    if user_doc is None or "id" not in user_doc:
        raise LookupError(
            f"No user id found in texts_{source_uuid} for screen name {screen_name!r}"
        )
    user_ids[screen_name] = user_doc["id"]

# Map the resolved ids back onto the pairs, row for row.
id_users1 = similarity["User1"].map(user_ids).to_numpy()
id_users2 = similarity["User2"].map(user_ids).to_numpy()

similarity["user1id"] = id_users1
similarity["user2id"] = id_users2

### Create the new columns that record whether each user follows the other

In [None]:
# Start both columns as "unknown". The nullable boolean dtype lets them hold
# True/False later without the dtype change that a float NaN column would
# need when booleans are written into it.
similarity["user1follow"] = pd.Series(pd.NA, index=similarity.index, dtype="boolean")
similarity["user2follow"] = pd.Series(pd.NA, index=similarity.index, dtype="boolean")

### Check the connection for the users in the similarity matrix

In [None]:
auth = tweepy.OAuthHandler(twitter_api["api_key"], twitter_api["api_secret"])
auth.set_access_token(twitter_api["access_token"], twitter_api["access_secret"])

# wait_on_rate_limit makes tweepy sleep instead of failing when the rate limit is hit.
api = tweepy.API(auth, wait_on_rate_limit=True)

user1_follows = []
user2_follows = []
id_pairs = zip(similarity["user1id"], similarity["user2id"])
for user1_id, user2_id in tqdm(id_pairs, total=len(similarity)):
    try:
        # get_friendship returns a (source, target) pair; the source side
        # describes the relationship from user1's point of view.
        source, _target = api.get_friendship(source_id=user1_id, target_id=user2_id)
    except (tweepy.NotFound, tweepy.Forbidden):
        # The relationship cannot be determined for this pair (for example an
        # account that no longer exists or is not accessible): keep it as
        # unknown rather than aborting the whole run.
        user1_follows.append(pd.NA)
        user2_follows.append(pd.NA)
        continue
    user1_follows.append(source.following)
    user2_follows.append(source.followed_by)

# Write each column once, as nullable booleans, so unknown pairs stay missing.
similarity["user1follow"] = pd.array(user1_follows, dtype="boolean")
similarity["user2follow"] = pd.array(user2_follows, dtype="boolean")

### Store the result

In [None]:
# Persist the pairs together with their follow information as a
# snappy-compressed parquet file.
friendships_path = "./parquet_saves/friendships XXXXX.snappy"
similarity.to_parquet(friendships_path, compression="snappy")

### Filter the rows where at least one user follows the other

In [None]:
# Keep the pairs where at least one of the two users follows the other.
# Comparing with `== True` (instead of relying on truthiness) keeps rows whose
# follow status is missing out of the result.
follows_either = (similarity["user1follow"] == True) | (similarity["user2follow"] == True)
similarity[follows_either]