In [3]:
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
from pathlib import Path
import gzip
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
from scipy.sparse import csr_matrix
import igraph as ig
import xnetwork as xn
import re
tqdm.pandas()

In [45]:
datasetName = "debate2024_Jun_bluesky"
dataPath = Path("Data")
networksPath = dataPath/"Networks"

In [46]:
with gzip.open(dataPath / f"{datasetName}.feather.gz", 'rb') as f:
    df = pd.read_feather(f)

In [47]:
g = xn.load(networksPath/f"{datasetName}_corepost.xnet")

In [48]:
g.vcount(), g.ecount()

(4808, 173448)

In [49]:
g.strength(weights="weight")[:10]

[8.751000405670558,
 9.070030278913151,
 9.861308996650004,
 1.8315453344354662,
 5.142834843120943,
 4.547724138141994,
 16.716858165992786,
 6.286531523091272,
 12.76438024671198,
 27.40263172521967]

In [50]:
g.vs.attributes()

['Label', 'MostUniqueRepost', 'Reposts count', 'RepostsCount', 'SampleReposts']

In [53]:
g.vs["MostUniqueRepost"][:10]

['"After many of Biden’s accomplishments have been dismantled and erased by right wing courts, it will become clear that his inability to get to the right place on those two issues was one of his most deadly miscalculations," writes @hamiltonnolan.bsky.social. www.hamiltonnolan.com/p/the-end-of...',
 'The "Drop out Biden" discourse is especially braindead because none of the people pushing it even think he\'s not capable of doing the job. They just think he\'s not capable of *winning*, a judgment they have rushed to without even bothering to wait a week for polls to come in',
 'At a Turning Point Action conference in Detroit, 8,000 people packed into a convention hall to hear from Donald Trump. But first, there was a word from a sponsor: Alexander Spellane, a precious metals salesman being sued by federal regulators for fraud. nyti.ms/3XyWu49',
 "President Biden’s campaign said on Tuesday that it had raised $127 million in June together with the Democratic Party. It's a sign of strengt

In [54]:
gstrenghts = g.strength(weights="weight")
userHighestStrength = g.vs["Label"][np.argmax(gstrenghts)]
userHighestStrength

'did:plc:boopgqnkg2inpleusxo7kj4l'

In [55]:
centrality = g.betweenness()
userHighestCentrality = g.vs["Label"][np.argmax(centrality)]
userHighestCentrality

'did:plc:boopgqnkg2inpleusxo7kj4l'

In [25]:
# sample posts for user with highest strength
df[df["user_id"] == userHighestStrength].sample(10)["text"].values

array(['“No text ends up as it began. None escapes history,” Rodgers writes. “In the very act of reading a text, cherishing, possessing or rejecting it, its meaning is remade. It is, inescapably, always under construction.” www.nytimes.com/2024/07/02/o...',
       'Roberts rules for Trump. Absolute immunity for “official acts.” Vacated and remanded. Party line case. Both Sotomayor and Jackson dissent. Fuck this. www.supremecourt.gov/opinions/23p...',
       'NYT: “I am worried about the image projected (by the #debate) to the outside world,” Sergey Radchenko, a historian at the Johns Hopkins School of Advanced International Studies, wrote on social media. “It is not an image of #leadership. It is an image of terminal #decline.”',
       '“Kenneth Chenault, the well-known former head of American Express, said he’s been telling CEOs that our democracy is ‘facing an existential threat,’ and that for business to function, the #ruleoflaw is ‘absolutely sacrosanct’.” newrepublic.com/article/

In [43]:
# Most similar pair of users (high weight)
mostSimilarEdgeIndices = np.argsort(g.es["weight"])
edgeIndex = mostSimilarEdgeIndices[-3]
user1Index = g.es[edgeIndex].source
user2Index = g.es[edgeIndex].target

user1 = g.vs["Label"][user1Index]
user2 = g.vs["Label"][user2Index]
print(user1, user2, g.es[edgeIndex]["weight"])

did:plc:m77ecru537mpjw2vtvkahve7 did:plc:xnkxdg6iiey2vnknyygatbij 0.9591198545584751


In [44]:
# 5 samples from these users
display(df[df["user_id"] == user1].sample(5)["text"].values)
display(df[df["user_id"] == user2].sample(5)["text"].values)

array(["Mod: Let's talk about immigration.\n\nMe: Why? They're already ignoring you.\n\n#debate",
       'Mod: The Israel-Gaza situation. President Biden, what additional leverage will you use to get Israel to end the war?\n\nBiden: I put forward a plan and everyone in Hamas and even Bibi himself have accepted my plan.\n\nI call BS.\n\n#debate',
       "Mod: Hey Trump, Putin says he'll only stop the Ukraine invasion if you do everything he wants. You game?\n\nTrump: Look, every single person in the military hates this guy [theatrically points to Biden]. They hate him! I bet he encouraged Russia to go in! \n\n#debatef",
       "Trump: The secretary of NATO said I was doing the most incredible job he'd ever seen. Why isn't he making NATO put up all the money? We have an ocean in between us and them! \n\n#debate",
       "Mod: I asked a specific question...\n\nTrump: HE'S A DOODY HEAD!\n\nBiden: NUH-UH YOU ARE\n\n#debate"],
      dtype=object)

array(['this really set the tone\n\nAnd the Republican strategists on CNN just literally applauded Tapper, Bash & CNN for being "fair" to Trump. Which is what they say when reporters act in ways that are strongly pro-GOP.',
       'Lawrence O\'donnell "lets face it, \'imagery\' has taken control of the presidency\'.. #debate \nWe\'ve turned to the most superficial components to decide our politics.',
       'That was not good. #debate \nBiden looks worn out - dammit',
       'I know the “cool kids” are posting that they are not going to watch tonight’s debate.  I’m locked in with my family.  \n\nYou do you.',
       'Trump just steam rolls, runs on.. talks.. rapid fire.. its hard to gauge what he\'s saying...its tone, manner, control.. dominance. And no one will dissect what hes said.. its just so much.. #debate \n"what? What did he say? He sounds strong.. I dunno.. whaddid he say?" \nfuck.'],
      dtype=object)

In [56]:
g.vs["community"]=g.community_leiden().membership

In [63]:
topCommunities = Counter(g.vs["community"]).most_common(10)

In [64]:
# Sample posts from each top community
for community,_ in topCommunities:
    print(f"Community {community}")
    users = g.vs.select(lambda v: v["community"] == community)
    users = set([v["Label"] for v in users])
    print(users)
    posts = df[df["user_id"].isin(users)].sample(5)["text"].values
    print(posts)

Community 0
{'did:plc:222rnvnta2lbl364bog2plxw'}
['The more you read yesterday’s immunity decision, the more appalling it gets. My take:\n\nwww.nytimes.com/2024/07/02/o...'
 'I agree.'
 'I don’t think Dems should replace Biden because I don’t like Biden. I think Dems should replace Biden because we must win.\n\nBiden is losing right now and doesn’t have the cognitive abilities to turn it around. Kamala or Whitmer would start from a similar baseline, but could campaign actively.'
 'Under today’s immunity decision, Biden could order AG Garland to immediately prosecute Thomas and Alito for corruption and insurrection. (Official act). If Garland refuses, Biden can fire him and replace him with someone who will. (Official act) I would suggest Elie Mystal.'
 'Biden is clearly suffering from cognitive decline. 72% of Americans believe he can’t do the job. That’s a terrible baseline. It’s also probably true!']
Community 1
{'did:plc:223yxsx3ifr4vghg36hi3w4a'}
["i'm getting so, so tired of the d