In [None]:
# Step0: Import necessary libraries
!pip install networkx matplotlib


import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt




In [None]:
# Step 1: Load the important-retweet network (directed, weighted edge list).
# The sheet has to provide the columns Vertex1, Vertex2 and Weight, because
# the graph-construction step reads exactly those three per row.
filename = 'Imp_Retweet Network.xls'
df = pd.read_excel(io=filename)

# Preview the first rows to confirm the columns were parsed as expected.
df.head(n=5)




In [None]:
# Step 2: Build the directed, weighted retweet graph and draw it.
# Every row of `df` contributes one edge Vertex1 -> Vertex2 whose `weight`
# attribute is that row's Weight value.
# NOTE(review): iterrows() hands back each row as a single-dtype Series, so
# numeric vertex ids could be upcast when they sit next to a float Weight —
# confirm the column dtypes of the input sheet.
G = nx.DiGraph()

for _, row in df.iterrows():
    G.add_edge(row['Vertex1'], row['Vertex2'], weight=row['Weight'])

# Draw the initial graph. The layout is seeded so that `pos` (reused by the
# later plotting cells) and therefore every figure is identical on each run.
plt.figure(figsize=(8, 8))
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, with_labels=False, node_size=500, node_color="lightblue", font_size=10)
# G is an nx.DiGraph, so the figure is titled as a directed graph.
plt.title("Directed Graph")
plt.show()

In [None]:
# Step 3: Centrality calculations (in-degree)
# Number of highest-scoring nodes to keep and highlight.
top_n = 20

# In-degree centrality for every node of G, reduced to the top_n
# (node, score) pairs ordered from highest to lowest score.
degree_centrality = nx.in_degree_centrality(G)
top_degree = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:top_n]

# Build the membership set once; the colour list below tests every node of G
# against it, so it must not be rebuilt per node.
top_nodes = {node for node, _ in top_degree}

# Same layout (`pos`) as the initial drawing, with the top nodes in red.
plt.figure(figsize=(8, 8))
node_colors = ['red' if node in top_nodes else 'lightblue' for node in G.nodes()]
nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=500, font_size=10)
plt.title("Top Nodes by Degree In Centrality")
plt.show()
print("Top nodes by Degree Centrality:", top_degree)

# One "node score" line per top node, highest score first.
for node, score in top_degree:
    print(node, score)



In [None]:
# Step 4: Write the top in-degree centrality results to a CSV file.
# `top_degree` is the list of (node, score) pairs from the centrality step.
top_degree_df = pd.DataFrame(
    top_degree,
    columns=['Tweet_id', 'In-Degree Centrality'],
)

# Raw in-degree of each listed node, looked up in G next to its normalised score.
top_degree_df['Degree'] = [G.in_degree(tweet_id) for tweet_id in top_degree_df['Tweet_id']]

# Persist without the index column so the file contains only the three fields.
csv_filename = "top_tweets_centrality.csv"
top_degree_df.to_csv(path_or_buf=csv_filename, index=False, encoding="utf-8")

print(f"Top tweets exported to {csv_filename}")

In [None]:
## Step Opt1 (at the end of the notebook) is a programmatic way to export the tweets for these tweet IDs. It can also be done manually; hence, this step has been made optional.

In [None]:
# Step 5: Load the important-user network (directed, weighted edge list).
# The sheet has to provide the columns Vertex1, Vertex2 and Weight.
# Rebinding `df` here replaces the retweet edge list loaded earlier; graphs
# already built from it are not affected.
filename = 'Imp_User_Network.xls'
df = pd.read_excel(io=filename)

# Preview the first rows to confirm the columns were parsed as expected.
df.head(n=5)

In [None]:
# Step 6: Community detection via greedy modularity maximisation.
# NOTE(review): when the cells run top to bottom, `G` and `pos` are still the
# retweet graph and layout from Step 2 at this point — the user graph is only
# built in Step 7, although Step 5 has already reloaded `df`. Confirm which
# network this cell is meant to analyse.
from networkx.algorithms.community import greedy_modularity_communities

communities = list(greedy_modularity_communities(G))

# Map every node to the index of the community that contains it; the index
# is what the colormap turns into a colour.
community_colors = {}
for community_index, members in enumerate(communities):
    for member in members:
        community_colors[member] = community_index

# Colour values in G's node order, as nx.draw expects.
node_colors = [community_colors[graph_node] for graph_node in G.nodes()]

# Draw with the existing layout so positions match the earlier figures.
plt.figure(figsize=(8, 8))
nx.draw(
    G,
    pos,
    with_labels=False,
    node_color=node_colors,
    cmap=plt.cm.rainbow,
    node_size=500,
    font_size=10,
)
plt.title("Community Detection")
plt.show()

In [None]:
# Step 7: Build the directed, weighted user graph and draw it.
# Every row of `df` contributes one edge Vertex1 -> Vertex2 whose `weight`
# attribute is that row's Weight value. Rebinding `G` and `pos` replaces the
# graph and layout created for the retweet network.
# NOTE(review): iterrows() hands back each row as a single-dtype Series, so
# numeric vertex ids could be upcast when they sit next to a float Weight —
# confirm the column dtypes of the input sheet.
G = nx.DiGraph()

for _, row in df.iterrows():
    G.add_edge(row['Vertex1'], row['Vertex2'], weight=row['Weight'])

# Draw the initial graph. The layout is seeded so that `pos` (reused by the
# following plotting cell) and therefore every figure is identical on each run.
plt.figure(figsize=(8, 8))
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, with_labels=False, node_size=500, node_color="lightblue", font_size=10)
# G is an nx.DiGraph, so the figure is titled as a directed graph.
plt.title("Directed Graph")
plt.show()

In [None]:
# Step 8: Centrality calculations (out-degree) for users
# Number of highest-scoring nodes to keep and highlight.
top_n = 20

# Out-degree centrality for every node of G, reduced to the top_n
# (node, score) pairs ordered from highest to lowest score.
degree_centrality = nx.out_degree_centrality(G)
top_degree = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:top_n]

# Build the membership set once; the colour list below tests every node of G
# against it, so it must not be rebuilt per node.
top_nodes = {node for node, _ in top_degree}

# Same layout (`pos`) as the initial drawing, with the top nodes in red.
plt.figure(figsize=(8, 8))
node_colors = ['red' if node in top_nodes else 'lightblue' for node in G.nodes()]
nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=500, font_size=10)
plt.title("Top Nodes by Degree out Centrality")
plt.show()
print("Top nodes by Degree Centrality:", top_degree)

# One line per top node (identifier only), highest score first.
for node, _score in top_degree:
    print(node)


In [None]:
#
# Step 9: Write the top out-degree centrality results to a CSV file.
# `top_degree` is the list of (node, score) pairs from the out-degree step.
# NOTE(review): the identifier column keeps the header 'Tweet_id' even though
# this step is described as exporting users — confirm before renaming, since
# the header is part of the output file.
top_degree_df = pd.DataFrame(
    top_degree,
    columns=['Tweet_id', 'Out-Degree Centrality'],
)

# Raw out-degree of each listed node, looked up in G next to its normalised score.
top_degree_df['Degree'] = [G.out_degree(node_id) for node_id in top_degree_df['Tweet_id']]

# Persist without the index column so the file contains only the three fields.
csv_filename = "top_user_centrality.csv"
top_degree_df.to_csv(path_or_buf=csv_filename, index=False, encoding="utf-8")

print(f"Top users exported to {csv_filename}")

In [None]:
### Step Opt1 a: optional, programmatic way to look up the tweets behind the
# highest-centrality tweet ids (topic modelling can then be run on them).
# Load the raw Twitter data. Both id columns are read as strings so the ids
# are kept as text rather than parsed as numbers.
id_column_types = {"In-Reply Tweet ID": 'str', 'Imported ID': 'str'}
df_tweet = pd.read_excel("xoxoday_Raw_tweets.xls", dtype=id_column_types)

# Preview the first rows.
df_tweet.head(n=5)




In [None]:
# Step Opt1 b: attach tweet text and metadata to the top-centrality tweet ids.
import pandas as pd

# Read the exported centrality table with ids as strings, and rename the id
# column to "Imported ID" so it matches the join key in df_tweet.
top_tweets_df = pd.read_csv(
    "top_tweets_centrality.csv",
    dtype={"Tweet_id": 'str'},
).rename(columns={"Tweet_id": "Imported ID"})

# Show both column sets so the join key can be checked by eye.
print("Columns in top_tweets_df:", top_tweets_df.columns)
print("Columns in df_tweet:", df_tweet.columns)

# Inner join on "Imported ID": only ids present in both frames survive.
tweet_columns = ["Imported ID", "Tweet", 'Vertex 1', 'Relationship', "Tweet Date (UTC)"]
merged_df = top_tweets_df.merge(df_tweet[tweet_columns], on="Imported ID", how="inner")

# Display the merged result.
print(merged_df)

# Save the merged result without the index column.
merged_df.to_csv("top_tweets.csv", index=False, encoding="utf-8")
print("Merged DataFrame exported to top_tweets.csv")

In [None]:
# Step Opt2 b: attach tweet text and metadata to the top-centrality tweet ids.
# NOTE(review): this cell reads the same "top_tweets_centrality.csv" and
# writes the same "top_tweets.csv" as Step Opt1 b, so it repeats that step.
# Possibly it was meant for the user centrality export — confirm.
import pandas as pd

# Read the exported centrality table, keeping the ids as strings.
centrality_csv = "top_tweets_centrality.csv"
top_tweets_df = pd.read_csv(centrality_csv, dtype={"Tweet_id": 'str'})

# Align the id column name with the join key used in df_tweet.
top_tweets_df = top_tweets_df.rename(columns={"Tweet_id": "Imported ID"})

# Show both column sets so the join key can be checked by eye.
print("Columns in top_tweets_df:", top_tweets_df.columns)
print("Columns in df_tweet:", df_tweet.columns)

# Inner join on "Imported ID": only ids present in both frames survive.
tweet_details = df_tweet[["Imported ID", "Tweet", 'Vertex 1', 'Relationship', "Tweet Date (UTC)"]]
merged_df = pd.merge(left=top_tweets_df, right=tweet_details, on="Imported ID", how="inner")

# Display the merged result.
print(merged_df)

# Save the merged result without the index column.
output_csv = "top_tweets.csv"
merged_df.to_csv(output_csv, index=False, encoding="utf-8")
print("Merged DataFrame exported to top_tweets.csv")