In [1]:
from google.colab import files

# Open Colab's upload widget; the result is indexed by file name below
uploaded = files.upload()

# Echo each uploaded file's name together with the length of its payload
for filename in uploaded:
    print(f'User uploaded file "{filename}" with length {len(uploaded[filename])} bytes')

Saving merged_dataset.csv to merged_dataset.csv
User uploaded file "merged_dataset.csv" with length 2444581 bytes


Creating visualization...


  plt.savefig('hashtag_network.png', dpi=300, bbox_inches='tight')


In [2]:
# Import libraries
import pandas as pd
import re
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from itertools import combinations
import numpy as np

# Function definitions
def extract_hashtags(text):
    """Return the hashtags found in ``text``, lower-cased and without the ``#``.

    The text is lower-cased before matching, so ``#Covid`` and ``#covid``
    produce the same tag. Anything that is not a ``str`` (for example ``None``
    or a float NaN) yields an empty list. Repeated tags are kept, in order of
    appearance.
    """
    # Guard clause: only real strings can contain hashtags.
    if not isinstance(text, str):
        return []
    return re.findall(r'#(\w+)', text.lower())

def create_hashtag_network(dataframe):
    """Build an undirected hashtag co-occurrence graph.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must have a ``'hashtags'`` column whose values are iterables of
        hashtag strings (one iterable per post).

    Returns
    -------
    networkx.Graph
        One node per distinct hashtag. An edge joins two hashtags that appear
        in the same post; its ``'weight'`` attribute is the number of posts in
        which the pair co-occurs.
    """
    G = nx.Graph()
    # Iterate the column directly; iterrows() would build a Series per row
    # only to read a single field from it.
    for hashtags in dataframe['hashtags']:
        # A tag repeated inside one post must count once for that post.
        # Without de-duplication, combinations() would emit (tag, tag)
        # self-loops and would count the same pair several times per post.
        # dict.fromkeys keeps first-seen order, so node insertion order is
        # unchanged.
        unique_tags = list(dict.fromkeys(hashtags))
        G.add_nodes_from(unique_tags)
        for first, second in combinations(unique_tags, 2):
            if G.has_edge(first, second):
                G[first][second]['weight'] += 1
            else:
                G.add_edge(first, second, weight=1)
    return G

# Read the uploaded CSV and attach a per-post list of hashtags
print("Loading dataset...")
df = pd.read_csv('/content/merged_dataset.csv')
df['hashtags'] = df['content'].map(extract_hashtags)

# Label-specific subsets, plus the posts that carry at least two hashtags
# (a post needs two tags to contribute a co-occurrence pair)
misinfo_df = df.loc[df['label'].eq('Misinformation')]
reliable_df = df.loc[df['label'].eq('Reliable')]
df_with_hashtags = df.loc[df['hashtags'].map(len).ge(2)]

# Build the co-occurrence graph from the multi-hashtag posts
print("Creating networks...")
full_network = create_hashtag_network(df_with_hashtags)

# Centrality calculations
print("Calculating centralities...")
degree_centrality = nx.degree_centrality(full_network)

# Betweenness is approximated from a sample of 100 source nodes. networkx
# documents k <= n, so a graph with fewer than 100 nodes cannot be sampled
# that way; fall back to the exact computation (k=None) in that case.
# The fixed seed makes the sampled estimate reproducible across runs,
# matching the seed already used for the layout.
betweenness_k = 100 if full_network.number_of_nodes() > 100 else None
betweenness_centrality = nx.betweenness_centrality(full_network, k=betweenness_k, seed=42)
eigenvector_centrality = nx.eigenvector_centrality(full_network, max_iter=1000)

# Create centrality dataframe: one row per hashtag, highest degree first.
# Every column is built from the same hashtag order so rows stay aligned.
network_hashtags = list(degree_centrality)
centrality_df = pd.DataFrame({
    'Hashtag': network_hashtags,
    'Degree': [degree_centrality[t] for t in network_hashtags],
    'Betweenness': [betweenness_centrality.get(t, 0) for t in network_hashtags],
    'Eigenvector': [eigenvector_centrality.get(t, 0) for t in network_hashtags]
}).sort_values('Degree', ascending=False)

# Community detection (optional): python-louvain installs the module named
# `community`, so the import stays inside the try block and the notebook
# still runs without it.
try:
    import community as community_louvain
    # Louvain partitioning is randomised; pinning random_state keeps the
    # Community column stable between runs.
    communities = community_louvain.best_partition(full_network, random_state=42)
    centrality_df['Community'] = centrality_df['Hashtag'].map(communities)
except ImportError:
    print("Install python-louvain for community detection")
    # Fallback: put every hashtag in a single placeholder community.
    centrality_df['Community'] = 0

# Hashtag frequencies per label, counted over every post with that label
misinfo_counts = Counter(tag for tags in misinfo_df['hashtags'] for tag in tags)
reliable_counts = Counter(tag for tags in reliable_df['hashtags'] for tag in tags)

# Attach the per-label counts to each hashtag in the network
centrality_df['Misinfo_Count'] = centrality_df['Hashtag'].map(misinfo_counts).fillna(0)
centrality_df['Reliable_Count'] = centrality_df['Hashtag'].map(reliable_counts).fillna(0)
centrality_df['Total_Count'] = centrality_df[['Misinfo_Count', 'Reliable_Count']].sum(axis=1)

# Share of a hashtag's labelled uses that are misinformation; a hashtag with
# no labelled uses gets 0 rather than a division by zero
centrality_df['Misinfo_Ratio'] = centrality_df.apply(
    lambda row: row['Misinfo_Count'] / row['Total_Count'] if row['Total_Count'] > 0 else 0,
    axis=1)

# Persist the per-hashtag table
centrality_df.to_csv('hashtag_analysis.csv', index=False)
print("Saved hashtag_analysis.csv")

# Visualize the network
print("Creating visualization...")
fig, ax = plt.subplots(figsize=(12, 12))

# Use only the top hashtags for visualization (centrality_df is sorted by
# degree centrality, so head(100) is the 100 best-connected hashtags)
top_hashtags = centrality_df.head(100)['Hashtag'].tolist()
subnet = full_network.subgraph(top_hashtags)
pos = nx.spring_layout(subnet, k=0.3, seed=42)

# Build the hashtag -> ratio lookup once instead of filtering the whole
# dataframe for every node (hashtags are unique in centrality_df).
misinfo_ratio = dict(zip(centrality_df['Hashtag'], centrality_df['Misinfo_Ratio']))

# Draw nodes: size encodes degree centrality, colour encodes misinfo ratio
nodes = nx.draw_networkx_nodes(
    subnet, pos,
    node_size=[degree_centrality.get(n, 0) * 5000 for n in subnet.nodes()],
    node_color=[misinfo_ratio[n] for n in subnet.nodes()],
    cmap='coolwarm',
    alpha=0.8,
    ax=ax  # Explicit Axes reference
)

# Add colorbar using figure reference
fig.colorbar(nodes, ax=ax, label='Misinformation Ratio')

# Edges, then labels only for nodes whose degree centrality exceeds 0.01
nx.draw_networkx_edges(subnet, pos, width=0.3, alpha=0.3, edge_color='gray', ax=ax)
nx.draw_networkx_labels(subnet, pos,
                       labels={n: n for n in subnet.nodes() if degree_centrality.get(n, 0) > 0.01},
                       font_size=8,
                       ax=ax)

# Use the explicit Axes/Figure objects throughout rather than pyplot's
# implicit "current" figure.
ax.set_axis_off()
ax.set_title("Top Hashtags Network\nNode size: Degree Centrality, Color: Misinformation Ratio")
network_png = 'hashtag_network.png'
fig.savefig(network_png, dpi=300, bbox_inches='tight')
plt.close(fig)


# Report the file names that were actually written (the previous message
# pointed at a non-existent network.png).
print(f"Analysis complete! Check hashtag_analysis.csv and {network_png}")


Loading dataset...
Creating networks...
Calculating centralities...
Saved hashtag_analysis.csv
Creating visualization...


  plt.savefig('hashtag_network.png', dpi=300, bbox_inches='tight')


Analysis complete! Check hashtag_analysis.csv and network.png
