In [None]:
from google.colab import drive
import pandas as pd

# Make Google Drive visible under /content/drive so the CSVs can be read
# like local files (Colab-only step).
drive.mount('/content/drive')

# Input files: one table of authors and one table of articles.
file1_path = '/content/drive/MyDrive/Group6/authors.schistosomiasis.csv'
file2_path = '/content/drive/MyDrive/Group6/articles.schistosomiasis.csv'

file1_data, file2_data = (
    pd.read_csv(csv_path) for csv_path in (file1_path, file2_path)
)

# Quick look at both tables. info() prints its summary itself; the head()
# frames are only rendered when this tuple is the last expression of a
# notebook cell.
(
    file1_data.head(),
    file2_data.head(),
    file1_data.info(),
    file2_data.info(),
)
# Merge data: keep only rows whose PMID occurs in both tables.
merged_data = file1_data.merge(file2_data, on='PMID', how='inner')

# Count merged rows per (forename, lastname) pair; each row is treated as one
# article credited to that author.
author_name_columns = ['AuthorForename', 'AuthorLastname']
author_activity = (
    merged_data
    .groupby(author_name_columns)
    .size()
    .reset_index(name='ArticleCount')
)

# The ten authors with the highest counts, largest first.
ranked_authors = author_activity.sort_values(by='ArticleCount', ascending=False)
most_active_authors = ranked_authors.head(10)

# Build a partnership network: one node per author, one edge per pair of
# authors that share at least one article (PMID).
from itertools import combinations
import networkx as nx

# Rows lacking a forename or lastname cannot identify an author: a NaN inside
# the (forename, lastname) tuple never compares equal to another NaN, so every
# such row would otherwise become its own spurious node.
named_rows = merged_data.dropna(subset=['AuthorForename', 'AuthorLastname'])

edges = []
for _pmid, group in named_rows.groupby('PMID'):
    # dict.fromkeys() removes repeated names while keeping first-seen order,
    # so an author listed twice on one article is not paired with themselves
    # (which would add a self-loop edge to the graph).
    authors = list(dict.fromkeys(zip(group['AuthorForename'], group['AuthorLastname'])))
    edges.extend(combinations(authors, 2))  # Generate partnerships

# Undirected graph; repeated collaborations collapse into a single edge.
collaboration_graph = nx.Graph()
collaboration_graph.add_edges_from(edges)
import matplotlib.pyplot as plt

# Horizontal bar chart of article counts for the most active authors,
# labelled "Lastname, Forename".
author_labels = most_active_authors['AuthorLastname'] + ', ' + most_active_authors['AuthorForename']

bar_fig, bar_ax = plt.subplots(figsize=(12, 6))
bar_ax.barh(author_labels, most_active_authors['ArticleCount'], color='skyblue')
bar_ax.set_xlabel('Article Count')
bar_ax.set_ylabel('Author')
bar_ax.set_title('Top 10 Most Active Authors')
# barh draws the first row at the bottom; flip the axis so the most active
# author ends up at the top.
bar_ax.invert_yaxis()
plt.show()

# (forename, lastname) tuples of the most active authors, in ranking order.
# The ordered list drives node insertion (keeps the seeded layout
# reproducible); the set gives O(1) membership tests below.
top_author_names = list(zip(most_active_authors['AuthorForename'], most_active_authors['AuthorLastname']))
top_authors = set(top_author_names)

# Keep only collaborations where both endpoints are top authors.
subgraph_edges = [
    (first_author, second_author)
    for first_author, second_author in collaboration_graph.edges
    if first_author in top_authors and second_author in top_authors
]

subgraph = nx.Graph()
# Add every top author explicitly: building the graph from edges alone would
# silently drop a top author who never co-authored with another top author.
subgraph.add_nodes_from(top_author_names)
subgraph.add_edges_from(subgraph_edges)

# Node size is proportional to the author's article count. Every node is a
# top author, so a plain dict lookup replaces the per-node DataFrame filter.
article_counts = dict(zip(top_author_names, most_active_authors['ArticleCount']))
node_sizes = [article_counts[node] * 5 for node in subgraph.nodes]

# Visualizing network graphs: draw the collaboration subgraph of the most
# active authors, with node area scaled by node_sizes.
plt.figure(figsize=(14, 12))
# A fixed seed makes the force-directed layout repeat between runs.
pos = nx.spring_layout(subgraph, seed=42)
nx.draw_networkx_nodes(subgraph, pos, node_size=node_sizes, node_color='skyblue', alpha=0.7)
nx.draw_networkx_edges(subgraph, pos, alpha=0.5)
nx.draw_networkx_labels(subgraph, pos, font_size=10, font_color='black', font_family='sans-serif')

# The title reports how many authors were actually selected, instead of a
# hard-coded number that disagreed with the top-10 selection.
plt.title(f"Collaboration Network of Top {len(most_active_authors)} Active Authors", fontsize=16)
plt.axis("off")
plt.show()

