# Community Extraction

In [1]:
import pickle
import os
import networkx as nx
import re
# Directory holding the downloaded wikitext, read below as "<DOWNLOADS_DIR>/<node>.txt"
DOWNLOADS_DIR = "downloads"

In [2]:
# Load the previously built graph from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load "graph.pkl" when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open("graph.pkl", "rb") as graph_file:
	S = pickle.load(graph_file)

## Using `region`

### Creating `S_region`
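Finds the `region` field in each philosopher's downloaded wikitext and adds it to the network as a node attribute (if multiple regions exist, the first one is used). Nodes with no downloaded file or no `region` field are removed.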

In [None]:
# Build a copy of S that keeps only the nodes for which a region can be read
# from the downloaded wikitext; the region is stored in the 'region' attribute.
# NOTE(review): the section heading calls this graph `S_region`, but the
# following cells read it as `S_subfield`, so that name is kept here.
S_subfield = S.copy()

# Matches an infobox line such as "| region = [[Western philosophy]]";
# group 1 is the content of the first [[...]] link after "region =".
region_pattern = r'\|\s*region\s*=\s*\[\[([^\]]+)\]\]'
nodes_to_remove = []  # Collect nodes to remove

for node in list(S_subfield.nodes):
	node_file_name = DOWNLOADS_DIR + "/" + node + ".txt"
	if not os.path.exists(node_file_name):
		nodes_to_remove.append(node)  # Mark node for removal if file does not exist
		continue

	try:
		with open(node_file_name, 'r', encoding='utf-8') as f:
			text = f.read()
	except (OSError, UnicodeError) as e:
		print(f"Error processing node {node}: {e}")
		# An unreadable file means no region is known; drop the node so that
		# no node without a 'region' attribute is left in the graph.
		nodes_to_remove.append(node)
		continue

	matches = re.search(region_pattern, text)
	if matches:
		# For a piped link "[[Target|label]]" keep only the link target.
		S_subfield.nodes[node]['region'] = matches.group(1).split('|')[0].strip()
	else:
		nodes_to_remove.append(node)  # Mark node for removal if no region is found

S_subfield.remove_nodes_from(nodes_to_remove)

In [None]:
# Show the size of the graph before and after filtering by region
for label, graph in (("Original graph: ", S), ("Region graph: ", S_subfield)):
	print(label, graph)

# Spot-check the attributes stored on one arbitrary node
sample_node = list(S_subfield.nodes(data=True))[143]
print(sample_node)

Original graph:  DiGraph with 1366 nodes and 10850 edges
Region graph:  DiGraph with 557 nodes and 4415 edges
('Anaximander', {'contentlength': 7671, 'region': 'Western philosophy'})


### Generating the communities

In [None]:
# Group node names by their 'region' attribute: {region: [node, ...]}
region_partition = {}
for node, attrs in S_subfield.nodes(data=True):
	region_partition.setdefault(attrs.get('region'), []).append(node)

# Report the size of each region community
print("Region communities with length:")
for region_name, members in region_partition.items():
	print(f"Region: {region_name}, Length: {len(members)}")

Only *Western philosophy* forms a large community; all other regions are comparatively small.

## Using `subfield`
Source: https://en.wikipedia.org/wiki/Lists_of_philosophers

### Extracting subfield information

In [12]:
from wiki_utils import getJsonResponse, findLinks

# Titles of the Wikipedia "List of ..." pages, one per philosophical subfield
subfield_links = [
	"List of aestheticians",
	"List of critical theorists",
	"List of environmental philosophers",
	"List of epistemologists",
	"List of ethicists",
	"List of existentialists",
	"List of feminist philosophers",
	"List of humanists",
	"List of logicians",
	"List of metaphysicians",
	"List of social and political philosophers",
	"List of phenomenologists",
	"List of philosophers of language",
	"List of philosophers of mind",
	"List of philosophers of religion",
	"List of philosophers of science",
	"List of political philosophers",
	"List of political theorists",
	"List of rationalists",
	"List of utilitarians"
]

# List page title -> flat list of the links found on that page
subfield_data = {}
for subfield_list in subfield_links:
	wiki_markup = getJsonResponse(subfield_list)
	subfield_data.setdefault(subfield_list, []).extend(findLinks(wiki_markup))

# Same mapping, keyed by the bare subfield name (e.g. "List of logicians" -> "logicians")
clean_subfield_data = {
	list_title.replace("List of", "").strip(): links
	for list_title, links in subfield_data.items()
}

# NOTE(review): this counter is never updated in this cell; kept in case a
# later cell reads it.
total_philosophers = 0
print("Subfield: Amount of philosophers categorized in this subfield")
print("(Multiple subfields for a philosopher is possible)")
print("-"*50)

# NOTE(review): in the recorded output two lists yield a single link each,
# which suggests those titles may be redirect pages -- confirm.
philosopher_subfield_map = {} # Converting the subfield info to a map of philosophers to subfields
for subfield, philosopher_list in clean_subfield_data.items():
	print(f"- {subfield}: {len(philosopher_list)}")
	for philosopher in philosopher_list:
		# Record every subfield a philosopher is listed under. The append must
		# run for already-known philosophers too, otherwise only the first
		# subfield encountered would ever be stored.
		subfields = philosopher_subfield_map.setdefault(philosopher, [])
		if subfield not in subfields:  # a name linked twice on one page counts once
			subfields.append(subfield)

# Uncomment to preview the first few entries:
# for philosopher, subfields in list(philosopher_subfield_map.items())[:10]:
# 	print(f"{philosopher}: {subfields}")

Subfield: Amount of philosophers categorized in this subfield
(Multiple subfields for a philosopher is possible)
--------------------------------------------------
- aestheticians: 86
- critical theorists: 107
- environmental philosophers: 80
- epistemologists: 122
- ethicists: 287
- existentialists: 60
- feminist philosophers: 134
- humanists: 1
- logicians: 291
- metaphysicians: 184
- social and political philosophers: 1
- phenomenologists: 24
- philosophers of language: 141
- philosophers of mind: 140
- philosophers of religion: 110
- philosophers of science: 111
- political philosophers: 199
- political theorists: 137
- rationalists: 27
- utilitarians: 104


### Adding subfield info to network

In [17]:
# Start from the full graph and keep only philosophers with known subfields
S_subfield = S.copy()

# Nodes absent from the subfield map are dropped from the copy
nodes_to_remove = [name for name in S_subfield.nodes if name not in philosopher_subfield_map]
S_subfield.remove_nodes_from(nodes_to_remove)

# Every remaining node has an entry in the map; attach it as 'subfields'
for name in list(S_subfield.nodes):
	S_subfield.nodes[name]['subfields'] = philosopher_subfield_map[name]

print("Did not find subfield for: ", len(nodes_to_remove), " philosophers (therefore not included in S_subfield)")
print("Original graph S: ", S)
print("S_subfield: ", S_subfield)

Did not find subfield for:  801  philosophers (therefore not included in S_subfield)
Original graph S:  DiGraph with 1366 nodes and 10850 edges
S_subfield:  DiGraph with 565 nodes and 4789 edges


## Using the Western–Eastern divide list