In [1]:
import pickle
import os
import re
from wiki_utils import get_category_data
from network_utils import filter_graph_by_attribute, count_nodes_by_attribute

DOWNLOADS_DIR = "downloads"  # directory with one "<node>.txt" wikitext file per node

# load graph:
# NOTE: unpickling can execute arbitrary code; only load a graph.pkl you produced yourself.
# The context manager guarantees the file handle is closed after loading.
with open("graph.pkl", "rb") as graph_file:
    S = pickle.load(graph_file)

# Extracting Communities from Wikipedia

## (A) Using `region` (Not used)

### Creating `S_region`
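Finds the `region` field in each node's downloaded wikitext and adds it to the network as a node attribute (if multiple regions are listed, the first one is used). Nodes without a downloaded file or without a `region` field are removed.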

In [None]:
# NOTE: despite its name, `S_subfields` holds the *region*-annotated copy of S
# in this section; the name is kept because the following cells read it.
S_subfields = S.copy()

# Matches an infobox line such as "| region = [[Western philosophy]]" and
# captures the target of the first wiki link.
region_pattern = r'\|\s*region\s*=\s*\[\[([^\]]+)\]\]'
nodes_to_remove = []  # Nodes for which no region could be determined

for node in list(S_subfields.nodes):
    node_file_name = DOWNLOADS_DIR + "/" + node + ".txt"
    if not os.path.exists(node_file_name):
        nodes_to_remove.append(node)  # No downloaded wikitext for this node
        continue

    # Only the file read is guarded, so unrelated errors are not hidden.
    try:
        with open(node_file_name, 'r', encoding='utf-8') as f:
            text = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error processing node {node}: {e}")
        # An unreadable file means no region is known: drop the node instead of
        # leaving it in the graph without a 'region' attribute.
        nodes_to_remove.append(node)
        continue

    matches = re.search(region_pattern, text)
    if matches:
        # group(1) is the first linked region (same value the pattern captured).
        S_subfields.nodes[node]['region'] = matches.group(1)
    else:
        nodes_to_remove.append(node)  # Wikitext has no linked region field

S_subfields.remove_nodes_from(nodes_to_remove)

In [None]:
# Size comparison: the full graph vs. the copy restricted to nodes with a region.
print(f"Original graph:  {S}")
print(f"Region graph:  {S_subfields}")

# Spot-check one annotated node (position 143 in node iteration order).
annotated_nodes = list(S_subfields.nodes(data=True))
print(annotated_nodes[143])

Original graph:  DiGraph with 1366 nodes and 10850 edges
Region graph:  DiGraph with 557 nodes and 4415 edges
('Anaximander', {'contentlength': 7671, 'region': 'Western philosophy'})


### Generating the communities

In [None]:
# Group node names by their 'region' attribute: {region: [node, ...]}.
region_partition = {}
for name, attrs in S_subfields.nodes(data=True):
    region_partition.setdefault(attrs.get('region'), []).append(name)

# Report the size of every region community.
print("Region communities with length:")
for region, members in region_partition.items():
    print(f"Region: {region}, Length: {len(members)}")

Region communities with length:
Region: Western philosophy, Length: 496
Region: Russian philosophy, Length: 10
Region: Persia, Length: 2
Region: Western Philosophy, Length: 3
Region: Western philosophy|Western, Length: 1
Region: Jewish philosophy, Length: 2
Region: East Asian philosophy, Length: 1
Region: Indian philosophy, Length: 1
Region: Greater Iran|Persia, Length: 1
Region: Eastern philosophy, Length: 5
Region: Chinese philosophy, Length: 6
Region: British Unitarianism, Length: 1
Region: Africana philosophy, Length: 1
Region: Middle Eastern philosophy, Length: 4
Region: Teng (state)|Teng, Length: 1
Region: Mithila region, Length: 1
Region: Middle East, Length: 1
Region: Greek philosophy, Length: 1
Region: Islamic philosophy, Length: 3
Region: Byzantine Empire, Length: 1
Region: Roman Egypt, Length: 1
Region: Spanish philosophy, Length: 1
Region: Al-Andalus, Length: 1
Region: African philosophy, Length: 1
Region: Ancient Greek philosophy, Length: 1
Region: German philosophy, Lengt

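Only `Western philosophy` forms a large community (496 of the 557 nodes); the other regions are far too small to be useful, which is why this approach is not used.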

## (B) Using `subfield`
https://en.wikipedia.org/wiki/Lists_of_philosophers

### (B.1) Extracting subfield information

- Option 1: **Fetch the data from Wikipedia** (takes around 5 seconds)

In [11]:
# Wikipedia list pages, one per philosophical subfield.
subfield_links = [
    "List of aestheticians",
    "List of critical theorists",
    "List of environmental philosophers",
    "List of epistemologists",
    "List of ethicists",
    "List of existentialists",
    "List of feminist philosophers",
    "List of secular humanists",  # List of Humanists redirect
    "List of logicians",
    "List of metaphysicians",
    "Index of sociopolitical thinkers",  # List of Social and Political Philosophers redirect
    "List of phenomenologists",
    "List of philosophers of language",
    "List of philosophers of mind",
    "List of philosophers of religion",
    "List of philosophers of science",
    "List of political philosophers",
    "List of political theorists",
    "List of rationalists",
    "List of utilitarians",
]
# (old, new) pairs handed to get_category_data; presumably used to strip the
# page-title prefix so labels read e.g. "aestheticians" -- verify in wiki_utils.
replace_terms_subfields = [("List of", ""), ("Index of", "")]
clean_subfield_data, philosopher_subfield_info = get_category_data(
    subfield_links, replace_terms_subfields, verbose=True
)

# Saved as local version for later use (Option 2 below). The context manager
# makes sure the file is flushed and closed before anything reads it back.
with open("philosopher_subfield_info.pkl", "wb") as cache_file:
    pickle.dump(philosopher_subfield_info, cache_file)

print("Total =", len(philosopher_subfield_info))

- aestheticians: 86
- critical theorists: 107
- environmental philosophers: 80
- epistemologists: 122
- ethicists: 287
- existentialists: 60
- feminist philosophers: 134
- secular humanists: 413
- logicians: 291
- metaphysicians: 184
- sociopolitical thinkers: 245
- phenomenologists: 24
- philosophers of language: 141
- philosophers of mind: 140
- philosophers of religion: 110
- philosophers of science: 111
- political philosophers: 199
- political theorists: 137
- rationalists: 27
- utilitarians: 104
Total = 2183


- Option 2: **Use local pickle file** from last time option 1 was run

In [12]:
# Reload the subfield info cached by Option 1; the context manager closes the
# file handle. Only unpickle files you created yourself.
with open("philosopher_subfield_info.pkl", "rb") as cache_file:
    philosopher_subfield_info = pickle.load(cache_file)

### (B.2) Creating `S_subfields` network

In [20]:
separator = "-" * 50

# Build the graph whose nodes carry a 'subfields' attribute; per the helper's
# verbose output, philosophers without subfield info are left out.
S_subfields = filter_graph_by_attribute(
    attribute_name='subfields',
    attribute_info=philosopher_subfield_info,
    graph=S,
    verbose=True,
)
print("Subfield graph: ", S_subfields)

# Per-subfield node counts, framed by separator lines.
print(separator)
count_nodes_by_attribute(S_subfields, 'subfields', verbose=True)
print(separator)


Did not find subfields for: 771 philosophers (therefore not included in the filtered graph)
 -> Example of removed nodes: ['John_Selden', 'John_Maynard_Keynes', 'Thomas_of_York_(Franciscan)']
Original graph: 1366 nodes and 10855 edges
Filtered 'subfields' graph: 595 nodes and 5211 edges
Subfield graph:  DiGraph with 595 nodes and 5211 edges
--------------------------------------------------
Number of nodes by subfields:
 -> epistemologists: 78
 -> ethicists: 115
 -> philosophers of religion: 49
 -> logicians: 89
 -> metaphysicians: 118
 -> philosophers of language: 68
 -> sociopolitical thinkers: 131
 -> philosophers of mind: 57
 -> utilitarians: 29
 -> political philosophers: 135
 -> aestheticians: 51
 -> existentialists: 7
 -> feminist philosophers: 23
 -> political theorists: 73
 -> philosophers of science: 45
 -> critical theorists: 41
 -> secular humanists: 34
 -> phenomenologists: 11
 -> rationalists: 17
 -> environmental philosophers: 8
------------------------------------------

## (C) Using `tradition` (western/eastern divide)

### (C.1) Extracting information from Wikipedia

- Option 1: **Extract from Wikipedia**

In [14]:
# Wikipedia timeline pages used to label philosophers as Eastern or Western.
tradition_links = [
    "Timeline of Eastern philosophers",
    "Timeline of Western philosophers",
]

# (old, new) pairs handed to get_category_data; presumably used to strip the
# page-title prefix from the labels -- verify in wiki_utils.
replace_terms_traditions = [("Timeline of", "")]
clean_tradition_data, philosopher_tradition_info = get_category_data(
    tradition_links, replace_terms_traditions, verbose=True
)

# Saved as local version for later use (Option 2 below). The context manager
# makes sure the file is flushed and closed before anything reads it back.
with open("philosopher_tradition_info.pkl", "wb") as cache_file:
    pickle.dump(philosopher_tradition_info, cache_file)
print("Total =", len(philosopher_tradition_info))

- Eastern philosophers: 343
- Western philosophers: 565
Total = 893


- Option 2: **Use local pickle file** from last time option 1 was run

In [15]:
# Reload the tradition info cached by Option 1; the context manager closes the
# file handle. Only unpickle files you created yourself.
with open("philosopher_tradition_info.pkl", "rb") as cache_file:
    philosopher_tradition_info = pickle.load(cache_file)

### (C.2) Creating `S_tradition` network

In [16]:
separator = "-" * 50

# Build the graph whose nodes carry a 'tradition' attribute; per the helper's
# verbose output, philosophers without tradition info are left out.
S_tradition = filter_graph_by_attribute(
    attribute_name='tradition',
    attribute_info=philosopher_tradition_info,
    graph=S,
    verbose=True,
)

# Per-tradition node counts, framed by separator lines.
print(separator)
count_nodes_by_attribute(S_tradition, 'tradition', verbose=True)
print(separator)

Did not find tradition for: 1020 philosophers (therefore not included in the filtered graph)
 -> Example of removed nodes: ['John_Selden', 'John_Maynard_Keynes', 'Thomas_of_York_(Franciscan)']
Original graph: 1366 nodes and 10855 edges
Filtered 'tradition' graph: 346 nodes and 3202 edges
--------------------------------------------------
Number of nodes by tradition:
 -> Western philosophers: 279
 -> Eastern philosophers: 69
--------------------------------------------------


# Partitioning

## Imports and definitions

In [None]:
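# Install the Louvain community-detection package if not already installed.
# Note: the PyPI package is `python-louvain` (it provides the `community` module).
# The unrelated PyPI package named `community` does NOT contain `community_louvain`
# and leads to "ImportError: cannot import name 'community_louvain'".
# %pip install python-louvain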

In [48]:
from community import community_louvain

def create_partition_using_attribute(graph, attribute_name):
    """Group the nodes of ``graph`` by the value of one node attribute.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node, attribute_dict)``
            pairs (e.g. a networkx graph).
        attribute_name: Key looked up in every node's attribute dict.

    Returns:
        dict mapping each attribute value to the list of nodes carrying it, in
        iteration order. If the value is a list, its first element is used as
        the key. Nodes without the attribute, or with an empty list, are
        grouped under ``None``.
    """
    partition = {}
    for node, data in graph.nodes(data=True):
        attribute = data.get(attribute_name)
        if isinstance(attribute, list):
            # Multi-valued attribute: the first entry decides the community.
            # An empty list is treated like a missing attribute instead of
            # raising IndexError.
            attribute = attribute[0] if attribute else None
        partition.setdefault(attribute, []).append(node)
    return partition

ImportError: cannot import name 'community_louvain' from 'community' (/Users/kevinmoore/anaconda3/lib/python3.11/site-packages/community/__init__.py)

## `S_subfields`

Each node is characterized by the first `subfield` if there are multiple.

In [43]:
# Communities keyed by each node's first listed subfield.
subfields_partition = create_partition_using_attribute(
    graph=S_subfields,
    attribute_name='subfields',
)

## `S_tradition`

In [45]:
# Communities keyed by each node's 'tradition' attribute (first entry if it is a list).
tradition_partition = create_partition_using_attribute(
    graph=S_tradition,
    attribute_name='tradition',
)