# Community Extraction

In [1]:
import pickle
import os
import networkx as nx
import re
DOWNLOADS_DIR = "downloads"

In [2]:
# Load the pickled graph from disk. The context manager guarantees the file
# handle is closed even if unpickling fails.
# NOTE(review): pickle can execute arbitrary code while loading, so only
# unpickle files that were produced by this project.
with open("graph.pkl", "rb") as graph_file:
	S = pickle.load(graph_file)

## Using `region`

### Creating `S_region`
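For each node, search the downloaded wikitext for the infobox `region` field and store its value as a node attribute (if several regions are listed, only the first one is used). Nodes with no downloaded file or no `region` field are removed. The resulting graph is stored in the variable `S_subfield`.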

In [None]:
# Build a copy of S in which every remaining node carries a `region`
# attribute. The copy is stored as `S_subfield` because the cells that
# follow read it under that name.
S_subfield = S.copy()

# Matches an infobox line such as `| region = [[Western philosophy]]` and
# captures the content of the first wiki link after the `=`.
region_pattern = r'\|\s*region\s*=\s*\[\[([^\]]+)\]\]'
nodes_to_remove = []  # Nodes for which no region could be determined

for node in list(S_subfield.nodes):
	node_file_name = DOWNLOADS_DIR + "/" + node + ".txt"
	if not os.path.exists(node_file_name):
		nodes_to_remove.append(node)  # No downloaded wikitext for this node
		continue

	try:
		with open(node_file_name, 'r', encoding='utf-8') as f:
			text = f.read()
	except Exception as e:
		# An unreadable file gives no region either, so drop the node instead
		# of leaving it in the graph without a `region` attribute.
		print(f"Error processing node {node}: {e}")
		nodes_to_remove.append(node)
		continue

	match = re.search(region_pattern, text)
	if match is None:
		nodes_to_remove.append(node)  # Wikitext has no linked region field
		continue

	# For a piped link `[[Target|label]]` keep only the target, so the same
	# region is not split into several communities by differing labels.
	region = match.group(1).split('|', 1)[0].strip()
	if region:
		S_subfield.nodes[node]['region'] = region
	else:
		nodes_to_remove.append(node)  # Link had an empty target

S_subfield.remove_nodes_from(nodes_to_remove)

In [None]:
# Compare the graph before and after the region filter, then print one node
# with its attributes as a spot check.
for graph_label, graph_obj in (("Original graph: ", S), ("Region graph: ", S_subfield)):
	print(graph_label, graph_obj)

sample_position = 143  # Arbitrary position in the node list, spot check only
annotated_nodes = list(S_subfield.nodes(data=True))
print(annotated_nodes[sample_position])

Original graph:  DiGraph with 1366 nodes and 10850 edges
Region graph:  DiGraph with 557 nodes and 4415 edges
('Anaximander', {'contentlength': 7671, 'region': 'Western philosophy'})


### Generating the communities

In [None]:
# Group the nodes by their `region` attribute:
# region name -> list of node names, in graph iteration order.
region_partition = {}
for node, attrs in S_subfield.nodes(data=True):
	region_partition.setdefault(attrs.get('region'), []).append(node)

print("Region communities with length:")
for region_name, members in region_partition.items():
	print(f"Region: {region_name}, Length: {len(members)}")

Only `Western philosophy` forms a large community; every other region contains comparatively few philosophers.

## (B) Using `subfield`
https://en.wikipedia.org/wiki/Lists_of_philosophers

### (B.1) Extracting subfield information

First, we fetch the data from Wikipedia (takes around 5 seconds).

In [80]:
from wiki_utils import getJsonResponse, findLinks

# Titles of the Wikipedia list pages, one per subfield.
subfield_links = [
	"List of aestheticians",
	"List of critical theorists",
	"List of environmental philosophers",
	"List of epistemologists",
	"List of ethicists",
	"List of existentialists",
	"List of feminist philosophers",
	"List of secular humanists",
	"List of logicians",
	"List of metaphysicians",
	"Index of sociopolitical thinkers",  # "List of Social and Political Philosophers" redirects here
	"List of phenomenologists",
	"List of philosophers of language",
	"List of philosophers of mind",
	"List of philosophers of religion",
	"List of philosophers of science",
	"List of political philosophers",
	"List of political theorists",
	"List of rationalists",
	"List of utilitarians",
]

# Page title -> flat list of the links found on that page.
subfield_data = {}
for page_title in subfield_links:
	# NOTE(review): presumably returns the page's wiki markup; confirm in wiki_utils.
	page_markup = getJsonResponse(page_title)
	# extend() builds the flat per-title list directly, so no separate
	# flattening pass is needed afterwards.
	subfield_data.setdefault(page_title, []).extend(findLinks(page_markup))

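We then clean the fetched data: the `List of` / `Index of` prefixes are stripped from the list titles to obtain subfield names, and the mapping is inverted so that each philosopher maps to the list of subfields they appear in.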

In [None]:
# Turn each page title into a bare subfield name by dropping the
# "List of" / "Index of" wording and surrounding whitespace.
clean_subfield_data = {
	page_title.replace("List of", "").strip().replace("Index of", "").strip(): links
	for page_title, links in subfield_data.items()
}


total_philosophers = 0
verbose = False  # Set to True to print the number of links per subfield
if verbose:
	for header_line in (
		"Subfield: Amount of philosophers categorized in this subfield",
		"(Multiple subfields for a philosopher is possible)",
		"-"*50,
	):
		print(header_line)


# Invert the mapping: philosopher -> list of subfields they are listed under.
philosopher_subfield_info = {}
for subfield, philosopher_list in clean_subfield_data.items():
	if verbose:
		print(f"- {subfield}: {len(philosopher_list)}")
	for philosopher in philosopher_list:
		philosopher_subfield_info.setdefault(philosopher, []).append(subfield)

print("Total categorized philosophers =",  len(philosopher_subfield_info))
print(" (assuming a link in the subfield list = a philosopher)")

Total categorized philosophers = 2183
 (assuming a link in the subfield list = a philosopher)


### (B.2) Adding `subfield` info to network

In [78]:
# Work on a copy so the original graph S stays untouched.
S_subfield = S.copy()

# Nodes that do not appear in any subfield list cannot be categorized.
nodes_to_remove = [
	node for node in S_subfield.nodes
	if node not in philosopher_subfield_info
]
S_subfield.remove_nodes_from(nodes_to_remove)

# Every node that is left has an entry in the lookup; attach it.
for node in S_subfield.nodes:
	S_subfield.nodes[node]['subfields'] = philosopher_subfield_info[node]

print("Did not find subfield for: ", len(nodes_to_remove), " philosophers (therefore not included in S_subfield)")
print(" -> Example of removed nodes: ", nodes_to_remove[:3])
print("Original amount of philosophers in S: ", len(S.nodes))
print("Modified amount in S_subfield: ", len(S_subfield.nodes))

Did not find subfield for:  771  philosophers (therefore not included in S_subfield)
 -> Example of removed nodes:  ['Georgy_Shchedrovitsky', 'James_McCosh', 'Zeno_of_Sidon']
Original amount of philosophers in S:  1366
Modified amount in S_subfield:  595


### (B.3)

## Using the Western/Eastern divide lists

In [None]:
from wiki_utils import getJsonResponse, findLinks

# Titles of the two Wikipedia timeline pages used for the divide.
western_eastern_links = [
	"Timeline of Eastern philosophers",
	"Timeline of Western philosophers",
]

# Page title -> flat list of the links found on that page.
western_eastern_data = {}
for page_title in western_eastern_links:
	# NOTE(review): presumably returns the page's wiki markup; confirm in wiki_utils.
	page_markup = getJsonResponse(page_title)
	# extend() builds the flat per-title list directly, so no separate
	# flattening pass is needed afterwards.
	western_eastern_data.setdefault(page_title, []).extend(findLinks(page_markup))

{'Timeline of Eastern philosophers': [['Sai_Baba_of_Shirdi', 'Chaitanya_Mahaprabhu', 'totalistic_legalism', 'ISKCON', 'Mūlamadhyamakakārikā', 'Jeong_Yak-yong', 'Seungrang', 'Brajendranath_Seal', 'Anandavardhana', 'Sri_Aurobindo', 'Jizang', 'Zhou_dynasty', 'Krishna_Chandra_Bhattacharya', 'Zhi_Dun', 'Gangesha_Upadhyaya', 'Nisargadatta_Maharaj', 'Zhu_Xi', 'Nagarjuna', 'Zhaozhou_Congshen', 'Kuki_Shūzō', 'Mou_Tsung-san', 'Basaveshwara', 'Kim_Jeong-hui', 'Choi_Je-u', 'Jiao_Hong', 'Tang_dynasty', 'Nichiren', 'Indian_atomism', 'Legalism_(Chinese_philosophy)', 'Agastya', 'Umasvati', 'Xiong_Shili', 'Ogyū_Sorai', 'Atiśa', 'Hōnen', 'Kundakunda', 'Chang_Tsai', 'Cārvāka', 'Mozi', 'Ji_Kang', 'Gadadhara_Bhattacharya', 'Nolini_Kanta_Gupta', 'Bhartrhari', 'Xuanzang', 'Yajnavalkya', 'Sun_Yat-sen', 'Bharadwaja', 'Woncheuk', 'Kang_Youwei', 'Nyaya', 'Rabindranath_Tagore', 'Gorampa', 'Parshvanatha', 'Vasishtha', 'Three_Kingdoms', 'Buddhaghosa', 'Dai_Zhen', 'Pandurang_Shastri_Athavale', 'Kabir', 'Abhinavagupt