In [237]:
import json
import re
import subprocess
from bs4 import BeautifulSoup as bs

def print_armor_piece(armor):
	"""Print one armor piece's name, skills and decoration slots.

	Args:
		armor: Dict with the keys 'name' (str), 'skills' (mapping of skill
			name to level) and 'slots' (sequence of slot levels), or None
			for a piece that could not be scraped.

	Returns:
		None. The piece is written to stdout; nothing is printed for None.
	"""
	# The scrape_* functions return None when a piece's name link is missing;
	# skip such a piece instead of failing on armor['name'].
	if armor is None:
		return
	print(f"{armor['name']}")
	for skill, level in armor['skills'].items():
		print(f"    {skill}: {level}")
	# Join every slot value so the line does not assume exactly three entries.
	slot_text = '-'.join(str(slot) for slot in armor['slots'])
	print(f"    Slots:{slot_text}")

def sanitize_skills(dirty_skills):
	"""Map each cleaned-up skill name to the raw string it was derived from.

	Every '/' is removed and every '+' is turned into a space. When two raw
	strings clean up to the same name, the later one wins.

	Args:
		dirty_skills: Iterable of raw skill strings.

	Returns:
		dict mapping cleaned name -> raw string.
	"""
	return {
		raw.replace('/', '').replace('+', ' '): raw
		for raw in dirty_skills
	}

def _armor_set_name(url):
	"""Derive the dictionary key for an armor-set url (e.g. '/Nargacuga+S+Set' -> 'Nargacuga S')."""
	name = url.replace('+S', ' S')
	name = name.replace('+', '')
	name = name.replace('/', '')
	name = name.replace('Set', '')
	# Drop the last character (the space left in front of the removed 'Set').
	return name[:-1]


def _armor_dictionary(armor_urls):
	"""Build the {set name: url} mapping and discard the empty-name entry, if any."""
	armor_dictionary = {_armor_set_name(url): url for url in armor_urls}
	# A trailing comma in sets.txt yields an empty url and therefore an empty
	# key. pop() with a default removes it without raising KeyError when it is
	# absent (freshly scraped urls, or a cache file without a trailing comma).
	armor_dictionary.pop('', None)
	return armor_dictionary


def get_armor_urls():
	"""Return a mapping of armor-set name to its url.

	The urls are read from the comma-separated cache file "sets.txt" in the
	current directory. When that file does not exist, the urls are obtained
	from scrape_armor_sets() and written to "sets.txt" (each url followed by
	a comma) before the mapping is built.

	Returns:
		dict mapping set name -> url.
	"""
	try:
		with open("sets.txt", "rt") as fin:
			armor_urls = fin.read().split(',')
	except FileNotFoundError:
		print("sets.txt not found.")
		armor_urls = scrape_armor_sets()
		with open("sets.txt", "wt") as fout:
			for url in armor_urls:
				fout.write(url)
				fout.write(',')
	return _armor_dictionary(armor_urls)

def get_skill_urls():
	"""Return a mapping of skill name to its url.

	The urls are read from the comma-separated cache file "skills.txt" in the
	current directory. When that file does not exist, the urls are obtained
	from scrape_skills() and written to "skills.txt" (each url followed by a
	comma) before the mapping is built.

	Skill names are the urls with '/' removed and '+' replaced by a space.

	Returns:
		dict mapping skill name -> url.
	"""
	try:
		with open("skills.txt", "rt") as fin:
			skill_urls = fin.read().split(',')
	except FileNotFoundError:
		print("skills.txt not found.")
		skill_urls = scrape_skills()
		with open("skills.txt", "wt") as fout:
			for url in skill_urls:
				fout.write(url)
				fout.write(',')
	# Skip empty urls: the trailing comma in skills.txt produces one, which
	# would otherwise add a '' -> '' entry only when reading from the cache.
	return {
		url.replace('/', '').replace('+', ' '): url
		for url in skill_urls
		if url
	}

def remove_url_txt():
	"""Delete the url cache files "sets.txt" and "skills.txt" from the current directory.

	A file that does not exist is ignored. Any other failure to delete a
	file is reported on stdout and does not raise.
	"""
	# Local import keeps this function self-contained; os.remove replaces the
	# external `rm` command, which is not available on every platform.
	import os

	for filename in ("sets.txt", "skills.txt"):
		try:
			os.remove(filename)
		except FileNotFoundError:
			# Nothing to delete for this file.
			pass
		except OSError as err:
			print(f"Could not remove {filename}: {err}")

def scrape_armor_sets():
	"""Collect the armor-set link targets from the saved Armor Sets page.

	The page is read from the local file
	"monsterhunterrise.wiki.fextralife.com/Armor+Sets" (relative to the
	current directory) and parsed with BeautifulSoup. Every element with
	class "wiki_link" whose title matches the regex r"Set\\b" contributes
	one string.

	Returns:
		set of str, one entry per distinct link.

	Raises:
		FileNotFoundError: if the saved page does not exist.
	"""
	print("Scraping https://monsterhunterrise.wiki.fextralife.com/Armor+Sets and saving urls.")
	try:
		with open(r"monsterhunterrise.wiki.fextralife.com/Armor+Sets", "rt") as fin:
			html_text = fin.read()
	except FileNotFoundError:
		print("file not found")
		# Re-raise: carrying on would only fail below on an unbound html_text.
		raise
	soup = bs(html_text, 'html.parser')
	tmp_links = soup.find_all(class_="wiki_link", title=re.compile(r"Set\b"))
	# Field 3 of the tag markup split on '"' is the value of the tag's second
	# double-quoted attribute -- presumably href; verify against the saved page.
	return {str(link).split("\"")[3] for link in tmp_links}

def scrape_skills():
	"""Collect the skill link targets from the saved Skills page.

	The page is read from the local file
	"armor/monsterhunterrise.wiki.fextralife.com/Skills" (relative to the
	current directory) and parsed with BeautifulSoup. Elements with class
	"wiki_link", an href attribute and a title matching "Monster Hunter Rise"
	are considered; the first five matches are skipped.

	Returns:
		set of str, one entry per distinct link.

	Raises:
		FileNotFoundError: if the saved page does not exist.
	"""
	print("Scraping https://monsterhunterrise.wiki.fextralife.com/Skills and saving urls.")
	# NOTE(review): this path has an "armor/" prefix that the other scrape_*
	# functions in this file do not use -- confirm which layout is intended.
	try:
		with open(r"armor/monsterhunterrise.wiki.fextralife.com/Skills", "rt") as fin:
			html_text = fin.read()
	except FileNotFoundError:
		print("file not found")
		# Re-raise: carrying on would only fail below on an unbound html_text.
		raise
	soup = bs(html_text, 'html.parser')
	rows = soup.find_all(class_="wiki_link", href=True, title=re.compile(r"Monster Hunter Rise"))
	# Matches 0-4 are skipped (presumably non-skill links; verify against the
	# page). Field 3 of the tag markup split on '"' is the value of the tag's
	# second double-quoted attribute -- presumably href.
	return {
		str(row).split("\"")[3]
		for index, row in enumerate(rows)
		if index > 4
	}

def scrape_set(offset):
	"""Scrape the five armor pieces of one set from its saved wiki page.

	Args:
		offset: Path suffix of the set page (e.g. '/Nargacuga+S+Set'); it is
			appended to "monsterhunterrise.wiki.fextralife.com" to form the
			local file path that is read.

	Returns:
		dict with the keys 'helm', 'chest', 'arms', 'waist' and 'legs', each
		holding the result of the matching scrape_* function.

	Raises:
		FileNotFoundError: if the saved page does not exist.
	"""
	base = "monsterhunterrise.wiki.fextralife.com"
	url = base + offset
	try:
		with open(url, "rt") as fin:
			html_text = fin.read()
	except FileNotFoundError:
		print("file not found")
		# Re-raise: carrying on would only fail below on an unbound html_text.
		raise
	soup = bs(html_text, 'html.parser')
	return {
		'helm': scrape_helm(soup),
		'chest': scrape_chest(soup),
		'arms': scrape_arms(soup),
		'waist': scrape_waist(soup),
		'legs': scrape_legs(soup),
	}
	
def scrape_sets(urls):
	"""Scrape every armor set listed in a name -> page-offset mapping.

	Args:
		urls: Dict mapping a set name to the offset handed to scrape_set().

	Returns:
		dict mapping each set name to the value scrape_set() returned for it.
	"""
	return {set_name: scrape_set(offset) for set_name, offset in urls.items()}

def _skill_level(trailing_html):
	"""Read a skill level from the markup that follows a skill link's '</a>'.

	The level is taken from the text between the first 'x' (as in "x2") and
	the next '<'. When the markup contains no 'x', the level defaults to 1.

	Raises:
		ValueError: if text follows the 'x' but no integer can be read from it.
	"""
	parts = trailing_html.split('x')
	if len(parts) < 2:
		return 1
	level_text = parts[1].split('<')[0]
	# Try the first two space-separated tokens, e.g. "2" or " 2".
	for token in level_text.split(' ')[:2]:
		try:
			return int(token)
		except ValueError:
			continue
	# Fall back to the text before the first comma, e.g. "2," or "2, ...".
	# This is also reached when level_text holds no space at all.
	return int(level_text.split(',')[0])


def _scrape_piece(soup, icon_title, row_index):
	"""Scrape one armor piece; shared implementation of the scrape_* functions.

	Args:
		soup: Parsed set page. Only find(title=...), find(class_=...),
			find_next(...) and str() of the results are used.
		icon_title: title attribute of the piece's icon element; the skills
			are read from the first 'td' found after that element.
		row_index: Number of extra 'tr' hops after the first 'tr' of the
			'tbody' following the element with class 'bonfire' (0 for the
			first row).

	Returns:
		dict with 'skills' (name -> int level), 'name' (str) and 'slots'
		(three ints, sorted descending), or None when the row's first cell
		holds no href to take the name from.
	"""
	skills = {}
	skills_html = str(soup.find(title=icon_title).find_next('td'))
	fragments = skills_html.split('</a>')
	# Each fragment ends with a link's text; the fragment after it starts with
	# the markup that carries that link's level.
	for fragment, trailing_html in zip(fragments, fragments[1:]):
		skill = fragment.split('>')[-1]
		# Link texts of two characters or fewer are not treated as skills.
		if len(skill) > 2:
			skills[skill] = _skill_level(trailing_html)

	row = soup.find(class_='bonfire').find_next('tbody').find_next('tr')
	for _ in range(row_index):
		row = row.find_next('tr')
	name_cell = row.find_next('td')
	try:
		name = str(name_cell).split('href=')[1].split("\"")[1].replace('/', '').replace('+', ' ')
	except IndexError:
		# No link in the name cell: the piece is treated as absent.
		return None

	slots_html = str(name_cell.find_next('td').find_next('td'))
	gem_fragments = slots_html.split('src="/file/Monster-Hunter-Rise/gem_level_')
	slots = [0] * 3
	# The character right after "gem_level_" is the slot's level digit.
	for position, gem_fragment in enumerate(gem_fragments[1:]):
		slots[position] = int(gem_fragment[0])
	return {'skills': skills, 'name': name, 'slots': sorted(slots, reverse=True)}


def scrape_helm(soup):
	"""Scrape the helm: helm icon's skills cell and the first row after 'bonfire'."""
	return _scrape_piece(soup, "helm-headgear-icon-monster-hunter-rise-wiki-guide", 0)


def scrape_chest(soup):
	"""Scrape the chest piece: chest icon's skills cell and the second row after 'bonfire'."""
	return _scrape_piece(soup, "torso-chest-plate-icon-monster-hunter-rise-wiki-guide", 1)


def scrape_arms(soup):
	"""Scrape the arms piece: arms icon's skills cell and the third row after 'bonfire'."""
	return _scrape_piece(soup, "arms-gauntlets-icon-monster-hunter-rise-wiki-guide", 2)


def scrape_waist(soup):
	"""Scrape the waist piece: waist icon's skills cell and the fourth row after 'bonfire'."""
	return _scrape_piece(soup, "waist-belt-icon-monster-hunter-rise-wiki-guide", 3)


def scrape_legs(soup):
	"""Scrape the legs piece: legs icon's skills cell and the fifth row after 'bonfire'."""
	return _scrape_piece(soup, "feet-boots-greaves-icon-monster-hunter-rise-wiki-guide", 4)

In [238]:
# Resolve the armor-set urls, scrape every set, then store the result as JSON.
armor_urls = get_armor_urls()
sets = scrape_sets(armor_urls)
with open('sets.json', 'w') as sets_file:
	sets_file.write(json.dumps(sets))

In [239]:
# Print every piece of the freshly scraped 'Nargacuga S' set.
for armor_piece, piece_data in sets['Nargacuga S'].items():
	print_armor_piece(piece_data)

Nargacuga Helm S
    Evade Window: 1
    Slots:3-0-0
Nargacuga Mail S
    Evade Window: 1
    Critical Eye: 2
    Slots:1-1-0
Nargacuga Braces S
    Evade Window: 1
    Evade Extender: 1
    Slots:1-1-0
Nargacuga Coil S
    Evade Extender: 2
    Critical Eye: 1
    Slots:2-0-0
Nargacuga Greaves S
    Evade Window: 2
    Critical Eye: 1
    Slots:1-0-0


In [236]:
# Reload the scraped sets from sets.json and print the 'Nargacuga S' set.
with open('sets.json') as sets_file:
	sets = json.loads(sets_file.read())
for armor_piece, piece_data in sets['Nargacuga S'].items():
	print_armor_piece(piece_data)

Nargacuga Helm S
    Evade Window: 1
    Slots:3-0-0
Nargacuga Mail S
    Evade Window: 1
    Critical Eye: 2
    Slots:1-1-0
Nargacuga Braces S
    Evade Window: 1
    Evade Extender: 1
    Slots:1-1-0
Nargacuga Coil S
    Evade Extender: 2
    Critical Eye: 1
    Slots:2-0-0
Nargacuga Greaves S
    Evade Window: 2
    Critical Eye: 1
    Slots:1-0-0
