@@ -0,0 +1,60 @@
from bs4 import BeautifulSoup
import requests
import re

# Main entry point: call this function to look up a song's lyrics
def scrapeLyrics(band, song):
    """Fetch the lyrics of *song* by *band* from lyrics.wikia.com.

    The direct ``/wiki/<band>:<song>`` page is tried first, with spaces
    replaced by ``-`` and apostrophes by ``%27``. If that page yields no
    lyrics, the lookup falls back to ``findSearch``.

    Returns the lyrics text, or the string "Error in band or song" when
    neither lookup finds a lyrics page.
    """
    band_ = band.replace(" ", "-").replace("'", "%27")
    song_ = song.replace(" ", "-").replace("'", "%27")
    url = "http://lyrics.wikia.com/wiki/" + band_ + ":" + song_
    lyricParse = urlParse(url)
    if lyricParse == "Error in band or song":
        return findSearch(band, song)
    # Reuse the page that was already fetched and parsed rather than
    # issuing a second, identical HTTP request.
    return lyricParse

# Helper func for secondary search
def findSearch(band, song):
    """Look a song up through the site's search page.

    Builds a ``Special:Search`` query for ``<band>:<song>`` (spaces become
    ``+``; apostrophes and colons are percent-encoded), takes the first
    result link on the results page, and hands it to ``urlParse``.

    Returns the lyrics text, or the string "Error in band or song" when
    the search reports no result, the results page lacks the expected
    elements, or the first result link does not contain "http".
    """
    error = "Error in band or song"
    band_ = band.replace(" ", "+").replace("'", "%27").replace(":", "%3A")
    song_ = song.replace(" ", "+").replace("'", "%27").replace(":", "%3A")
    url = ("http://lyrics.wikia.com/wiki/Special:Search?search="
           + band_ + "%3A" + song_ + "&fulltext=Search&ns0=1#")

    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    if soup.find("p", class_="no-result"):
        return error

    # find() returns None when the element is absent; report that as a
    # failed lookup instead of crashing on attribute access.
    article = soup.find("article")
    if article is None or "song" not in article.text:
        return error

    link = soup.find("a", class_="result-link")
    if link is None:
        return error

    # A result link without an href is treated the same as no usable link.
    href = link.get("href", "")
    if "http" in href:
        return urlParse(href)
    return error

# Parses lyric page text
def urlParse(url):
    """Download *url* and extract the text of its lyric box.

    The page is parsed with BeautifulSoup and the first ``<div>`` with
    class ``lyricbox`` is located. Each ``<br>`` inside it is replaced by
    a newline character before the text is extracted.

    Returns the lyric text, or the string "Error in band or song" when
    the page has no such div.
    """
    r = requests.get(url)
    page = BeautifulSoup(r.text, 'html.parser')
    lyricbox = page.find("div", class_="lyricbox")
    if lyricbox is None:
        return "Error in band or song"

    # Turn line-break tags into real newlines so .text keeps line structure.
    for br in lyricbox.find_all("br"):
        br.replace_with("\n")
    return lyricbox.text

def scrapeFromFile(f, out_path='output.txt'):
    """Scrape lyrics for every ``band;song`` line of a text file.

    Args:
        f: Path of a UTF-8 text file with one ``band;song`` pair per line.
            Blank lines are skipped. Only the first ``;`` separates band
            from song, so a song title may itself contain ``;``.
        out_path: Path of the UTF-8 file to write. Defaults to
            ``output.txt``, the location that was previously hard-coded.

    For each pair, the result of ``scrapeLyrics`` is written on a single
    output line, with its newlines replaced by spaces.

    Raises:
        ValueError: If a non-blank line contains no ``;`` separator.
    """
    # Both files are managed by ``with`` so they are closed even when a
    # lookup or a malformed line raises part-way through.
    with open(f, "r", encoding="utf-8") as doc, \
            open(out_path, 'w', encoding="utf-8") as fout:
        for lineno, line in enumerate(doc, 1):
            if not line.strip():
                continue
            band, sep, song = line.partition(";")
            if not sep:
                raise ValueError(
                    "line %d of %s is not in 'band;song' format: %r"
                    % (lineno, f, line.rstrip("\n"))
                )
            lyrics = scrapeLyrics(band.strip(), song.strip())
            fout.write(lyrics.replace('\n', " "))
            fout.write("\n")