Skip to content

Commit

Permalink
add filters
Browse files (browse the repository at this point in the history)
  • Loading branch information
philipperemy committed Sep 3, 2018
1 parent 339d5b6 commit 5e44a40
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
16 changes: 14 additions & 2 deletions download.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import os
import sys
from time import sleep

import os
import pexpect
import shutil
from glob import glob
from pexpect.exceptions import ExceptionPexpect

# File name used for persistence.
# NOTE(review): the code that reads/writes this file is not visible in this
# excerpt -- confirm its exact role against the rest of download.py.
PERSISTENCE_FILENAME = 'persistence.txt'

# Substrings compared case-insensitively against the names of downloaded
# *.mp3 files; the download loop removes any file whose name contains one
# of them (the entries hint at full albums / compilations rather than
# single tracks).
KEYWORDS_TO_FILTER_OUT = [
    'album complet',
    'compil',
    'full album',
    'compilation',
    'full ep',
    'full',
]


def get_music(name='Linkin Park papercut'):
child = pexpect.spawn('instantmusic')
Expand Down Expand Up @@ -42,7 +46,15 @@ def run(song_filename, output_folder):
printable_music = music.strip()
print('Downloading {0}'.format(printable_music))
get_music(printable_music)
os.system('mv *.mp3 {}'.format(output_folder))

# print(glob('*.mp3'))
for mp3_music in glob('*.mp3'):
for keyword_to_filter in KEYWORDS_TO_FILTER_OUT:
if keyword_to_filter.lower() in mp3_music.lower():
print('Music filtered {}.'.format(mp3_music))
os.remove(mp3_music)
continue
shutil.move(mp3_music, output_folder + '/')
break
except ExceptionPexpect: # also check pexpect.exceptions.TIMEOUT: Timeout exceeded.
num_attempts += 1
Expand Down
9 changes: 5 additions & 4 deletions list_songs/music_names_from_billboard.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
# http://billboardtop100of.com/1989-2/
import string

import requests
import string
from bs4 import BeautifulSoup

printable = set(string.printable)

music_names = open('music.txt', 'w')
output_filename = 'music.txt'
print('Writing to {}.'.format(output_filename))
music_names = open(output_filename, 'w')

for year in range(1995, 2017):

if year == 2013:
continue

print(year)
print('Downloading for {}.'.format(year))
response = requests.get('http://billboardtop100of.com/{0}-2/'.format(year))
assert response.status_code == 200
soup = BeautifulSoup(response.content, 'lxml')
Expand Down

0 comments on commit 5e44a40

Please sign in to comment.