Skip to content

Commit 053be60

Browse files
authored
Merge pull request #2 from JJ/master
Some changes in docs and file name
2 parents c535916 + ba36e8c commit 053be60

File tree

7 files changed

+56
-10
lines changed

7 files changed

+56
-10
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
tropescraper.egg-info/
2+
__pycache__
3+
scraper_cache/
4+
*~
5+
.cache

.travis.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
dist: xenial
2+
language: python
3+
python:
4+
- "3.6"
5+
- "3.7"
6+
- "3.6-dev" # 3.6 development branch
7+
- "3.7-dev" # 3.7 development branch
8+
install:
9+
- pip install -e .
10+
script: pytest

README.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,20 @@
11
# tropescraper
2-
A tropes scraper
2+
3+
A scraper for the TV Tropes website.
4+
5+
6+
## Install
7+
8+
Install all dependencies with:
9+
10+
pip install -e .
11+
12+
(pip should be installed and available).
13+
14+
## Run
15+
16+
Run it with:
17+
18+
bin/scrape-tvtropes
19+
20+
It will take a good while, since it scrapes ~12k films.

bin/scrape-tvtropes

100644100755
Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@ import os
77
from tropescraper.tvtropes_scraper import TVTropesScraper
88

99
if len(sys.argv) != 2:
10-
command = sys.argv[0].split(os.sep)[-1]
11-
print(f'Error: Invalid usage\nPlease execute \'{command} <target_file.json>\'')
12-
sys.exit(1)
13-
14-
file_name = sys.argv[1]
10+
file_name = "tvtropes.json"
11+
else:
12+
13+
file_name = sys.argv[1]
1514

1615
logging.basicConfig(level=logging.INFO)
1716
scraper = TVTropesScraper()

tropescraper/cache_information.py

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import unittest
2+
from tropescraper.web_page_retriever import WebPageRetriever
3+
4+
class TestWebPageRetriever(unittest.TestCase):
5+
6+
def setUp(self):
7+
self.scraper = WebPageRetriever(0.5,"https://tvtropes.org/pmwiki/pmwiki.php/Film/FantasticBeastsAndWhereToFindThem","/tmp")
8+
9+
def test_class(self):
10+
self.assertIsInstance( self.scraper, WebPageRetriever, "Correct class" )
11+
12+
def test_retrieve(self):
13+
content = self.scraper.retrieve()
14+
self.assertNotEqual( content, "", "Retrieves something")
15+
content2 = self.scraper.retrieve()
16+
self.assertEqual( content, content2, "Retrieves from cache")

tropescraper/web_page_retriever.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
import os
55
from datetime import datetime
66
from time import sleep, ctime
7+
from collections import namedtuple
78

89
import requests
910

10-
from tropescraper.cache_information import CacheInformation
11+
CacheInformation = namedtuple('CacheInformation', ['size', 'files_count', 'creation_date'])
1112

1213

1314
class WebPageRetriever(object):

0 commit comments

Comments
 (0)