-
Notifications
You must be signed in to change notification settings - Fork 0
/
urlloader.py
115 lines (80 loc) · 2.23 KB
/
urlloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
'''
Define connections for HTTP downloading.
'''
from user import User
import urllib
class UrlLoader:
    '''
    Hold the state needed to load one URL: the URL itself, an optional
    user, a cookie slot, a proxy mapping and the most recent content.

    Every instance receives a sequential id taken from the class-wide
    GLOBAL_ID counter.
    '''
    GLOBAL_ID = 0  # next id to hand out; shared by all instances
    _CHUNK_SIZE = 64 * 1024  # bytes read per iteration in downloadAsFile

    def __init__(self, url='', user=None, proxy=None):
        '''
        Create a loader.

        url   -- initial URL (defaults to the empty string).
        user  -- object stored as the current user; may be None.
        proxy -- mapping stored as the proxy table. When omitted, each
                 instance gets its own fresh dict. A dict passed in by the
                 caller is stored by reference, not copied.
        '''
        self._id = UrlLoader.GLOBAL_ID
        UrlLoader.GLOBAL_ID += 1
        self.setUrl(url)
        self._content = ''  # may not be used
        self._user = user
        self._cookie = None
        self._loadFinished = False
        # A literal {} default would be one dict shared by every instance,
        # so addProxy() on one loader would leak into all the others.
        self._proxy = {} if proxy is None else proxy

    def setUrl(self, url):
        '''Store url as the current URL.'''
        self._url = url

    def getUrl(self):
        '''Return the current URL.'''
        return self._url

    def getUser(self):
        '''Return the stored user (may be None).'''
        return self._user

    def setUser(self, user):
        '''Store user as the current user.'''
        self._user = user

    def getContent(self):
        '''Return whatever the last load() stored as content.'''
        return self._content

    def getCookie(self):
        '''Return the stored cookie (None unless set elsewhere).'''
        return self._cookie

    def addProxy(self, key, value):
        '''Add or replace one proxy entry; a None key is ignored.'''
        if key is None:
            return
        self._proxy[key] = value

    def getProxy(self):
        '''Return the proxy mapping itself (not a copy).'''
        return self._proxy

    def loginByUser(self, user, url=None):
        '''
        Remember user, and url when one is given, then return True.

        No login request is performed by this implementation.
        '''
        self._user = user
        if url is not None:
            self.setUrl(url)
        return True

    def _downloadUrl(self, url, cookie):
        '''Placeholder: ignores both arguments and returns True.'''
        return True

    def load(self, url=None, user=None):
        '''
        Log in and fetch the current URL into the content slot.

        url  -- when not None, replaces the current URL first.
        user -- when None, the stored user is used instead.

        Returns False if loginByUser() reports False, otherwise True.
        '''
        if url is not None:
            self.setUrl(url)
        if user is None:
            user = self.getUser()
        # Explicit comparison kept on purpose: a loginByUser() result of
        # None is not treated as a failure, exactly as before.
        if self.loginByUser(user, self.getUrl()) == False:
            return False
        self._content = self._downloadUrl(self.getUrl(), self.getCookie())
        return True

    def downloadAsFile(self, url, target):
        '''
        Download url through the configured proxies and save it to target.

        url    -- address to fetch; when None the current URL is used.
        target -- path of the file to write.

        Returns True on success. Returns False when no URL or no target is
        available, or when opening, reading or writing fails.

        NOTE: urllib.urlopen(url, proxies=...) is the Python 2 urllib API.
        '''
        if url is None:
            url = self.getUrl()
            if url is None:
                return False
        if target is None:
            return False

        rfp = None
        try:
            rfp = urllib.urlopen(url, proxies=self.getProxy())
            # Binary mode: the payload is written exactly as received, with
            # no newline translation.
            with open(target, 'wb') as fp:
                while True:
                    chunk = rfp.read(self._CHUNK_SIZE)
                    if not chunk:
                        break
                    fp.write(chunk)
        except Exception:
            # Best-effort contract: failures are reported through the
            # return value. KeyboardInterrupt/SystemExit still propagate.
            return False
        finally:
            if rfp is not None:
                rfp.close()
        return True
if __name__ == '__main__':
    # Manual smoke check: build three loaders one after another, so each
    # takes the next value of the UrlLoader.GLOBAL_ID counter.
    l, l2, l3 = UrlLoader(), UrlLoader(), UrlLoader()