-
Notifications
You must be signed in to change notification settings - Fork 19
/
clone.py
76 lines (65 loc) · 1.52 KB
/
clone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/python
import urllib2
import sys
import socket
import os
import urlparse

# Give up on any single network operation after 15 seconds.
socket.setdefaulttimeout(15)

# File extensions of the page resources that are mirrored next to index.html.
dataTypesToDownload = [".jpg", ".png", ".ico", ".css", ".js"]

# Directory (relative to the working directory) that receives the clone.
CLONE_DIR = "clone"


def _strip_query(resource):
    """Return *resource* without its ``?query`` and ``#fragment`` parts."""
    return resource.split("#", 1)[0].split("?", 1)[0]


def _is_wanted(resource):
    """Return True when the path of *resource* ends in a mirrored extension.

    The check is done on the path only (query/fragment removed) and is
    case-insensitive, so ``app.js?v=2`` matches but ``data.json`` does not.
    """
    path = _strip_query(resource).lower()
    return path.endswith(tuple(dataTypesToDownload))


def _local_path(resource):
    """Map *resource* to a file path inside CLONE_DIR.

    Returns None when the normalised path would land outside CLONE_DIR
    (e.g. ``../../etc/x.js``); the HTML being parsed is untrusted input and
    must not be able to choose arbitrary write locations.
    """
    # Leading slashes would make os.path.join discard CLONE_DIR.
    relative = _strip_query(resource).lstrip("/")
    candidate = os.path.normpath(os.path.join(CLONE_DIR, relative))
    if not candidate.startswith(CLONE_DIR + os.sep):
        return None
    return candidate


def _ensure_directory(path):
    """Create *path* (and parents); an already existing directory is fine.

    Raises OSError when the directory is missing and cannot be created.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _fetch(address):
    """Return the body served at *address*, always closing the response."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _describe(error):
    """Return the most specific text available for a network/file error."""
    return str(getattr(error, "reason", error))


def _download_resource(base_url, resource):
    """Download *resource* (as referenced by the page at *base_url*).

    The data is fetched before the target file is opened, so a failed
    download never leaves an empty file behind. Returns True on success and
    False when the resource was skipped or an error was reported.
    """
    target = _local_path(resource)
    if target is None:
        print("Skipping " + resource + " (path is outside the clone directory)")
        return False

    print("Downloading " + resource)
    # urljoin resolves relative, root-relative and absolute references.
    address = urlparse.urljoin(base_url, resource)
    print(address)
    try:
        data = _fetch(address)
    except IOError as e:
        # urllib2.URLError and socket.timeout are both IOError subclasses.
        print("An error occured: " + _describe(e))
        return False

    try:
        _ensure_directory(os.path.dirname(target))
        # Binary mode: images must not go through newline translation.
        with open(target, "wb") as download:
            download.write(data)
    except (IOError, OSError) as e:
        print("An error occured: " + _describe(e))
        return False

    print("Downloaded!")
    return True


# --- script flow -----------------------------------------------------------

if len(sys.argv) == 1:
    url = raw_input("URL of site to clone: ")
else:
    url = sys.argv[1]
# Only a leading scheme counts; "http://" inside a query string does not.
if not url.lower().startswith(("http://", "https://")):
    url = "http://" + url

# Fetch the page first so a failure creates nothing on disk.
try:
    content = _fetch(url)
except IOError as e:
    print("An error occured: " + _describe(e))
    sys.exit(1)

_ensure_directory(CLONE_DIR)

# Every double-quoted attribute value follows an '="'; the text before the
# first occurrence is not an attribute value, hence the [1:].
seen = set()
for chunk in content.split("=\"")[1:]:
    resource = chunk.split("\"")[0]
    if resource in seen or not _is_wanted(resource):
        continue
    seen.add(resource)
    _download_resource(url, resource)

with open(os.path.join(CLONE_DIR, "index.html"), "wb") as index:
    index.write(content)
print("Cloned " + url + " !")