|
import array as arr
import os
import time
import urllib
import urllib.request
from urllib.parse import quote

import requests
| 7 | + |
| 8 | + |
class simple_image_download:
    """Scrape a Google Images result page and optionally download the images.

    Public interface:
        urls(keywords, limit)     -> list of image URLs (all keywords combined)
        download(keywords, limit) -> saves files under simple_images/<keyword>/
    """

    # Static query parameters copied from a real browser image search; the
    # sxsrf/ved tokens are stale, but Google still serves a usable page.
    _QUERY_SUFFIX = ('&biw=1536&bih=674&tbm=isch'
                     '&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770'
                     '&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ')

    def __init__(self):
        pass

    def urls(self, keywords, limit):
        """Return up to *limit* image URLs for each comma-separated keyword.

        keywords: comma-separated search terms, e.g. "cats, dogs".
        limit:    maximum number of links collected per keyword.
        """
        links = []
        for keyword in self._split_keywords(keywords):
            raw_html = self._download_page(self._build_search_url(keyword))
            for count, link in enumerate(self._iter_links(raw_html)):
                if count >= limit:
                    break
                links.append(link)
        return links

    def download(self, keywords, limit):
        """Download up to *limit* images per keyword into simple_images/<keyword>/.

        A failed download is reported and the next candidate link is tried
        instead, so up to *limit* files are still saved when possible
        (same retry-with-next-link behaviour as before, but it now stops
        cleanly when the page runs out of links instead of looping forever).
        """
        main_directory = "simple_images/"
        for keyword in self._split_keywords(keywords):
            self._create_directories(main_directory, keyword)
            raw_html = self._download_page(self._build_search_url(keyword))
            path = main_directory + keyword
            # _create_directories normally made this already; kept as a
            # belt-and-braces check like the original.
            if not os.path.exists(path):
                os.makedirs(path)
            saved = 0
            for link in self._iter_links(raw_html):
                if saved >= limit:
                    break
                filename = keyword + "_" + str(saved + 1) + ".jpg"
                try:
                    r = requests.get(link, allow_redirects=True)
                    # 'with' guarantees the handle is closed (the original
                    # leaked it via open(...).write(...)).
                    with open(os.path.join(path, filename), 'wb') as out:
                        out.write(r.content)
                except Exception as e:
                    # Best-effort: report and move on to the next link.
                    print(e)
                else:
                    saved += 1

    def _split_keywords(self, keywords):
        """Split a comma-separated keyword string into stripped terms."""
        return [str(item).strip() for item in keywords.split(',')]

    def _build_search_url(self, keyword):
        """Build the Google Images search URL for *keyword*."""
        return ('https://www.google.com/search?q='
                + quote(keyword.encode('utf-8')) + self._QUERY_SUFFIX)

    def _iter_links(self, raw_html):
        """Yield image URLs found in *raw_html*, in page order.

        A candidate is any double-quoted "https://..." token; it is truncated
        at the first backslash (the page body keeps escape sequences because
        _download_page returns a str() of bytes) and yielded only if it looks
        like an image ('.jpg', 'png', '.ico', '.gif', '.jpeg').  Stops when
        no candidates remain — the original scanned past the end and looped
        forever / produced garbage when fewer than *limit* links existed.
        """
        end_object = -1
        while True:
            new_line = raw_html.find('"https://', end_object + 1)
            if new_line == -1:
                return  # no more quoted https URLs on the page
            end_object = raw_html.find('"', new_line + 1)
            backslash = raw_html.find('\\', new_line + 1, end_object)
            if backslash != -1:
                candidate = raw_html[new_line + 1:backslash]
            else:
                candidate = raw_html[new_line + 1:end_object]
            if ('.jpg' in candidate or 'png' in candidate or '.ico' in candidate
                    or '.gif' in candidate or '.jpeg' in candidate):
                yield candidate

    def _create_directories(self, main_directory, name):
        """Ensure *main_directory* and its *name* sub-directory both exist."""
        try:
            if not os.path.exists(main_directory):
                os.makedirs(main_directory)
                time.sleep(0.2)  # give a slow filesystem a moment to settle
            sub_directory = os.path.join(main_directory, name)
            if not os.path.exists(sub_directory):
                os.makedirs(sub_directory)
        except OSError as e:
            if e.errno != 17:  # 17 == EEXIST: benign creation race
                raise

    def _download_page(self, url):
        """Fetch *url* with a desktop User-Agent; return the body as a str.

        NOTE: the return value is str(bytes), i.e. it keeps the b'...' repr
        with backslash escapes — _iter_links relies on that when trimming.
        Raises SystemExit(1) on failure; the original called exit(0), which
        terminated the process while wrongly reporting success.
        """
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36"
                          " (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"
        }
        try:
            req = urllib.request.Request(url, headers=headers)
            # Context manager closes the connection (original leaked it).
            with urllib.request.urlopen(req) as resp:
                return str(resp.read())
        except Exception as e:
            print(e)
            raise SystemExit(1)
0 commit comments