|
import array as arr
import os
import time
import urllib
import urllib.request
from urllib.parse import quote

import requests
| 7 | + |
| 8 | + |
class simple_image_download:
    """Scrape a Google Images result page and optionally download the images.

    Public interface:
        urls(keywords, limit)     -> list of image URLs (all keywords combined)
        download(keywords, limit) -> saves files under simple_images/<keyword>/
    """

    # Static query parameters copied from a real browser image search; the
    # sxsrf/ved tokens are stale, but Google still serves a usable page.
    _QUERY_SUFFIX = ('&biw=1536&bih=674&tbm=isch'
                     '&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770'
                     '&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ')

    def __init__(self):
        pass

    def urls(self, keywords, limit):
        """Return up to *limit* image URLs for each comma-separated keyword.

        keywords: comma-separated search terms, e.g. "cats, dogs".
        limit:    maximum number of links collected per keyword.
        """
        links = []
        for keyword in self._split_keywords(keywords):
            raw_html = self._download_page(self._build_search_url(keyword))
            for count, link in enumerate(self._iter_links(raw_html)):
                if count >= limit:
                    break
                links.append(link)
        return links

    def download(self, keywords, limit):
        """Download up to *limit* images per keyword into simple_images/<keyword>/.

        A failed download is reported and the next candidate link is tried
        instead, so up to *limit* files are still saved when possible
        (same retry-with-next-link behaviour as before, but it now stops
        cleanly when the page runs out of links instead of looping forever).
        """
        main_directory = "simple_images/"
        for keyword in self._split_keywords(keywords):
            self._create_directories(main_directory, keyword)
            raw_html = self._download_page(self._build_search_url(keyword))
            path = main_directory + keyword
            # _create_directories normally made this already; kept as a
            # belt-and-braces check like the original.
            if not os.path.exists(path):
                os.makedirs(path)
            saved = 0
            for link in self._iter_links(raw_html):
                if saved >= limit:
                    break
                filename = keyword + "_" + str(saved + 1) + ".jpg"
                try:
                    r = requests.get(link, allow_redirects=True)
                    # 'with' guarantees the handle is closed (the original
                    # leaked it via open(...).write(...)).
                    with open(os.path.join(path, filename), 'wb') as out:
                        out.write(r.content)
                except Exception as e:
                    # Best-effort: report and move on to the next link.
                    print(e)
                else:
                    saved += 1

    def _split_keywords(self, keywords):
        """Split a comma-separated keyword string into stripped terms."""
        return [str(item).strip() for item in keywords.split(',')]

    def _build_search_url(self, keyword):
        """Build the Google Images search URL for *keyword*."""
        return ('https://www.google.com/search?q='
                + quote(keyword.encode('utf-8')) + self._QUERY_SUFFIX)

    def _iter_links(self, raw_html):
        """Yield image URLs found in *raw_html*, in page order.

        A candidate is any double-quoted "https://..." token; it is truncated
        at the first backslash (the page body keeps escape sequences because
        _download_page returns a str() of bytes) and yielded only if it looks
        like an image ('.jpg', 'png', '.ico', '.gif', '.jpeg').  Stops when
        no candidates remain — the original scanned past the end and looped
        forever / produced garbage when fewer than *limit* links existed.
        """
        end_object = -1
        while True:
            new_line = raw_html.find('"https://', end_object + 1)
            if new_line == -1:
                return  # no more quoted https URLs on the page
            end_object = raw_html.find('"', new_line + 1)
            backslash = raw_html.find('\\', new_line + 1, end_object)
            if backslash != -1:
                candidate = raw_html[new_line + 1:backslash]
            else:
                candidate = raw_html[new_line + 1:end_object]
            if ('.jpg' in candidate or 'png' in candidate or '.ico' in candidate
                    or '.gif' in candidate or '.jpeg' in candidate):
                yield candidate

    def _create_directories(self, main_directory, name):
        """Ensure *main_directory* and its *name* sub-directory both exist."""
        try:
            if not os.path.exists(main_directory):
                os.makedirs(main_directory)
                time.sleep(0.2)  # give a slow filesystem a moment to settle
            sub_directory = os.path.join(main_directory, name)
            if not os.path.exists(sub_directory):
                os.makedirs(sub_directory)
        except OSError as e:
            if e.errno != 17:  # 17 == EEXIST: benign creation race
                raise

    def _download_page(self, url):
        """Fetch *url* with a desktop User-Agent; return the body as a str.

        NOTE: the return value is str(bytes), i.e. it keeps the b'...' repr
        with backslash escapes — _iter_links relies on that when trimming.
        Raises SystemExit(1) on failure; the original called exit(0), which
        terminated the process while wrongly reporting success.
        """
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36"
                          " (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36"
        }
        try:
            req = urllib.request.Request(url, headers=headers)
            # Context manager closes the connection (original leaked it).
            with urllib.request.urlopen(req) as resp:
                return str(resp.read())
        except Exception as e:
            print(e)
            raise SystemExit(1)
0 commit comments