In [1]:
# Import Library
import sqlite3
import os
from flask import Flask, flash, request, redirect, url_for, render_template, Markup, jsonify
from werkzeug.utils import secure_filename
from flask import send_from_directory
import pandas as pd
import re

In [2]:
# Hapus Stopwords
import nltk
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.corpus.stopwords.words('indonesian')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nigon\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nigon\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['ada',
 'adalah',
 'adanya',
 'adapun',
 'agak',
 'agaknya',
 'agar',
 'akan',
 'akankah',
 'akhir',
 'akhiri',
 'akhirnya',
 'aku',
 'akulah',
 'amat',
 'amatlah',
 'anda',
 'andalah',
 'antar',
 'antara',
 'antaranya',
 'apa',
 'apaan',
 'apabila',
 'apakah',
 'apalagi',
 'apatah',
 'artinya',
 'asal',
 'asalkan',
 'atas',
 'atau',
 'ataukah',
 'ataupun',
 'awal',
 'awalnya',
 'bagai',
 'bagaikan',
 'bagaimana',
 'bagaimanakah',
 'bagaimanapun',
 'bagi',
 'bagian',
 'bahkan',
 'bahwa',
 'bahwasanya',
 'baik',
 'bakal',
 'bakalan',
 'balik',
 'banyak',
 'bapak',
 'baru',
 'bawah',
 'beberapa',
 'begini',
 'beginian',
 'beginikah',
 'beginilah',
 'begitu',
 'begitukah',
 'begitulah',
 'begitupun',
 'bekerja',
 'belakang',
 'belakangan',
 'belum',
 'belumlah',
 'benar',
 'benarkah',
 'benarlah',
 'berada',
 'berakhir',
 'berakhirlah',
 'berakhirnya',
 'berapa',
 'berapakah',
 'berapalah',
 'berapapun',
 'berarti',
 'berawal',
 'berbagai',
 'berdatangan',
 'beri',
 'berikan',
 'berikut'

In [3]:
# Swagger
from flasgger import Swagger, LazyString, LazyJSONEncoder
from flasgger import swag_from

In [4]:
app = Flask(__name__, template_folder='templates')
app.secret_key = 'abednigo'

In [5]:
##### Tampilan Dasbor Halaman Utama as .html
@app.route("/", methods=['GET'])
def home():
    return render_template('home.html')

In [8]:
####Kemungkinan tidak dibutuhkan, jadi bisa dioverride/direvisi
def cleansing(text):
    
    # Mengubah kalimat menjadi huruf kecil
    text = text.lower()

    # Menghapus hastag
    pola_1 = r'#([^\s]+)'
    text = re.sub(pola_1, '', text)

    # Menghapus mention
    pola_2 = r'@[^\s]+'
    text = re.sub(pola_2, '', text)

    # Menghapus user, retweet, \t, \r, url, xd, orang, kalo
    pola_3 = r'(user|retweet|\\t|\\r|url|xd|orang|kalo)'
    text = re.sub(pola_3, '', text)

    # Menghapus single character
    pola_4 = r'\b\w{1,3}\b'
    text = re.sub(pola_4, '', text)

    # Menghapus tanda baca, karakter operasi matematika, dll.
    pola_5 = r'[\,\@\*\_\-\!\:\;\?\'\.\"\)\(\{\}\<\>\+\%\$\^\#\/\`\~\|\&\|]'
    text = re.sub(pola_5, ' ', text)
    
    # Menghapus emoji
    pola_6 = r'\\[a-z0-9]{1,5}'
    text = re.sub(pola_6, '', text)

    # Menghapus karekter yang bukan termasuk ASCII
    pola_7 = r'[^\x00-\x7f]'
    text = re.sub(pola_7, '', text)

    # Menghapus url yang diawali dengan http atau https
    pola_8 = r'(https|https:)'
    text = re.sub(pola_8, '', text)

    # Menghapus karakter '\',  '[',  ']'
    pola_9 = r'[\\\]\[]'
    text = re.sub(pola_9, '', text)

    # Menghapus "wkwkwk"
    pola_10 = r'\bwk\w+'
    text = re.sub(pola_10, '', text)

    # Menghapus digit karakter
    pola_11 = r'\d+'
    text = re.sub(pola_11, '', text)

    # Menghapus karekter yang bukan termasuk ASCII
    pola_12 = r'(\\u[0-9A-Fa-f]+)'
    text = re.sub(pola_12, '', text)
    
    # Menghapus spasi yang berlebih
    pola_13 = r'(\s+|\\n)'
    text = re.sub(pola_13, ' ', text)
    
    # Menghapus spasi pada kalimat pertama dan terakhir
    text = text.rstrip()
    text = text.lstrip()
    return text


In [11]:
def replaceThreeOrMore(text):
    # Menghapus tiga atau lebih pengulangan karakter termasuk newlines.
    pattern = re.compile(r"(.)\1{1,}", re.DOTALL)
    return pattern.sub(r"\1\1", text)

In [12]:
def remove_stopwords(text):
    return ' '.join([word for word in word_tokenize(text) if word not in indo_stop_words])
### Catatan: Indo stop words ini bisa diganti dengan list_abusive_words, diganti ke list formal words, dan bisa diskip

In [17]:
##### Tampilan Dasbor Halaman Utama as .html
@app.route("/", methods=['GET'])
def homepage():
    return render_template('home.html')

In [18]:
##### Membaca File, Menampilkan Dataframe .HTML #####
@app.route("/data_before_cleansing", methods=["GET", "POST"])
def read_file_to_html():
    conn = sqlite3.connect('database.db')
    cursor = conn.cursor()

    if request.method == 'POST':
        csv_file = request.files.get("file") ### Catatan: Ambil CSV nya, kalau bukan, jadi invalid file
        if not csv_file or not csv_file.filename.endswith('.csv'):
            return 'Invalid file'

    # Membaca file .csv
        df = pd.read_csv(csv_file, encoding='latin-1')

        conn = sqlite3.connect('database.db') ### Catatan: Redundant
        cursor = conn.cursor() ### Catatan: Redundant
        table = df.to_sql('challenge', conn, if_exists='replace') ### Catatan: dilihat dulu hasilnya # to prove that this code is running well, drop the "upload_and_download_csv_file" table first from the database via the app_sqlite.py file
        conn.commit()
        conn.close()

        df = df.to_html(index=False, justify='left') ###Catatan : render jadi html

        return Markup(df)

    # Jika memakai Method .get
    return render_template("file.html")

AssertionError: View function mapping is overwriting an existing endpoint function: read_file_to_html

In [16]:
# if __name__ == '__main__':
#     app.run(debug=True)

In [15]:
app.run(debug=True)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Restarting with watchdog (windowsapi)


SystemExit: 1