In [None]:
import re
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

from flask import Flask, jsonify, request
from flasgger import Swagger, LazyString, LazyJSONEncoder
from flasgger import swag_from

# ---------------------------------------------------------------------------
# Flask application and Swagger (flasgger) documentation setup.
# ---------------------------------------------------------------------------
app = Flask(__name__)
# Encoder shipped with flasgger alongside LazyString; the template below
# contains LazyString values.
# NOTE(review): `app.json_encoder` is deprecated in recent Flask releases --
# confirm the Flask version this notebook targets before upgrading.
app.json_encoder = LazyJSONEncoder

# Metadata shown in the Swagger UI. Every value is wrapped in LazyString so
# it is evaluated lazily; `host` in particular reads the current request.
swagger_template = {
    'info': {
        'title': LazyString(lambda: 'API Documentation for Data Processing and Modeling'),
        'version': LazyString(lambda: '1.0.0'),
        'description': LazyString(lambda: 'Dokumentasi API untuk Data Processing dan Modeling'),
    },
    'host': LazyString(lambda: request.host),
}

# Routes under which flasgger serves the JSON spec and the interactive UI.
swagger_config = dict(
    headers=[],
    specs=[
        dict(endpoint='docs', route='/docs.json'),
    ],
    static_url_path="/flasgger_static",
    swagger_ui=True,
    specs_route="/docs/",
)

swagger = Swagger(app, config=swagger_config, template=swagger_template)

# ---------------------------------------------------------------------------
# Lookup tables loaded once at start-up from the working directory.
# ---------------------------------------------------------------------------
# Abusive-word list; the file-processing endpoint reads its 'ABUSIVE' column.
df_abusive = pd.read_csv('abusive.csv')

# Two-column dictionary read without a header row; the columns are named
# "tidak baku" / "baku" (Indonesian for non-standard / standard).
df_kamusalay = pd.read_csv(
    'new_kamusalay.csv',
    encoding='latin-1',
    header=None,
)
df_kamusalay.columns = ["tidak baku", "baku"]

@swag_from("C:/Users/asus/docs/hello_world.yml", methods=['GET'])
@app.route('/', methods=['GET'])
def hello_world():
    """Serve a fixed JSON greeting on the root URL.

    Returns:
        A Flask JSON response with the keys ``status_code``, ``description``
        and ``data``.
    """
    payload = dict(
        status_code=200,
        description="Menyapa Hello World",
        data="Hello World",
    )
    return jsonify(payload)

@swag_from("C:/Users/asus/docs/text_processing.yml", methods=['POST'])
@app.route('/text-processing', methods=['POST'])
def text_processing():
    """Replace every non-alphanumeric character of the posted text with a space.

    Reads the ``text`` form field of the POST request.

    Returns:
        On success, a JSON response whose ``data`` key holds the processed
        text. When the ``text`` field is absent, a JSON error body together
        with HTTP status 400.
    """
    text = request.form.get('text')

    # `form.get` yields None for a missing field, and re.sub() raises
    # TypeError on None; answer with a client error instead of a 500.
    if text is None:
        error_response = {
            'status_code': 400,
            'description': "Parameter 'text' tidak ditemukan",
            'data': None,
        }
        return jsonify(error_response), 400

    json_response = {
        'status_code': 200,
        'description': "Teks yang sudah diproses",
        'data': re.sub(r'[^a-zA-Z0-9]', ' ', text),
    }
    return jsonify(json_response)

# URLs are stripped first, while their punctuation is still intact.
_URL_RE = re.compile(r'(www\.\S+)|(https?://\S+)')
# "rt" and "user" are removed only as whole words so that words merely
# containing those letters survive (presumably the retweet marker and the
# anonymised-mention placeholder -- confirm against the dataset).
_TWEET_NOISE_RE = re.compile(r'\b(?:rt|user)\b')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9]+')

_TWEET_DB_PATH = 'Maheswara_Gold_Challenge.db'


def _clean_tweet(text):
    """Lower-case a tweet and reduce it to space-separated alphanumeric words."""
    lowered = text.lower().replace('\n', ' ')
    without_urls = _URL_RE.sub(' ', lowered)
    without_noise = _TWEET_NOISE_RE.sub(' ', without_urls)
    # Punctuation becomes a space (not an empty string) so that neighbouring
    # words are never glued together.
    alnum_only = _NON_ALNUM_RE.sub(' ', without_noise)
    return ' '.join(alnum_only.split())


def _count_abusive(text, abusive_words):
    """Count the whitespace-separated tokens of *text* found in *abusive_words*."""
    return sum(1 for token in text.split() if token.lower() in abusive_words)


def _persist_tweets(frame):
    """Store the tweet statistics in the SQLite table ``df`` if it is empty."""
    columns = ['Tweet', 'no_char', 'no_words',
               'cleaned_tweet', 'no_char_2', 'no_words_2']
    conn = sqlite3.connect(_TWEET_DB_PATH)
    try:
        # `with conn` commits once on success and rolls back on an exception.
        with conn:
            conn.execute("""
    create table if not exists df (Tweet varchar(255), no_char int, no_words int, cleaned_tweet varchar(255), no_char_2 int, no_words_2 int);
    """)
            num_rows = conn.execute("select count(*) from df").fetchone()[0]
            # Rows are only written into an empty table, so repeated uploads
            # do not append to data stored by an earlier request.
            if num_rows == 0:
                # Series.tolist() yields plain Python scalars, which sqlite3
                # can bind (numpy integers are not reliably supported).
                rows = list(zip(*(frame[col].tolist() for col in columns)))
                conn.executemany(
                    "insert into df (Tweet, no_char, no_words, cleaned_tweet, no_char_2, no_words_2) values (?,?,?,?,?,?)",
                    rows,
                )
    finally:
        conn.close()


def _save_plots(frame):
    """Write the count plot and box plot JPEG files for *frame*.

    Nothing is drawn for an empty frame. Each figure is closed after saving
    so that figures do not pile up across requests.
    NOTE(review): pyplot keeps global state; confirm this is acceptable if
    the server handles requests concurrently.
    """
    if frame.empty:
        return

    fig = plt.figure(figsize=(10, 7))
    try:
        countplot = sns.countplot(data=frame, x="estimated_no_abs_words")
        # Label each bar with its height.
        for p in countplot.patches:
            countplot.annotate(
                format(p.get_height(), '.0f'),
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='center',
                xytext=(0, 10), textcoords='offset points',
            )
        plt.title('Estimated Number of Abusive Words')
        plt.xlabel('Estimated Number of Abusive Words')
        plt.savefig('new_countplot.jpeg')
    finally:
        plt.close(fig)

    fig = plt.figure(figsize=(20, 4))
    try:
        sns.boxplot(data=frame, x="no_words_2")
        plt.title('Number of Words (after tweet cleansing)')
        plt.xlabel('')
        plt.savefig('new_boxplot.jpeg')
    finally:
        plt.close(fig)


@swag_from("C:/Users/asus/docs/file_processing.yml", methods=['POST'])
@app.route('/text-processing-file', methods=['POST'])
def text_processing_file():
    """Clean the tweets of an uploaded CSV file and return the cleaned texts.

    Reads the uploaded ``file`` (CSV, latin-1) and uses its ``Tweet`` column.
    Missing and duplicate tweets are dropped. For every remaining tweet the
    character/word counts before and after cleansing and the number of words
    found in ``df_abusive['ABUSIVE']`` are computed.

    Side effects:
        * rebinds the module-level ``df`` to the processed DataFrame;
        * inserts the rows into table ``df`` of 'Maheswara_Gold_Challenge.db'
          when that table is empty (the abusive-word count is not stored);
        * writes 'new_countplot.jpeg' and 'new_boxplot.jpeg'.

    Returns:
        A JSON response whose ``data`` key lists the cleaned tweets, or a
        JSON error body with HTTP status 400 when the upload is missing,
        cannot be parsed as CSV, or lacks a ``Tweet`` column.
    """
    global df

    def bad_request(description):
        # Same envelope as the success response, with a 400 status.
        body = {'status_code': 400, 'description': description, 'data': None}
        return jsonify(body), 400

    file = request.files.get('file')
    if file is None:
        return bad_request("File tidak ditemukan")

    try:
        raw = pd.read_csv(file, encoding='latin-1')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        return bad_request("File tidak dapat dibaca sebagai CSV")

    if 'Tweet' not in raw.columns:
        return bad_request("Kolom 'Tweet' tidak ditemukan")

    # Work on an explicit copy so the column assignments below do not target
    # a slice of `raw`; NaN tweets are dropped because len() fails on them.
    frame = raw[['Tweet']].dropna().copy()
    frame['Tweet'] = frame['Tweet'].astype(str)
    frame = frame.drop_duplicates()

    frame['no_char'] = frame['Tweet'].map(len)
    frame['no_words'] = frame['Tweet'].map(lambda tweet: len(tweet.split()))

    frame['cleaned_tweet'] = frame['Tweet'].map(_clean_tweet)
    frame['no_char_2'] = frame['cleaned_tweet'].map(len)
    frame['no_words_2'] = frame['cleaned_tweet'].map(lambda tweet: len(tweet.split()))

    # A set gives O(1) membership tests; a word listed more than once in the
    # abusive list is therefore counted once per occurrence in the tweet.
    abusive_words = set(df_abusive['ABUSIVE'])
    frame['estimated_no_abs_words'] = frame['cleaned_tweet'].map(
        lambda tweet: _count_abusive(tweet, abusive_words)
    )

    # Kept for backward compatibility: the processed frame stays available
    # as the module-level `df`.
    df = frame

    _persist_tweets(frame)
    _save_plots(frame)

    json_response = {
        'status_code': 200,
        'description': "Teks yang sudah diproses",
        'data': frame['cleaned_tweet'].tolist(),
    }
    return jsonify(json_response)

# Start Flask's built-in server only when this file is executed directly
# (not when it is imported); app.run() is called with its default arguments.
if __name__ == "__main__":
    app.run()

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
