# In [None]:
from flask import request, Flask, jsonify
from flasgger import Swagger, LazyString, LazyJSONEncoder, swag_from
from werkzeug.utils import secure_filename
from contextlib import closing
import sqlite3
import io
import csv
import re
import pandas as pd
import matplotlib.pyplot as plt
import time
import base64
import os

def connect_db():
    """Open the SQLite database and make sure the ``texts`` table exists.

    The database file is ``database_Nasrudin.db``, resolved relative to the
    current working directory.

    Returns:
        sqlite3.Connection: an open connection. The caller owns it and is
        responsible for closing it (e.g. with ``contextlib.closing``).

    Raises:
        sqlite3.Error: if the schema statement fails. The connection is
        closed before the error propagates so it is not leaked.
    """
    db = sqlite3.connect('database_Nasrudin.db')
    try:
        # ``with db`` only scopes a transaction (commit on success, rollback
        # on error); it does NOT close the connection.
        with db:
            db.execute('''
        CREATE TABLE IF NOT EXISTS texts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            text TEXT NOT NULL,
            text_word_count INTEGER NOT NULL,
            text_char_count INTEGER NOT NULL,
            modified_input TEXT NOT NULL,
            modified_input_word_count INTEGER NOT NULL,
            modified_input_char_count INTEGER NOT NULL,
            char_count_difference INTEGER NOT NULL
        )
        ''')
    except Exception:
        # Nobody else holds a reference yet, so close here or leak the handle.
        db.close()
        raise
    return db

# Flask application object that the route decorators below register against.
app = Flask(__name__)
# flasgger's encoder, installed so the LazyString values in the template
# below can be JSON-encoded.
app.json_encoder = LazyJSONEncoder

# Top-level Swagger metadata. Every value is wrapped in LazyString so the
# lambda is evaluated when the value is rendered, not at import time.
swagger_template = {
    'info': {
        'title': LazyString(lambda: 'API Documentation for Data Processing and Modeling'),
        'version': LazyString(lambda: '1.0.0'),
        'description': LazyString(lambda: 'Dokumentasi API untuk Data Processing and Modeling'),
    },
    # request.host is read lazily, i.e. while a request is being served.
    'host': LazyString(lambda: request.host),
}

# flasgger settings: where the JSON spec and the Swagger UI are served.
swagger_config = dict(
    headers=[],
    specs=[
        dict(endpoint='docs', route='/docs.json'),
    ],
    static_url_path="/flasgger_static",
    swagger_ui=True,
    specs_route="/docs/",
)

swagger = Swagger(app, config=swagger_config, template=swagger_template)

@swag_from("C:/Users/sanga/Documents/BINAR/hello_world.yml", methods=['GET'])
@app.route('/', methods=['GET'])
def hello_world():
    """Return the fixed "Hello World" greeting as a JSON response."""
    payload = dict(
        status_code=200,
        description="Menyapa Hello World",
        data="Hello World",
    )
    return jsonify(payload)

@swag_from("C:/Users/sanga/Documents/BINAR/text_processing_file_2.yml", methods=['POST'])
@app.route('/process_text', methods=['POST'])
def text_processing():
    """Cleanse one form-submitted text with the slang dictionary and record it.

    Steps:
      1. Read the ``text`` form field.
      2. Load ``new_kamusalay.csv`` (column 0 = word to replace, column 1 =
         replacement) and replace every whole-word, case-insensitive
         occurrence in the text.
      3. Save a bar chart comparing the character count before and after.
      4. Insert one row with the counts into the ``texts`` table.

    Returns:
        A JSON response with ``status_code``, ``description``, ``data`` and
        ``chart_filename``. When the ``text`` field is missing, a JSON error
        body with HTTP status 400 is returned instead.
    """
    text = request.form.get('text')
    if text is None:
        # Previously a missing field crashed with AttributeError on None.split().
        return jsonify({
            'status_code': 400,
            'description': "Missing required form field 'text'",
            'data': "No text was provided"
        }), 400

    text_word_count = len(text.split())
    text_char_count = len(text)

    # Keys are lower-cased because matches are looked up via ``.lower()``;
    # keeping the raw CSV casing raised KeyError for any non-lowercase entry.
    replacements = {}
    # NOTE(review): the dictionary is read with the platform default encoding,
    # exactly as before -- confirm the file's real encoding and pass it explicitly.
    with open('new_kamusalay.csv', 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Skip short rows (IndexError before) and empty keys (an empty
            # alternative would match at every word boundary).
            if len(row) >= 2 and row[0]:
                replacements[row[0].lower()] = row[1]

    if replacements:
        # re.escape: dictionary words are literal text, not regex syntax.
        pattern = re.compile(
            r'\b(' + '|'.join(re.escape(word) for word in replacements) + r')\b',
            re.IGNORECASE,
        )
        modified_input = pattern.sub(
            # Fall back to the matched text if case folding ever disagrees
            # between the regex engine and str.lower().
            lambda match: replacements.get(match.group().lower(), match.group()),
            text,
        )
    else:
        # An empty dictionary would compile to a pattern matching "".
        modified_input = text

    modified_input_word_count = len(modified_input.split())
    modified_input_char_count = len(modified_input)
    char_count_difference = modified_input_char_count - text_char_count

    def plot_bar_chart(original_char_count, cleansed_char_count, filename):
        """Save a two-bar chart of the character counts and return ``filename``."""
        labels = ['Original Text', 'Cleansed Text']
        values = [original_char_count, cleansed_char_count]

        # NOTE(review): pyplot keeps global figure state; Matplotlib's docs
        # recommend building a Figure directly inside web servers.
        plt.figure(figsize=(10, 5))
        try:
            # Draw the bars once (they used to be drawn twice on top of each other).
            bars = plt.bar(labels, values, color=['blue', 'green'])
            plt.xlabel('Text Type')
            plt.ylabel('Character Count')
            plt.title('Character Count Comparison between Original & Modified Text')

            # Print each bar's value just above it.
            for bar in bars:
                yval = bar.get_height()
                plt.text(bar.get_x() + bar.get_width()/2, yval + 0.05, round(yval, 2), ha='center', va='bottom', fontsize=10)

            plt.savefig(filename)
        finally:
            # Always release the figure, even if saving fails.
            plt.close()

        return filename

    # Second-resolution timestamp; written to the current working directory.
    filename = f"chart_{int(time.time())}.png"
    chart_path = plot_bar_chart(text_char_count, modified_input_char_count, filename)

    with closing(connect_db()) as db:
        cur = db.cursor()
        # The cleansed text is stored through str.capitalize(), which upper-cases
        # the first character and lower-cases the rest; the counts are unaffected.
        cur.execute("INSERT INTO texts (text, text_word_count, text_char_count, modified_input, modified_input_word_count, modified_input_char_count, char_count_difference) VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (text, text_word_count, text_char_count, modified_input.capitalize(), modified_input_word_count, modified_input_char_count, char_count_difference))
        db.commit()

    json_response = {
        'status_code': 200,
        'description': "Notification of successful text cleansing",
        'data': "Your input has been cleansed!",
        'chart_filename': chart_path
    }

    return jsonify(json_response)

@swag_from("C:/Users/sanga/Documents/BINAR/file_processing_file_2.yml", methods=['POST'])
@app.route('/process_csv', methods=['POST'])
def text_processing_file():
    """Cleanse every tweet of an uploaded CSV and record the results.

    Steps:
      1. Read the uploaded ``file`` as ISO-8859-1 CSV and take its ``Tweet``
         column (empty cells are skipped).
      2. Load ``new_kamusalay.csv`` (column 0 = word to replace, column 1 =
         replacement) and replace every whole-word, case-insensitive
         occurrence in each tweet.
      3. Insert one row per tweet into the ``texts`` table.
      4. Save a bar chart and a box plot of the per-tweet character count
         difference (cleansed minus original).

    Returns:
        A JSON response with ``status_code``, ``description``, ``data``,
        ``bar_chart_filename`` and ``box_chart_filename``. When the upload is
        missing, unparsable, or has no ``Tweet`` column, a JSON error body
        with HTTP status 400 is returned instead.
    """
    uploaded_file = request.files.get('file')
    if uploaded_file is None:
        return jsonify({
            'status_code': 400,
            'description': "Missing required file field 'file'",
            'data': "No file was uploaded"
        }), 400

    try:
        df = pd.read_csv(uploaded_file, delimiter=',', encoding='ISO-8859-1')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        return jsonify({
            'status_code': 400,
            'description': "Uploaded file could not be parsed as CSV",
            'data': "Your file could not be read"
        }), 400

    if 'Tweet' not in df.columns:
        return jsonify({
            'status_code': 400,
            'description': "Uploaded CSV has no 'Tweet' column",
            'data': "Your file could not be processed"
        }), 400

    # use this to input all tweets within the uploaded csv files
    # Empty cells load as NaN (a float), which used to crash on ``.split()``;
    # drop them and make sure every remaining value is a str.
    tweet_list = df['Tweet'].dropna().astype(str).tolist()

    # use the following two lines to limit the number of tweets & randomize tweet selection
    # sample_size = 100
    # tweet_list = df['Tweet'].sample(n=sample_size, random_state=42).tolist()

    # Keys are lower-cased because matches are looked up via ``.lower()``;
    # keeping the raw CSV casing raised KeyError for any non-lowercase entry.
    replacements = {}
    # NOTE(review): the dictionary is read with the platform default encoding,
    # exactly as before -- confirm the file's real encoding and pass it explicitly.
    with open('new_kamusalay.csv', 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Skip short rows (IndexError before) and empty keys (an empty
            # alternative would match at every word boundary).
            if len(row) >= 2 and row[0]:
                replacements[row[0].lower()] = row[1]

    if replacements:
        # re.escape: dictionary words are literal text, not regex syntax.
        pattern = re.compile(
            r'\b(' + '|'.join(re.escape(word) for word in replacements) + r')\b',
            re.IGNORECASE,
        )
    else:
        # An empty dictionary would compile to a pattern matching "".
        pattern = None

    def cleanse(tweet):
        """Return ``tweet`` with every dictionary word replaced."""
        if pattern is None:
            return tweet
        return pattern.sub(
            lambda match: replacements.get(match.group().lower(), match.group()),
            tweet,
        )

    def plot_bar_chart(char_count_differences, filename):
        """Save a bar chart of the differences in ascending order; return ``filename``."""
        sorted_differences = sorted(char_count_differences)

        # NOTE(review): pyplot keeps global figure state; Matplotlib's docs
        # recommend building a Figure directly inside web servers.
        plt.figure(figsize=(15, 7))
        try:
            # Draw the bars once (they used to be drawn twice). The former
            # plt.legend() call is gone: no artist carries a label, so it
            # could only emit a warning.
            bars = plt.bar(range(len(sorted_differences)), sorted_differences, color='purple')
            # NOTE(review): bars are sorted by value, so the x position is a
            # rank rather than the tweet's position in the file.
            plt.xlabel('Tweet Index')
            plt.ylabel('Character Count Difference')
            plt.title('bar chart of character Count Difference between Original and Modified Tweets')

            # Print each bar's value next to it.
            for bar in bars:
                yval = bar.get_height()
                plt.text(bar.get_x() + bar.get_width()/2, yval + 0.05, round(yval, 2), ha='center', va='bottom', fontsize=10)

            plt.savefig(filename)
        finally:
            # Always release the figure, even if saving fails.
            plt.close()

        return filename

    def plot_box_chart(char_count_differences, filename):
        """Save a horizontal box plot of the differences; return ``filename``."""
        plt.figure(figsize=(10, 7))
        try:
            boxprops = dict(linestyle='-', linewidth=1, color='black')
            medianprops = dict(linestyle='-', linewidth=1.5, color='red')

            bp = plt.boxplot(char_count_differences, vert=False, patch_artist=True, boxprops=boxprops, medianprops=medianprops)

            for box in bp['boxes']:
                box.set_facecolor('blue')

            # vert=False puts the values on the x-axis, so the label belongs
            # there (it used to be set on the y-axis).
            plt.xlabel('Character Count Difference')
            plt.title('Box Plot of Character Count Difference between Original and Modified Tweets')
            plt.savefig(filename)
        finally:
            plt.close()

        return filename

    rows = []
    char_count_differences = []
    for tweet in tweet_list:
        modified_input = cleanse(tweet)
        text_char_count = len(tweet)
        modified_input_char_count = len(modified_input)
        char_count_difference = modified_input_char_count - text_char_count

        # The cleansed text is stored through str.capitalize(), which upper-cases
        # the first character and lower-cases the rest; the counts are unaffected.
        rows.append((
            tweet,
            len(tweet.split()),
            text_char_count,
            modified_input.capitalize(),
            len(modified_input.split()),
            modified_input_char_count,
            char_count_difference,
        ))
        char_count_differences.append(char_count_difference)

    # One connection and one commit for the whole upload instead of opening
    # the database (and re-checking the schema) once per tweet.
    with closing(connect_db()) as db:
        db.executemany(
            "INSERT INTO texts (text, text_word_count, text_char_count, modified_input, modified_input_word_count, modified_input_char_count, char_count_difference) VALUES (?, ?, ?, ?, ?, ?, ?)",
            rows,
        )
        db.commit()

    # Second-resolution timestamps; files go to the current working directory.
    filename_bar = f"chart_bar_{int(time.time())}.png"
    chart_path_bar = plot_bar_chart(char_count_differences, filename_bar)

    filename_box = f"chart_box_{int(time.time())}.png"
    chart_path_box = plot_box_chart(char_count_differences, filename_box)

    json_response = {
        'status_code': 200,
        'description': "Notification message",
        'data': "Your file has been uploaded & cleansed!",
        'bar_chart_filename': chart_path_bar,
        'box_chart_filename': chart_path_box
    }

    return jsonify(json_response)

# Call app.run() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    app.run()