In [12]:
pip install flask pyngrok flask-cors torch transformers scikit-learn pandas numpy



In [17]:
!python -m pip install flask flask-cors



In [18]:
!python -m pip install -U flask-cors



In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import torch
import json
import logging
from datetime import datetime
import threading
from transformers import BertTokenizer, BertForSequenceClassification, BertForTokenClassification, AutoTokenizer

# Emit INFO-level (and above) records through the root logger configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
# Wrap the app with flask-cors using its default settings
# (presumably allows cross-origin requests on every route -- confirm against
# the flask-cors documentation if tighter rules are needed).
CORS(app)

# Module-level state shared by the request handlers. Every name below is
# (re)assigned by initialize_models(); until that runs they keep these
# placeholder values.
intent_model = None    # BertForSequenceClassification used by predict_intent()
ner_model = None       # BertForTokenClassification used by predict_ner(), or None
tokenizer = None       # tokenizer feeding the intent model
tokenizer_ner = None   # tokenizer feeding the NER model
intent_labels = []     # intent tags, indexed by the intent model's class id
intent_data = None     # {'intents': [...]} structure returned by load_intent()
label2id = None        # NER label -> id mapping read from ner_label_mapping.json
id2label = None        # NER id (int) -> label mapping read from the same file

def load_intent(file_path):
    """Build the intent dataset from a text file of labelled example patterns.

    Every non-blank line is expected to contain an example utterance followed
    by its intent tag, the tag being the last whitespace-separated word on the
    line. Lines that hold a single word (no pattern/tag pair) are skipped.

    Args:
        file_path: Path to the UTF-8 encoded intent file.

    Returns:
        dict: ``{'intents': [...]}``. Each entry is a dict with the keys
        ``'tag'``, ``'patterns'`` (all utterances seen for that tag, in file
        order) and ``'responses'`` (a one-item list holding the tag itself).
        Entries are ordered by the first appearance of their tag.

    Raises:
        OSError: If the file cannot be opened.
    """
    intents_by_tag = {}

    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising every line first.
        for raw_line in f:
            # Split on the last run of *any* whitespace so that tab-separated
            # lines are accepted and repeated spaces before the tag do not
            # leave trailing whitespace in the pattern.
            parts = raw_line.strip().rsplit(None, 1)
            if len(parts) != 2:
                # Blank line or a lone word: nothing usable on this line.
                continue

            pattern, tag = parts
            entry = intents_by_tag.setdefault(
                tag,
                {'tag': tag, 'patterns': [], 'responses': [f"{tag}"]},
            )
            entry['patterns'].append(pattern)

    return {'intents': list(intents_by_tag.values())}

def initialize_models():
    """Load intent data, tokenizers and fine-tuned weights into module globals.

    Steps, in order:
      1. Parse ``intent.txt`` with ``load_intent`` and derive the intent labels.
      2. Read ``ner_label_mapping.json``; a missing file only disables NER.
      3. Create both tokenizers from the IndoBERT checkpoint.
      4. Build the intent classifier and load ``intent_model.pth`` on CPU.
      5. If a NER label mapping exists, build the token classifier and load
         ``ner_model.pth`` on CPU; otherwise leave ``ner_model`` as ``None``.

    Returns:
        bool: ``True`` when every step succeeded, ``False`` if any exception
        was raised (the error is logged, not propagated).
    """
    global intent_model, ner_model, tokenizer, tokenizer_ner, intent_labels, intent_data, label2id, id2label

    # Same pretrained checkpoint backs both tokenizers and both model heads.
    checkpoint = 'indobenchmark/indobert-base-p2'

    try:
        logger.info("🔁 Initializing models...")

        intent_data = load_intent('intent.txt')

        tags = []
        for entry in intent_data['intents']:
            tags.append(entry['tag'])
        intent_labels = tags
        logger.info(f"✔ Loaded {len(intent_labels)} intent labels.")

        try:
            with open('ner_label_mapping.json', 'r', encoding='utf-8') as mapping_file:
                mapping = json.load(mapping_file)
                label2id = mapping['label2id']
                # JSON object keys are strings; the model config needs int ids.
                id2label = dict(
                    (int(raw_id), name) for raw_id, name in mapping['id2label'].items()
                )
            logger.info("✔ NER label mapping loaded.")
        except FileNotFoundError:
            logger.warning("⚠ NER label mapping file not found.")
            label2id = None
            id2label = None

        tokenizer = BertTokenizer.from_pretrained(checkpoint)
        tokenizer_ner = AutoTokenizer.from_pretrained(checkpoint)

        intent_model = BertForSequenceClassification.from_pretrained(
            checkpoint,
            num_labels=len(intent_labels),
        )
        intent_state = torch.load('intent_model.pth', map_location='cpu')
        intent_model.load_state_dict(intent_state)
        intent_model.eval()

        if not label2id:
            # No label mapping available: NER stays disabled.
            ner_model = None
        else:
            ner_model = BertForTokenClassification.from_pretrained(
                checkpoint,
                num_labels=len(label2id),
                id2label=id2label,
                label2id=label2id,
            )
            ner_state = torch.load('ner_model.pth', map_location='cpu')
            ner_model.load_state_dict(ner_state)
            ner_model.eval()
            logger.info("✔ NER model loaded.")

        logger.info("✅ All models loaded successfully!")
        return True

    except Exception as exc:
        logger.error(f"❌ Failed to load models: {str(exc)}")
        return False

def predict_intent(text):
    """Classify ``text`` with the intent model.

    Args:
        text: The user utterance to classify.

    Returns:
        tuple: ``(intent, confidence)``. ``confidence`` is the highest softmax
        probability. When it is below 0.8 the intent is reported as
        ``'fallback'`` (the confidence is still returned). If anything raises,
        the error is logged and ``("fallback", 0.0)`` is returned.
    """
    try:
        model_inputs = tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=128,
            return_tensors='pt',
        )

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = intent_model(**model_inputs).logits
            confidence = torch.max(torch.softmax(logits, dim=-1)).item()
            label_index = torch.argmax(logits, dim=1).item()
            predicted_intent = intent_labels[label_index]

        # Low-confidence predictions are replaced by the fallback intent.
        if confidence < 0.8:
            return 'fallback', confidence
        return predicted_intent, confidence
    except Exception as exc:
        logger.error(f"❌ Intent prediction error: {str(exc)}")
        return "fallback", 0.0

def predict_ner(text):
    """Tag each whitespace-separated word of ``text`` with a NER label.

    Args:
        text: The user utterance to tag.

    Returns:
        list: ``(word, label)`` tuples, one per word, where the label is the
        prediction for the first sub-token of that word. An empty list is
        returned when no NER model is loaded or when prediction raises (the
        error is logged).
    """
    if not ner_model:
        return []

    try:
        words = text.split()
        encoded = tokenizer_ner(
            words,
            is_split_into_words=True,
            return_tensors='pt',
            truncation=True,
            padding='max_length',
            max_length=128,
        )
        with torch.no_grad():
            logits = ner_model(**encoded).logits
            label_ids = torch.argmax(logits, dim=-1).cpu().numpy()[0]

        tagged = []
        handled = set()
        for position, word_index in enumerate(encoded.word_ids(batch_index=0)):
            # Skip positions with no source word, sub-tokens of a word that
            # was already tagged, and indices outside the word list.
            if word_index is None or word_index in handled or word_index >= len(words):
                continue
            tagged.append((words[word_index], id2label[label_ids[position]]))
            handled.add(word_index)
        return tagged
    except Exception as exc:
        logger.error(f"❌ NER prediction error: {str(exc)}")
        return []

def extract_entities(ner_result):
    """Group BIO-tagged words into entity strings keyed by entity type.

    Args:
        ner_result: Iterable of ``(word, label)`` pairs where labels look like
            ``'B-<type>'``, ``'I-<type>'`` or anything else (treated as
            "outside").

    Returns:
        dict: Maps each entity type to the list of entity strings found, the
        words of one entity being joined with single spaces.

    Notes:
        * An ``I-`` label that does not continue the currently open entity of
          the same type starts a new entity, as if it were ``B-``.
        * An ``I-`` labelled word identical to the previous word is ignored.
    """
    grouped = {}
    open_type = None   # type of the entity being assembled, None if none
    span_words = []    # words collected so far for that entity
    last_word = None

    def close_span():
        # Store the entity under construction, if there is one.
        if open_type is not None and span_words:
            grouped.setdefault(open_type, []).append(' '.join(span_words))

    for word, tag in ner_result:
        if word == last_word and tag.startswith("I-"):
            continue

        if tag.startswith('B-'):
            close_span()
            open_type, span_words = tag[2:], [word]
        elif tag.startswith('I-'):
            if open_type is not None and open_type == tag[2:]:
                span_words.append(word)
            else:
                # Orphan or mismatched continuation: begin a fresh entity.
                close_span()
                open_type, span_words = tag[2:], [word]
        else:
            close_span()
            open_type, span_words = None, []

        last_word = word

    # Don't forget the last entity.
    close_span()

    return grouped

def generate_response(intent, entities=None):
    """Produce the reply text for a predicted intent.

    The first response template of the first intent entry whose tag equals
    ``intent`` is used; every ``{<entity type>}`` placeholder in it is
    replaced with the matching entity value (the first item when the value is
    a list).

    Args:
        intent: Predicted intent tag.
        entities: Optional mapping of entity type to a value or list of values.

    Returns:
        str: The filled-in response, a "not understood" message when no intent
        entry matches, or a generic error message if anything raises (the
        error is logged).
    """
    try:
        matched = next(
            (item for item in intent_data['intents'] if item['tag'] == intent),
            None,
        )
        if matched is None:
            return "Maaf, saya tidak mengerti pertanyaan Anda."

        reply = matched['responses'][0]
        if not entities:
            return reply

        for slot, found in entities.items():
            filler = found[0] if isinstance(found, list) else found
            reply = reply.replace(f'{{{slot}}}', filler)
        return reply
    except Exception as exc:
        logger.error(f"❌ Error generating response: {str(exc)}")
        return "Maaf, terjadi kesalahan."

@app.route('/health', methods=['GET'])
def health():
    """Report that the API is up and which models are currently loaded."""
    timestamp = datetime.now().isoformat()
    models_loaded = {
        'intent': intent_model is not None,
        'ner': ner_model is not None,
    }
    payload = {
        'status': 'OK',
        'message': 'Chatbot API is running',
        'timestamp': timestamp,
        'models_loaded': models_loaded,
    }
    return jsonify(payload)

@app.route('/predict', methods=['POST'])
def predict():
    """Run intent and NER prediction on the posted text and build a reply.

    Expects a JSON object body with a string field ``text``.

    Returns:
        * 200 with ``intent``, ``confidence``, ``entities``, ``ner_tokens``,
          ``response``, ``status`` and ``timestamp`` on success.
        * 400 with an ``error`` message when the request is not JSON, the body
          is malformed or not a JSON object, or ``text`` is missing, not a
          string, or blank.
        * 500 with ``error`` and ``details`` when an unexpected exception
          occurs while handling the request.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400

        # silent=True makes a malformed body come back as None instead of
        # raising BadRequest, which the broad handler below would otherwise
        # report as a 500 even though the client is at fault.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object', 'status': 'error'}), 400

        text = data.get('text', '')
        if not isinstance(text, str):
            # e.g. {"text": null} or {"text": 5}: reject instead of crashing
            # on .strip().
            return jsonify({'error': 'Text must be a string', 'status': 'error'}), 400

        text = text.strip()
        if not text:
            return jsonify({'error': 'Text cannot be empty', 'status': 'error'}), 400

        logger.info(f"📥 Input text: {text}")

        intent, confidence = predict_intent(text)
        logger.info(f"🎯 Predicted intent: {intent}")

        ner_result = predict_ner(text)
        logger.info(f"📦 Raw NER: {ner_result}")

        entities = extract_entities(ner_result)
        logger.info(f"🧠 Extracted entities: {entities}")

        response = generate_response(intent, entities)

        result = {
            'intent': intent,
            'confidence': confidence,
            'entities': entities,
            'ner_tokens': ner_result,
            'response': response,
            'status': 'success',
            'timestamp': datetime.now().isoformat(),
        }

        return jsonify(result)

    except Exception as e:
        # logger.exception keeps the traceback, which str(e) alone loses.
        logger.exception(f"❌ Server error: {str(e)}")
        # NOTE(review): 'details' exposes the raw exception text to clients;
        # kept for backward compatibility -- consider dropping it in production.
        return jsonify({'error': 'Internal server error', 'details': str(e), 'status': 'error'}), 500

@app.route('/test', methods=['GET'])
def test():
    """Smoke-test endpoint: return a fixed message and the current timestamp."""
    payload = {
        'message': 'API is working!',
        'timestamp': datetime.now().isoformat(),
    }
    return jsonify(payload)


if __name__ == '__main__':
    # Serve on port 5000 on all interfaces, but only once every model loaded.
    models_ready = initialize_models()
    if not models_ready:
        logger.error("❌ Server not started due to model loading failure.")
    else:
        app.run(host='0.0.0.0', port=5000)

INFO:__main__:🔁 Initializing models...
INFO:__main__:✔ Loaded 9 intent labels.
INFO:__main__:✔ NER label mapping loaded.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
INFO:__main__:✔ NER model loaded.
INFO:__main__:✅ All models loaded successfully!


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.61.77:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:__main__:📥 Input text: Pemilik kursi rapat siapa
INFO:__main__:🎯 Predicted intent: kepemilikan_barang
INFO:__main__:📦 Raw NER: [('Pemilik', 'B-karyawan'), ('kursi', 'B-item'), ('rapat', 'I-item'), ('siapa', 'O')]
INFO:__main__:🧠 Extracted entities: {'karyawan': ['Pemilik'], 'item': ['kursi rapat']}
INFO:werkzeug:127.0.0.1 - - [30/Jul/2025 21:33:10] "POST /predict HTTP/1.1" 200 -
INFO:__main__:📥 Input text: cek harga cctv dong
INFO:__main__:🎯 Predicted intent: harga_barang
INFO:__main__:📦 Raw NER: [('cek', 'O'), ('harga', 'B-price'), ('cctv', 'I-item'), ('dong', 'O')]
INFO:__main__:🧠 Extracted entities: {'price': ['harga'], 'item': ['cctv']}
INFO:werkzeug:127.0.0.1 - - [30/Jul/2025 21:33:24] "POST /predict HTTP/1.1" 200 -
INFO:__main__:📥 Input text: cek harga proyektor
INFO:__main__:🎯 Predicted intent: harga_barang
INFO:__main