In [26]:
!pip install flask flask-cors pyngrok joblib scikit-learn



In [27]:
import pandas as pd
import joblib
import numpy as np
import os
from flask import Flask, jsonify, request
from flask_cors import CORS
from pyngrok import ngrok
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

In [28]:
# --- CONFIGURATION ---
# Every artefact lives directly under the Colab working directory.
CONTENT_DIR = '/content'

# Serialized scikit-learn pipeline.
MODEL_PATH = f'{CONTENT_DIR}/roas_model.pkl'
# Source dataset (Excel workbook).
DATA_PATH = f'{CONTENT_DIR}/Ppc_campaign_performance_data.xlsx'
# Despite the name, train_and_save_model() stores the numeric/categorical
# feature-name lists in this file, not a fitted scaler.
SCALER_PATH = f'{CONTENT_DIR}/data_scaler.pkl'

# Flask application with CORS enabled on every route for any origin.
app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

<flask_cors.extension.CORS at 0x7c238cdd1580>

In [29]:
def train_and_save_model():
    """Train the ROAS random-forest pipeline on the Excel dataset and persist it.

    Reads ``DATA_PATH``, fills missing numeric values with the column mean,
    drops duplicate rows, then fits a ``StandardScaler`` + ``OneHotEncoder`` +
    ``RandomForestRegressor`` pipeline on an 80% split and prints metrics
    computed on the remaining 20%.  The fitted pipeline is written to
    ``MODEL_PATH`` and the numeric/categorical feature-name lists to
    ``SCALER_PATH``.

    Returns:
        The fitted ``Pipeline``, or ``None`` when the data file is missing,
        has no rows, or has no ``ROAS`` column.
    """
    print("Đang khởi tạo và huấn luyện lại model Scikit-Learn...")

    if not os.path.exists(DATA_PATH):
        print(f" Lỗi: Không tìm thấy file {DATA_PATH}. Hãy upload lại file Excel!")
        return None

    # Load and apply the basic cleaning (mean-impute numerics, de-duplicate).
    df = pd.read_excel(DATA_PATH)
    df = df.fillna(df.mean(numeric_only=True)).drop_duplicates()

    if 'ROAS' not in df.columns:
        print(" Lỗi: File dữ liệu không có cột 'ROAS'")
        return None

    if df.empty:
        print(" Lỗi: File dữ liệu không có dòng nào để huấn luyện")
        return None

    X = df.drop(columns=['ROAS'])
    y = df['ROAS']

    # Select by dtype *family* rather than exact width: int32/float32 columns
    # count as numeric, and object/string/category/bool columns as categorical.
    # Date-like columns are deliberately left out of both groups.
    numeric_features = X.select_dtypes(include=['number']).columns.tolist()
    categorical_features = X.select_dtypes(
        exclude=['number', 'datetime', 'datetimetz', 'timedelta']
    ).columns.tolist()

    print(f" Numeric features: {numeric_features}")
    print(f" Categorical features: {categorical_features}")

    # Make it visible when a column is discarded by remainder='drop'.
    used = set(numeric_features) | set(categorical_features)
    ignored_columns = [col for col in X.columns if col not in used]
    if ignored_columns:
        print(f" Các cột bị bỏ qua (không phải số/phân loại): {ignored_columns}")

    preprocessor = ColumnTransformer([
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ], remainder='drop')

    model_pipeline = Pipeline([
        ('preprocess', preprocessor),
        ('rf', RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1))
    ])

    # Fixed seed so the same hold-out rows can be reproduced later.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model_pipeline.fit(X_train, y_train)
    print(" Huấn luyện xong!")

    # Evaluate on the hold-out split, which was previously created but unused.
    y_pred = model_pipeline.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    print(f" Đánh giá trên tập test - R2: {r2:.4f} | RMSE: {rmse:.2f} | MAE: {mae:.2f}")

    joblib.dump(model_pipeline, MODEL_PATH)
    print(f" Đã lưu model mới tại: {MODEL_PATH}")

    # Persist the feature-name lists consumed by the prediction endpoints.
    joblib.dump({
        'numeric_features': numeric_features,
        'categorical_features': categorical_features
    }, SCALER_PATH)

    return model_pipeline

In [30]:
# Train only when the persisted artefacts are missing: a fresh runtime
# (Restart & Run All) still ends up with a model for the API to load, while an
# existing model is not retrained on every run.
if not (os.path.exists(MODEL_PATH) and os.path.exists(SCALER_PATH)):
    try:
        global_model = train_and_save_model()
    except Exception as e:
        print(f" Lỗi nghiêm trọng khi train model: {e}")

In [31]:
@app.before_request
def log_request_info():
    """Print the method and path of every incoming request except the root path."""
    if request.path == '/':
        return
    print(f" Dashboard đang gọi: [{request.method}] {request.path}")

def _iqr_filter(frame):
    """Return ``frame`` without rows outside 1.5 * IQR on any numeric column.

    Columns are processed in order; the quartiles of each column are computed
    on the rows that survived the previous columns.
    """
    for col in frame.select_dtypes(include=['number']).columns:
        q1 = frame[col].quantile(0.25)
        q3 = frame[col].quantile(0.75)
        spread = q3 - q1
        frame = frame[frame[col].between(q1 - 1.5 * spread, q3 + 1.5 * spread)]
    return frame


def get_replay_data():
    """Score the saved model on held-out rows and build the dashboard payload.

    Returns:
        ``(payload, None)`` on success, where ``payload`` is a dict with the
        keys ``data``, ``metrics``, ``feature_importance`` and
        ``visualizations``; ``(None, message)`` on failure.
    """
    if not os.path.exists(DATA_PATH) or not os.path.exists(MODEL_PATH):
        return None, "Thiếu file Data hoặc Model. Vui lòng kiểm tra lại log."

    try:
        # NOTE(review): joblib.load unpickles the file; MODEL_PATH must never
        # point at an untrusted upload.
        model = joblib.load(MODEL_PATH)
        df = pd.read_excel(DATA_PATH)

        # Same basic cleaning as train_and_save_model().
        df = df.fillna(df.mean(numeric_only=True)).drop_duplicates()

        if 'ROAS' not in df.columns:
            return None, "File dữ liệu không có cột 'ROAS'"

        # Split BEFORE outlier filtering, with the same arguments used during
        # training, so the evaluated rows are the training hold-out. Filtering
        # first changes the row set and puts training rows into the "test" set.
        X = df.drop(columns=['ROAS'])
        y = df['ROAS']
        _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Keep only the hold-out rows that also pass the IQR outlier filter.
        kept = X_test.index.isin(_iqr_filter(df).index)
        X_test, y_test = X_test[kept], y_test[kept]
        if X_test.empty:
            return None, "Không còn dữ liệu test sau khi lọc ngoại lai (IQR)."

        y_pred = model.predict(X_test)

        metrics = {
            "r2_score": round(float(r2_score(y_test, y_pred)), 4),
            "rmse": round(float(np.sqrt(mean_squared_error(y_test, y_pred))), 2),
            "mae": round(float(mean_absolute_error(y_test, y_pred)), 2)
        }

        # Result frame: features plus actual / predicted / residual columns.
        df_res = X_test.copy()
        df_res['Actual'] = y_test.values
        df_res['Predicted'] = y_pred
        df_res['Residuals'] = y_test.values - y_pred

        # Column aliases expected by the frontend.
        df_res['actual'] = df_res['Actual']
        df_res['predicted'] = df_res['Predicted']
        df_res['ROAS'] = df_res['Actual']
        df_res['Predicted_ROAS'] = df_res['Predicted']

        if 'Date' in df_res.columns:
            df_res['Date'] = df_res['Date'].astype(str)
            df_res = df_res.sort_values('Date')

        # "Importance" here is the absolute correlation of each numeric feature
        # with the actual ROAS. Constant columns give NaN, which is not valid
        # JSON, so non-finite values are skipped.
        result_columns = ['Actual', 'Predicted', 'Residuals', 'actual', 'predicted', 'ROAS', 'Predicted_ROAS']
        num_df = df_res.select_dtypes(include=[np.number])
        if 'Actual' in num_df.columns:
            corr = num_df.corr()['Actual'].abs().sort_values(ascending=False)
            feats = [
                {"feature": name, "importance": round(float(value), 3)}
                for name, value in corr.items()
                if name not in result_columns and np.isfinite(value)
            ]
        else:
            feats = []

        # Histogram of actual ROAS. Two-decimal labels keep the bins distinct
        # when the whole range spans only a few units.
        hist_values, bin_edges = np.histogram(y_test, bins=10)
        distribution_data = {
            "labels": [
                f"{bin_edges[i]:.2f}-{bin_edges[i + 1]:.2f}"
                for i in range(len(hist_values))
            ],
            "values": hist_values.tolist()
        }

        # Actual-vs-predicted comparison for the first 50 rows.
        top_rows = df_res.head(50)
        comparison_data = [
            {
                "index": idx,
                "actual": float(actual),
                "predicted": float(predicted),
                "error": abs(float(actual) - float(predicted))
            }
            for idx, (actual, predicted) in enumerate(zip(top_rows['Actual'], top_rows['Predicted']))
        ]

        # NaN -> None. The object cast is required: on float columns
        # ``where(..., None)`` would otherwise store NaN again.
        df_res = df_res.astype(object).where(df_res.notna(), None)

        return {
            "data": df_res.head(1000).to_dict(orient='records'),
            "metrics": metrics,
            "feature_importance": feats[:10],
            "visualizations": {
                "roas_distribution": distribution_data,
                "scatter_data": df_res[['Actual', 'Predicted']].head(200).to_dict(orient='records'),
                "comparison_chart": comparison_data
            }
        }, None
    except Exception as e:
        return None, f"Lỗi logic get_replay_data: {str(e)}"

In [32]:
# --- ROUTES ---

@app.route('/', methods=['GET'])
def home():
    """Landing page that confirms the API is running."""
    banner = "<h1> System Active - API Running</h1>"
    return banner

@app.route('/load-data', methods=['GET', 'POST'])
def load_data():
    """Return the replay payload from get_replay_data() as JSON.

    Responds with HTTP 500 and an error message when the payload cannot be
    built or an unexpected exception occurs.
    """
    try:
        payload, error_message = get_replay_data()
        if error_message:
            failure = {"status": "error", "message": error_message}
            return jsonify(failure), 500

        # Status fields first, then everything produced by get_replay_data().
        body = dict(
            status="success",
            model_info="Random Forest (Retrained with Full Pipeline)",
        )
        body.update(payload)
        return jsonify(body)
    except Exception as exc:
        failure = {"status": "error", "message": str(exc)}
        return jsonify(failure), 500


In [33]:
# ========== FIX #2: Endpoint Dự Báo Hoạt Động Đúng ==========
@app.route('/predict', methods=['POST'])
def predict():
    """Predict ROAS for one campaign described by a JSON object.

    INPUT JSON:
    {
        "Budget": 1000,
        "Clicks": 500,
        "Campaign_ID": 1,
        "Date": "2025-01-15",
        "Spend": 950,
        "Impressions": 5000,
        ... (other columns, if any)
    }

    Columns missing from the input are filled with 0 (numeric features) or
    'Unknown' (all other columns). Extra keys are ignored.

    Returns:
        JSON with ``status``, ``predicted_roas``, ``decision``,
        ``decision_color`` and ``input_received``; on any failure, HTTP 400
        with ``status``, ``message`` and ``details``.
    """
    import traceback

    # A predicted ROAS above this value is reported as "worth running".
    roas_threshold = 3

    try:
        # silent=True yields None for a missing or malformed body, so the
        # request is rejected explicitly instead of being scored on defaults.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "status": "error",
                "message": "Dữ liệu gửi lên phải là một JSON object",
                "details": "Kiểm tra console để xem chi tiết"
            }), 400
        print(f" Input nhận được: {data}")

        # NOTE(review): joblib.load unpickles these files; they must never
        # point at untrusted uploads.
        model = joblib.load(MODEL_PATH)
        feature_info = joblib.load(SCALER_PATH)
        numeric_features = set(feature_info['numeric_features'])

        # Only the column names are needed, so read just the header row.
        expected_columns = (
            pd.read_excel(DATA_PATH, nrows=0)
            .drop(columns=['ROAS'], errors='ignore')
            .columns
        )

        input_df = pd.DataFrame([data])

        # Fill any column the client did not send.
        for col in expected_columns:
            if col not in input_df.columns:
                input_df[col] = 0 if col in numeric_features else 'Unknown'

        # Keep only the training columns, in training order.
        input_df = input_df[expected_columns].copy()

        # Coerce numeric fields so "1000" is accepted; null is treated like a
        # missing field (0). Non-numeric text raises and is reported as an error.
        for col in expected_columns:
            if col in numeric_features:
                input_df[col] = pd.to_numeric(input_df[col]).fillna(0)

        print(f"DataFrame sau xử lý: \n{input_df}")
        print(f"Shape: {input_df.shape}")

        pred = model.predict(input_df)[0]
        result = round(float(pred), 2)

        worth_running = result > roas_threshold
        decision = "NÊN CHẠY " if worth_running else "KHÔNG NÊN CHẠY "
        decision_color = "green" if worth_running else "red"

        print(f" Dự báo ROAS: {result} - {decision}")

        return jsonify({
            "status": "success",
            "predicted_roas": result,
            "decision": decision,
            "decision_color": decision_color,
            "input_received": data
        })

    except Exception as e:
        print(f" Lỗi predict: {str(e)}")
        traceback.print_exc()
        return jsonify({
            "status": "error",
            "message": str(e),
            "details": "Kiểm tra console để xem chi tiết"
        }), 400


In [34]:
# ========== ENDPOINT THÊM: Lấy danh sách cột cần input ==========
@app.route('/get-input-schema', methods=['GET'])
def get_input_schema():
    """Return the input schema the frontend needs.

    The schema lists the saved numeric and categorical feature names together
    with every dataset column except ``ROAS``. Any failure is reported as
    HTTP 400 with the exception text.
    """
    try:
        saved_features = joblib.load(SCALER_PATH)
        feature_frame = pd.read_excel(DATA_PATH).drop(columns=['ROAS'], errors='ignore')

        return jsonify({
            "status": "success",
            "schema": {
                "numeric_features": saved_features['numeric_features'],
                "categorical_features": saved_features['categorical_features'],
                "all_columns": feature_frame.columns.tolist()
            }
        })
    except Exception as exc:
        failure = {"status": "error", "message": str(exc)}
        return jsonify(failure), 400


In [35]:
# --- RUN SERVER ---
if __name__ == "__main__":
    # Single source of truth for the port used by both ngrok and Flask.
    PORT = 5000

    # Read the ngrok token from the environment instead of hard-coding it, so
    # the notebook can be shared or committed without leaking a credential.
    ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
    if not ngrok_token:
        raise RuntimeError(
            "Chưa thiết lập biến môi trường NGROK_AUTHTOKEN (ngrok authtoken)."
        )
    ngrok.set_auth_token(ngrok_token)

    # Close any tunnel left over from a previous run before opening a new one.
    ngrok.kill()
    public_url = ngrok.connect(PORT).public_url
    print(f"\n{'='*60}")
    print(f"LINK API MỚI: {public_url}")
    print(f"{'='*60}\n")
    app.run(port=PORT)


LINK API MỚI: https://ronna-fleshiest-luana.ngrok-free.dev

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [26/Dec/2025 16:42:38] "OPTIONS /load-data HTTP/1.1" 200 -


 Dashboard đang gọi: [OPTIONS] /load-data
 Dashboard đang gọi: [GET] /load-data


INFO:werkzeug:127.0.0.1 - - [26/Dec/2025 16:42:39] "GET /load-data HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [26/Dec/2025 16:43:22] "OPTIONS /predict HTTP/1.1" 200 -


 Dashboard đang gọi: [OPTIONS] /predict
 Dashboard đang gọi: [POST] /predict
 Input nhận được: {'Budget': 2453, 'Clicks': 23423, 'CTR': 1, 'CPC': 2, 'Conversions': 32, 'CPA': 23, 'Conversion_Rate': 1, 'Duration': 23, 'Revenue': 2342, 'Spend': 23452, 'Impressions': 23234, 'Platform': 'Google', 'Content_Type': 'Video', 'Target_Age': '18-24', 'Target_Gender': 'Male', 'Region': 'Africa'}


INFO:werkzeug:127.0.0.1 - - [26/Dec/2025 16:43:22] "POST /predict HTTP/1.1" 200 -


DataFrame sau xử lý: 
  Campaign_ID  Budget  Clicks  CTR  CPC  Conversions  CPA  Conversion_Rate  \
0     Unknown    2453   23423    1    2           32   23                1   

   Duration Platform Content_Type Target_Age Target_Gender  Region  Revenue  \
0        23   Google        Video      18-24          Male  Africa     2342   

   Spend     Date  Impressions  
0  23452  Unknown        23234  
Shape: (1, 18)
 Dự báo ROAS: 1.99 - KHÔNG NÊN CHẠY 


INFO:werkzeug:127.0.0.1 - - [26/Dec/2025 16:43:42] "GET / HTTP/1.1" 200 -
