In [1]:
%pip install flask-cors

Note: you may need to restart the kernel to use updated packages.


In [52]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import joblib
import numpy as np
import pandas as pd
from io import StringIO
import threading
import time
import nest_asyncio
import sys

In [53]:
# Location of the pickled classifier read by the model-loading cell.
# (The "MODLE" spelling is kept because other cells reference this exact name.)
MODLE_FILE_PATH = './random_forest_model.pkl'

# Feature columns, in the order they are selected from the frame and handed to
# the model at prediction time.
EXPECTED_FEATURES = [
    'default_profile_image',   # raw column
    'statuses_count',          # raw column
    'followers_count',         # raw column
    'friends_count',           # raw column
    'protected',               # raw column
    'name_length',             # derived from `name`
    'follower_ratio',          # derived from followers_count / friends_count
]

# Raw CSV columns the service looks for, mapped to the name used inside the
# DataFrame. Every column currently maps to itself.
RAW_COLUMNS_NEEDED = {
    column: column
    for column in (
        'statuses_count',
        'followers_count',
        'friends_count',
        'default_profile_image',
        'protected',
        'name',
    )
}

In [54]:
# Load the trained classifier once, at notebook start-up.
# SECURITY: joblib.load unpickles the file, and unpickling can execute arbitrary
# code - only point MODLE_FILE_PATH at a model file you trust.
try:
    MODEL = joblib.load(MODLE_FILE_PATH)
    print(f"Model {MODLE_FILE_PATH} loaded succesfully.")
except FileNotFoundError:
    # Always bind MODEL: without this a missing file leaves the name undefined on a
    # fresh kernel, or silently keeps a stale model from an earlier run of this cell.
    MODEL = None
    print(f"Missing model file: {MODLE_FILE_PATH}")

Model ./random_forest_model.pkl loaded succesfully.


In [55]:
# Create the Flask application object that the route decorators in the later cell attach to.
# Re-running this cell rebinds `app` to a fresh object; a server thread that is already
# alive keeps serving the object it was started with, because the start-up code only
# calls app.run() when no live `server_thread` exists.
app = Flask(__name__)
# Enable cross-origin requests for the whole app using flask-cors defaults.
# NOTE(review): the defaults are believed to allow every origin - confirm against the
# flask-cors documentation and restrict origins before exposing this beyond localhost.
CORS(app)

<flask_cors.extension.CORS at 0x1b467447f80>

In [56]:
def assign_risk_category(score):
    """Translate a suspicion score into a risk label and display colour.

    Args:
        score: Numeric suspicion score (the caller passes a 0-100 percentage).

    Returns:
        dict with keys ``"category"`` and ``"color"``. The first band whose lower
        bound the score reaches wins; anything below the lowest bound (including
        values that compare false against every bound, such as NaN) is GENUINE.
    """
    # (inclusive lower bound, label, colour), checked from the highest band down.
    bands = (
        (91, "CRITICAL RISK", "red"),
        (66, "BOT", "amber"),
        (31, "MODERATE", "yellow"),
    )
    for lower_bound, label, colour in bands:
        if score >= lower_bound:
            return {"category": label, "color": colour}
    return {"category": "GENUINE", "color": "green"}

In [None]:
# Notebook re-run guard: forget any view function already registered under these
# endpoint names so the @app.route decorators below do not hit a duplicate-registration error.
# NOTE(review): only app.view_functions is cleared here; URL rules already added to the
# app are left as they are.
for _ep in ('health', 'batch_predict'):
    app.view_functions.pop(_ep, None)

@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always answers HTTP 200 with the JSON body ``{"status": "ok"}``."""
    payload = {'status': 'ok'}
    return jsonify(payload), 200

def _normalise_raw_columns(df):
    """Normalise headers, record which raw columns are present, and coerce numeric inputs.

    Args:
        df: DataFrame parsed from the uploaded CSV.

    Returns:
        Tuple ``(frame, present_count, missing_columns)`` where ``frame`` is a cleaned
        copy of ``df``, ``present_count`` is how many RAW_COLUMNS_NEEDED columns the
        upload contained, and ``missing_columns`` lists the ones it did not.
    """
    # Lower-case (and trim) header names so they match the model's feature names.
    normalised = [str(col).strip().lower() for col in df.columns]
    # Headers that differ only by case/whitespace collapse to one name; keep the first.
    keep = ~pd.Index(normalised).duplicated()
    frame = df.loc[:, keep].copy()
    frame.columns = [name for name, kept in zip(normalised, keep) if kept]

    # Presence must be measured BEFORE defaults are filled in, otherwise every
    # column would always look present.
    required = list(RAW_COLUMNS_NEEDED.values())
    missing_columns = [col for col in required if col not in frame.columns]
    present_count = len(required) - len(missing_columns)

    for col in required:
        if col == 'name':
            continue  # text column; handled when features are derived
        if col in frame.columns:
            # unparseable cells become NaN and then 0
            frame[col] = pd.to_numeric(frame[col], errors='coerce').fillna(0)
        else:
            frame[col] = 0
    return frame, present_count, missing_columns


def _add_model_features(frame):
    """Add ``name_raw``, ``name_length`` and ``follower_ratio`` and cast flag/count columns to int.

    Mutates and returns ``frame``.
    """
    if 'name' in frame.columns:
        # cast to object first so a column that parsed as all-NaN floats can hold text
        frame['name_raw'] = frame['name'].astype(object).fillna('unknown')
    else:
        frame['name_raw'] = 'unknown'
    frame['name_length'] = frame['name_raw'].astype(str).str.len()

    ratio = frame['followers_count'] / (frame['friends_count'] + 1)
    # friends_count == -1 divides by zero; treat the resulting inf/NaN as 0
    frame['follower_ratio'] = ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

    for col in ('default_profile_image', 'protected', 'statuses_count'):
        # explicit 64-bit width: plain `int` can mean 32 bits on some platforms
        frame[col] = frame[col].astype('int64')
    return frame


@app.route('/batch-predict', methods=['POST'])
def batch_predict():
    """Score every profile in an uploaded CSV and return the suspicious ones as JSON.

    Expects a multipart form upload with a ``file`` part whose name ends in ``.csv``.
    Raw columns listed in RAW_COLUMNS_NEEDED that are absent are defaulted (0, or
    'unknown' for ``name``) and reported in the completeness fields; ``id`` and
    ``screen_name`` are optional and come back as null when absent.

    Returns:
        200 with ``completness_score``, ``completness_total``, ``completness_message``,
        ``profiles`` (rows whose risk category is not GENUINE, highest score first) and
        ``suspicion_score_average``; 400 for upload/parsing/cleaning problems; 500 when
        feature derivation or prediction fails. Error bodies carry ``error`` and,
        where available, ``details``.
        The misspelled ``completness_*`` keys are kept as-is because clients read them.
    """
    # basic validations
    upload = request.files.get('file')
    if upload is None:
        return jsonify({'error': 'No file part in the request'}), 400

    filename = upload.filename or ''
    if not filename.lower().endswith('.csv'):
        return jsonify({'error': 'No selected file or file is not a CSV'}), 400

    try:
        # latin1 maps every byte to a character, so decoding itself cannot fail
        file_content = upload.read().decode('latin1')
        df = pd.read_csv(StringIO(file_content))
    except Exception as e:
        return jsonify({'error': 'File Read/Format Error', 'details': str(e)}), 400

    if df.empty:
        # header-only upload: nothing to score, and the model would reject 0 rows
        return jsonify({'error': 'File Read/Format Error',
                        'details': 'CSV contains no data rows'}), 400

    try:
        df, feature_present_count, missing_features_list = _normalise_raw_columns(df)
    except Exception as e:
        return jsonify({'error': 'Data Pre-cleaning Failed', 'details': str(e)}), 400

    try:
        df = _add_model_features(df)
    except Exception as e:
        return jsonify({'error': 'Data Transformation Failed', 'details': str(e)}), 500

    # prediction and downstream processing wrapped to return JSON on error
    try:
        # look the model up defensively: the name may be unbound or None if loading failed
        model = globals().get('MODEL')
        if model is None:
            raise RuntimeError('model is not loaded')
        # column 1 is taken as the suspicious-class probability
        # NOTE(review): assumes a binary classifier whose second class is "bot" - confirm
        prediction_probas = model.predict_proba(df[EXPECTED_FEATURES])[:, 1]
        df['Suspicion_Score'] = np.round(prediction_probas * 100, 2)
        risk = df['Suspicion_Score'].apply(assign_risk_category)
        df['Risk_Category'] = risk.apply(lambda r: r['category'])
        df['Risk_Color'] = risk.apply(lambda r: r['color'])
    except Exception as e:
        return jsonify({'error': 'Prediction Failed', 'details': str(e)}), 500

    # identifier columns are optional; without them the table still renders
    for optional_col in ('id', 'screen_name'):
        if optional_col not in df.columns:
            df[optional_col] = None

    missing_text = ', '.join(missing_features_list) if missing_features_list else 'None'
    completness_message = (
        f"{feature_present_count} of {len(RAW_COLUMNS_NEEDED)} critical raw columns present. "
        f"Missing : {missing_text}"
    )

    # Filter on the category rather than a separate numeric cut-off so the table can
    # never list a row that is labelled GENUINE.
    df_suspicious = (
        df[df['Risk_Category'] != 'GENUINE']
        .sort_values(by='Suspicion_Score', ascending=False)
    )
    avg_suspicion = df_suspicious['Suspicion_Score'].mean() if not df_suspicious.empty else 0
    final_table_data = df_suspicious[[
        'id', 'name_raw', 'screen_name', 'Suspicion_Score', 'Risk_Category', 'Risk_Color'
    ]].rename(columns={'id': 'Profile_ID', 'name_raw': 'Name', 'screen_name': 'Handle'})

    # NaN is not valid JSON; send null for empty cells instead
    profiles = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in final_table_data.to_dict('records')
    ]

    response_data = {
        'completness_score': feature_present_count,
        'completness_total': len(RAW_COLUMNS_NEEDED),
        'completness_message': completness_message,
        'profiles': profiles,
        'suspicion_score_average': round(avg_suspicion, 0)
    }
    return jsonify(response_data)

# start server in background if not already running
# NOTE(review): the server below runs in a plain thread via app.run(), so this
# nest_asyncio patch may be unnecessary - confirm before removing.
nest_asyncio.apply()
if 'server_thread' not in globals() or not server_thread.is_alive():
    def run_flask_app():
        """Serve `app` on port 8000; blocks for as long as the server is up."""
        # the reloader is disabled because it needs the main thread
        app.run(debug=False, use_reloader=False, port=8000)
    server_thread = threading.Thread(target=run_flask_app)
    server_thread.start()
    # app.run() only returns when the server stops, so a thread that has already
    # exited after a short wait means start-up failed (e.g. the port is taken).
    server_thread.join(timeout=0.5)
    print("\n--- FLASK SERVER STATUS ---")
    if server_thread.is_alive():
        print("Flask server started successfully on http://127.0.0.1:8000/")
    else:
        print("Flask server failed to start on http://127.0.0.1:8000/ "
              "(check the error output; is port 8000 already in use?)")
else:
    # Nothing was started on this run: report that honestly. The live thread keeps
    # serving the `app` object it was launched with.
    print("\n--- FLASK SERVER STATUS ---")
    print("Flask server already running on http://127.0.0.1:8000/ "
          "(restart the kernel if `app` was re-created)")


--- FLASK SERVER STATUS ---
Flask server started successfully on http://127.0.0.1:8000/


127.0.0.1 - - [20/Oct/2025 10:44:06] "GET /health HTTP/1.1" 200 -
127.0.0.1 - - [20/Oct/2025 10:44:13] "POST /batch-predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Oct/2025 10:44:13] "POST /batch-predict HTTP/1.1" 200 -
