In [1]:
!pip -q install pyngrok

In [2]:
# -*- coding: utf-8 -*-
"""
UCF Crime Detection - Video Processing and Inference
Author: matapv01
Created: 2025-03-15 09:27:27
"""
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
from datetime import datetime
from torchvision import transforms
from transformers import ViTImageProcessor, ViTForImageClassification
from huggingface_hub import login
from IPython.display import HTML
from base64 import b64encode
from google.colab import files
# Install required packages
!pip install transformers torch torchvision opencv-python pillow tqdm
import cv2
from IPython.display import HTML
from base64 import b64encode
from transformers import ViTForImageClassification, ViTImageProcessor
import torch
# Load model
# Import the necessary modules
from huggingface_hub import login, HfApi





In [3]:
# Hugging Face settings
MODEL_ID_2 = "mata01/crime-20frame-detection-vit-model"  # Your model repo name
#HF_TOKEN = "hf_xxx"  # Replace with your token
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')
token = HF_TOKEN

if HF_TOKEN:
  print(f"Loaded token success")


Loaded token success


In [4]:
# Constants
FRAME_CHUNK_SIZE = 20
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Label mapping
LABELS = {
    0: "Abuse", 1: "Arrest", 2: "Arson", 3: "Assault",
    4: "Burglary", 5: "Explosion", 6: "Fighting", 7: "Normal",
    8: "Road Accident", 9: "Robbery", 10: "Shooting",
    11: "Shoplifting", 12: "Stealing", 13: "Vandalism"
}

class VideoProcessor:
    def __init__(self):
        print("Loading model from Hugging Face...")
        # Initialize video properties
        self.fps = None
        self.frame_width = None
        self.frame_height = None

        try:
            # Login to Hugging Face
            login(HF_TOKEN)

            # Load model and processor
            self.model = ViTForImageClassification.from_pretrained(
                MODEL_ID_2,
                use_auth_token=HF_TOKEN,
                num_labels=len(LABELS),
                id2label=LABELS
            ).to(DEVICE)

            self.processor = ViTImageProcessor.from_pretrained(
                MODEL_ID_2,
                use_auth_token=HF_TOKEN
            )

            print("Model loaded successfully!")

        except Exception as e:
            print(f"Error loading from Hugging Face: {str(e)}")
            raise

        self.model.eval()

        self.transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def extract_frames(self, video_path):
        print("Extracting frames from video...")
        frames = []
        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        # Get video properties
        self.fps = int(cap.get(cv2.CAP_PROP_FPS))
        self.frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Video properties: {self.fps} FPS, {self.frame_width}x{self.frame_height}")

        for _ in tqdm(range(total_frames), desc="Extracting frames"):
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)

        cap.release()
        return frames

    def create_frame_chunks(self, frames):
        print("Creating frame chunks...")
        chunks = []
        for i in range(0, len(frames), FRAME_CHUNK_SIZE):
            chunk = frames[i:i + FRAME_CHUNK_SIZE]
            if len(chunk) == FRAME_CHUNK_SIZE:  # Only use complete chunks
                chunks.append(chunk)
        return chunks

    def process_chunk(self, chunk):
        processed_frames = []
        for frame in chunk:
            pil_image = Image.fromarray(frame)
            processed_frame = self.transform(pil_image)
            processed_frames.append(processed_frame)

        batch = torch.stack(processed_frames).to(DEVICE)

        with torch.no_grad():
            outputs = self.model(batch)
            predictions = torch.softmax(outputs.logits, dim=-1)
            chunk_pred = predictions.mean(dim=0)
            label_idx = chunk_pred.argmax().item()
            confidence = chunk_pred[label_idx].item()

        return LABELS[label_idx], confidence

    def create_output_video(self, frames, predictions, output_path):
        print("Creating output video...")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, self.fps,
                            (self.frame_width, self.frame_height))

        # Create mapping from frame index to prediction
        frame_predictions = {}
        for chunk_idx, (label, conf) in enumerate(predictions):
            start_frame = chunk_idx * FRAME_CHUNK_SIZE
            end_frame = start_frame + FRAME_CHUNK_SIZE
            for frame_idx in range(start_frame, end_frame):
                frame_predictions[frame_idx] = (label, conf)

        print("Adding labels to frames...")
        for frame_idx, frame in enumerate(tqdm(frames, desc="Processing frames")):
            if frame_idx in frame_predictions:
                label, conf = frame_predictions[frame_idx]

                # Create copy of frame
                frame = frame.copy()

                # Text to display
                text = f"Action: {label} ({conf:.2f})"

                # Draw black background
                (text_width, text_height), _ = cv2.getTextSize(
                    text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2
                )
                cv2.rectangle(
                    frame,
                    (10, 10),
                    (text_width + 20, 40),
                    (0, 0, 0),
                    -1
                )

                # Draw white text
                cv2.putText(
                    frame,
                    text,
                    (15, 30),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (255, 255, 255),
                    2,
                    cv2.LINE_AA
                )

            # Write frame
            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(frame_bgr)

        out.release()
        print(f"Video saved to {output_path}")

    def process_video(self, input_path, output_path = 'static/outputs/video.mp4'):
        try:
            # Extract frames
            frames = self.extract_frames(input_path)
            if not frames:
                raise ValueError("No frames extracted from video")

            print(f"Total frames: {len(frames)}")

            # Create and process chunks
            chunks = self.create_frame_chunks(frames)
            print(f"Total chunks: {len(chunks)}")

            # Process chunks
            print("Processing chunks...")
            predictions = []
            for chunk in tqdm(chunks, desc="Processing chunks"):
                label, confidence = self.process_chunk(chunk)
                predictions.append((label, confidence))

            # Create output video
            print("\nCreating output video...")
            self.create_output_video(frames, predictions, output_path)

            return output_path

        except Exception as e:
            print(f"Error processing video: {str(e)}")
            return

def show_video(video_path):
    """
    Displays a video in the notebook
    """
    mp4 = open(video_path, 'rb').read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
    <video width=600 controls>
        <source src="{data_url}" type="video/mp4">
    </video>
    """)

In [5]:
def loadimgprocessor():
  # Định nghĩa repository ID
  username = "mata01"  # Thay bằng username thật của bạn
  model_name = "crime_action_cctv_image_detection"
  repo_id = f"{username}/{model_name}"

  # Tải mô hình và tiền xử lý từ Hugging Face Model Hub
  model = ViTForImageClassification.from_pretrained(repo_id, token=token)
  processor = ViTImageProcessor.from_pretrained(repo_id, token=token)

  # 1. Kiểm tra và chọn thiết bị (GPU nếu có)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(f"Using device: {device}")

  # Chuyển mô hình sang GPU
  model.to(device)
  model.eval()  # Đặt mô hình ở chế độ evaluation để tối ưu suy luận
  print("Model moved to GPU successfully!")
  return model, processor

from PIL import Image, ImageDraw, ImageFont
import os

def predict_image(image_path, output_path='static/outputs/output_image.jpg'):
    try:
        # Đọc ảnh sử dụng OpenCV
        img = cv2.imread(image_path)
        if img is None:
            print(f"Lỗi: Không thể đọc ảnh tại {image_path}")
            return

        # Chuyển đổi ảnh sang RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Tiền xử lý ảnh
        inputs = processor_img(images=img, return_tensors="pt").to(device)

        # Dự đoán
        with torch.no_grad():
            outputs = model_img(**inputs)
            logits = outputs.logits
            predicted_class_id = logits.argmax().item()
            predicted_label = model_img.config.id2label[predicted_class_id]

        # Hiển thị ảnh với nhãn dự đoán
        pil_img = Image.fromarray(img)
        draw = ImageDraw.Draw(pil_img)
        font_size = 30
        font = ImageFont.truetype("LiberationSansNarrow-Bold.ttf", font_size)
        text_position = (10, 10)
        text_color = (255, 0, 0)

        # Vẽ nhãn dự đoán lên ảnh
        draw.text(text_position, predicted_label, fill=text_color, font=font)

        # Lưu ảnh đầu ra
        pil_img.save(output_path)
        return output_path

    except Exception as e:
        import traceback
        traceback.print_exc()
        print(f"Đã xảy ra lỗi: {e}")
        return

In [6]:
# Khởi tạo các model AI
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Loading models...")
processor = VideoProcessor()
model_img, processor_img = loadimgprocessor()
print("Models loaded successfully!")

Loading models...
Loading model from Hugging Face...




Model loaded successfully!
Using device: cpu
Model moved to GPU successfully!
Models loaded successfully!


In [7]:
!mkdir -p templates
with open("templates/index.html", "w") as f:
    f.write("""
    <!DOCTYPE html>
    <html lang="vi">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>AI Crime Detection</title>
        <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
        <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
    </head>
    <body class="bg-gray-100 min-h-screen flex flex-col items-center justify-center p-6">
        <header class="text-center mb-6">
            <h1 class="text-4xl font-bold text-gray-800">AI Crime Detection</h1>
            <p class="text-gray-600">Phân tích hành vi đáng ngờ từ ảnh và video</p>
        </header>
        <div class="bg-white p-6 rounded-lg shadow-lg max-w-xl w-full">
            <div class="flex justify-center space-x-4 mb-6">
                <button id="image-tab" class="tab-btn bg-blue-500 text-white">
                    <i class="fas fa-image mr-2"></i> Ảnh
                </button>
                <button id="video-tab" class="tab-btn bg-gray-500 text-white">
                    <i class="fas fa-video mr-2"></i> Video
                </button>
            </div>
            <div id="image-section" class="upload-section">
                <label for="image-input" class="upload-label">
                    <i class="fas fa-cloud-upload-alt text-4xl text-gray-400"></i>
                    <p>Chọn ảnh để phân tích</p>
                </label>
                <input type="file" id="image-input" accept="image/*" class="hidden">
            </div>
            <div id="video-section" class="upload-section hidden">
                <label for="video-input" class="upload-label">
                    <i class="fas fa-cloud-upload-alt text-4xl text-gray-400"></i>
                    <p>Chọn video để phân tích</p>
                </label>
                <input type="file" id="video-input" accept="video/*" class="hidden">
            </div>
            <div id="loading" class="hidden text-center py-4">
                <div class="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-500 mx-auto mb-4"></div>
                <p class="text-gray-600">Đang xử lý...</p>
            </div>
            <div id="preview" class="hidden">
                <h2 class="text-lg font-semibold text-center mb-4">Kết quả phân tích</h2>
                <div class="grid grid-cols-2 gap-4">
                    <div>
                        <h3 class="text-sm font-medium">File gốc</h3>
                        <img id="original-image" class="preview-img hidden">
                        <video id="original-video" class="preview-video hidden" controls type="video/mp4"></video>
                    </div>
                    <div>
                        <h3 class="text-sm font-medium">Kết quả</h3>
                        <img id="result-image" class="preview-img hidden">
                        <div class="mt-8"> <!-- Thêm div để tạo khoảng cách -->
                            <a id="result-video-download" class="hidden bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600" href="#" download>Tải video kết quả</a>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        <script>
            document.addEventListener('DOMContentLoaded', function() {
                const tabs = {
                    image: document.getElementById('image-tab'),
                    video: document.getElementById('video-tab')
                };
                const sections = {
                    image: document.getElementById('image-section'),
                    video: document.getElementById('video-section')
                };
                const inputs = {
                    image: document.getElementById('image-input'),
                    video: document.getElementById('video-input')
                };
                const preview = document.getElementById('preview');
                const loading = document.getElementById('loading');
                const originalImage = document.getElementById('original-image');
                const originalVideo = document.getElementById('original-video');
                const resultImage = document.getElementById('result-image');
                const resultVideoDownload = document.getElementById('result-video-download');

                function switchTab(type) {
                    Object.keys(tabs).forEach(key => {
                        tabs[key].classList.toggle('bg-blue-500', key === type);
                        tabs[key].classList.toggle('bg-gray-500', key !== type);
                        sections[key].classList.toggle('hidden', key !== type);
                    });
                }

                tabs.image.addEventListener('click', () => switchTab('image'));
                tabs.video.addEventListener('click', () => switchTab('video'));

                async function uploadFile(file, type) {
                    preview.classList.remove('hidden');
                    loading.classList.remove('hidden');
                    resultImage.classList.add('hidden');
                    resultVideoDownload.classList.add('hidden');

                    const url = URL.createObjectURL(file);
                    if (type === 'image') {
                        originalImage.src = url;
                        originalImage.classList.remove('hidden');
                        originalVideo.classList.add('hidden');
                    } else {
                        originalVideo.src = url;
                        originalVideo.classList.remove('hidden');
                        originalImage.classList.add('hidden');
                    }

                    const formData = new FormData();
                    formData.append('file', file);

                    try {
                        const endpoint = type === 'image' ? '/upload/image' : '/upload/video';
                        console.log('Requesting:', window.location.origin + endpoint);
                        const response = await fetch(endpoint, {
                            method: 'POST',
                            body: formData
                        });

                        const responseText = await response.text();
                        console.log('Response status:', response.status);
                        console.log('Response text:', responseText);

                        if (!response.ok) {
                            throw new Error(`Server responded with ${response.status}: ${responseText}`);
                        }

                        const data = JSON.parse(responseText);
                        if (data.error) {
                            throw new Error(data.error);
                        }

                        const fullOutputUrl = window.location.origin + data.output_url;
                        if (type === 'image') {
                            console.log('Setting image src to:', fullOutputUrl);
                            resultImage.src = fullOutputUrl;
                            resultImage.classList.remove('hidden');
                        } else {
                            console.log('Setting video download link to:', fullOutputUrl);
                            resultVideoDownload.href = fullOutputUrl;
                            resultVideoDownload.classList.remove('hidden');
                        }

                        loading.classList.add('hidden');
                    } catch (error) {
                        console.error('Error:', error);
                        alert(`Upload failed: ${error.message}`);
                        loading.classList.add('hidden');
                    }
                }

                inputs.image.addEventListener('change', event => {
                    if (event.target.files[0]) uploadFile(event.target.files[0], 'image');
                });
                inputs.video.addEventListener('change', event => {
                    if (event.target.files[0]) uploadFile(event.target.files[0], 'video');
                });
            });
        </script>
        <style>
            .tab-btn { padding: 10px 20px; border-radius: 8px; transition: 0.3s; }
            .upload-section { text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 8px; cursor: pointer; }
            .upload-label { display: block; padding: 30px; cursor: pointer; }
            .preview-img, .preview-video { width: 100%; border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); }
        </style>
    </body>
    </html>
    """)

In [8]:
from flask import Flask, request, render_template, send_file, jsonify
import numpy as np
import os
from pyngrok import ngrok, conf # Import conf from pyngrok
# lấy token ngrok từ secrectkey
from google.colab import userdata
conf.get_default().auth_token = userdata.get('ngrok_token')


# Khởi tạo Flask app
app = Flask(__name__)
UPLOAD_FOLDER = "static/uploads"
OUTPUT_FOLDER = "static/outputs"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# kết nối sever ngrok để tọa tunnel
public_url = ngrok.connect(8888).public_url
print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}/\"".format(public_url, 8888))

# Route chính
@app.route('/')
def index():
    print("Rendering index.html")
    return render_template('index.html')

# Route upload ảnh
@app.route('/upload/image', methods=['POST'])
def upload_image():
    print("Received request to /upload/image")
    try:
        if 'file' not in request.files:
            print("No file part in request")
            return jsonify({'error': 'No file part in request'}), 400

        file = request.files['file']
        if file.filename == '':
            print("No file selected")
            return jsonify({'error': 'No file selected'}), 400

        file_path = os.path.join(UPLOAD_FOLDER, file.filename)
        print(f"Saving file to: {file_path}")
        file.save(file_path)

        output_path = predict_image(file_path)
        output_url = f"/outputs/{os.path.basename(output_path)}"
        print(f"Returning JSON: {{'output_url': '{output_url}'}}")

        return jsonify({'output_url': output_url})
    except Exception as e:
        print(f"Error in upload_image: {str(e)}")
        return jsonify({'error': str(e)}), 500

# Route upload video
@app.route('/upload/video', methods=['POST'])
def upload_video():
    print("Received request to /upload/video")
    try:
        if 'file' not in request.files:
            print("No file part in request")
            return jsonify({'error': 'No file part in request'}), 400

        file = request.files['file']
        if file.filename == '':
            print("No file selected")
            return jsonify({'error': 'No file selected'}), 400

        file_path = os.path.join(UPLOAD_FOLDER, file.filename)
        print(f"Saving file to: {file_path}")
        file.save(file_path)

        output_path = processor.process_video(file_path)
        output_url = f"/outputs/{os.path.basename(output_path)}"
        print(f"Returning JSON: {{'output_url': '{output_url}'}}")

        return jsonify({'output_url': output_url})
    except Exception as e:
        print(f"Error in upload_video: {str(e)}")
        return jsonify({'error': str(e)}), 500

# Route phục vụ file
@app.route('/outputs/<path:filename>')
def serve_output(filename):
    print(f"Serving file: {filename}")
    file_path = os.path.join(OUTPUT_FOLDER, filename)
    if not os.path.exists(file_path):
        print("File not found")
        return jsonify({'error': 'Output file not found'}), 404

    # Gửi file (ảnh hoặc video) để xem hoặc tải
    return send_file(file_path, as_attachment=False, conditional=True)

if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0', port=8888, use_reloader=False)

 * ngrok tunnel "https://51e4-34-75-135-253.ngrok-free.app" -> "http://127.0.0.1:8888/"
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8888
 * Running on http://172.28.0.12:8888
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [22/Mar/2025 02:45:07] "GET / HTTP/1.1" 200 -


Rendering index.html


INFO:werkzeug:127.0.0.1 - - [22/Mar/2025 02:45:17] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


Received request to /upload/video
Saving file to: static/uploads/output_video_action.mp4
Extracting frames from video...
Video properties: 30 FPS, 1280x720


Extracting frames: 100%|██████████| 436/436 [00:03<00:00, 136.13it/s]


Total frames: 436
Creating frame chunks...
Total chunks: 21
Processing chunks...


Processing chunks: 100%|██████████| 21/21 [05:01<00:00, 14.37s/it]



Creating output video...
Creating output video...
Adding labels to frames...


Processing frames: 100%|██████████| 436/436 [00:03<00:00, 121.70it/s]
INFO:werkzeug:127.0.0.1 - - [22/Mar/2025 02:50:44] "POST /upload/video HTTP/1.1" 200 -


Video saved to static/outputs/video.mp4
Returning JSON: {'output_url': '/outputs/video.mp4'}


In [9]:
# import getpass
# import os
# import threading

# from flask import Flask


# print("Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken")



# app = Flask(__name__)

# # Open a ngrok tunnel to the HTTP server
# public_url = ngrok.connect(5000).public_url
# print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}/\"".format(public_url, 5000))

# # Update any base URLs to use the public ngrok URL
# app.config["BASE_URL"] = public_url

# # ... Update inbound traffic via APIs to use the public-facing ngrok URL


# # Define Flask routes
# @app.route("/")
# def index():
#     return "Hello from Colab!"

# # # Start the Flask server in a new thread
# # threading.Thread(target=app.run, kwargs={"use_reloader": False}).start()
# app.run()
