In [1]:
!pip install lmdeploy

Collecting lmdeploy
  Downloading lmdeploy-0.8.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (17 kB)
Collecting fastapi (from lmdeploy)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting fire (from lmdeploy)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mmengine-lite (from lmdeploy)
  Downloading mmengine_lite-0.10.7-py3-none-any.whl.metadata (20 kB)
Collecting numpy<2.0.0 (from lmdeploy)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting outlines (from lmdeploy)
  Downloading outlines-0.2.3-py3-none-any.whl.metadata (18 kB)
Collecting partial-json-parser (from lmdeploy)
  Downloading partial_json_p

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import io
import re
import base64
from PIL import Image
from datetime import datetime
from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig, GenerationConfig
from lmdeploy.vl import load_image

In [2]:
def load_time_series(file_path):
    """
    Load a time series from a CSV file, indexed by its 'indo_time' column.

    Rows that share a timestamp are collapsed into a single row holding the
    mean of each column, and a warning is printed when that happens.

    Parameters
    ----------
    file_path : str or path-like
        CSV file containing an 'indo_time' column, which is parsed as dates
        and used as the index.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by 'indo_time' in ascending order.
    """
    df = pd.read_csv(file_path, parse_dates=['indo_time'], index_col="indo_time")
    if not df.index.is_unique:
        print("Warning: Duplicate timestamps found in the index. Aggregating by taking the mean.")
        df = df.groupby(df.index).mean()
    # groupby() sorts its result, but a duplicate-free file would otherwise be
    # returned in file order. Sort unconditionally so index[0]/index[-1] are the
    # true bounds of the series and label slicing with .loc[start:end] is valid.
    return df.sort_index()

In [3]:
def create_time_series_image(df, window=None, width=800, height=400, dpi=100):
    """
    Render the 'speed' column of ``df`` as a line chart and return it as a PIL image.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is plotted on the x axis and whose 'speed' column
        is plotted on the y axis.
    window : tuple or None
        Optional ``(start, end)`` pair of index labels. When given, that span
        is redrawn in red and shaded yellow.
    width, height : int
        Figure size in pixels (converted to inches using ``dpi``).
    dpi : int
        Figure resolution.

    Returns
    -------
    PIL.Image.Image
        The chart, opened from an in-memory PNG buffer.
    """
    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)

    # Whole series as the backdrop.
    ax.plot(df.index, df['speed'], color='blue', alpha=0.7, label='Speed')

    # Emphasise the requested span, if any.
    if window is not None:
        span_start, span_end = window
        highlighted = df.loc[span_start:span_end]
        ax.plot(highlighted.index, highlighted['speed'], color='red', linewidth=2, label='Current Window')
        ax.axvspan(span_start, span_end, color='yellow', alpha=0.3)

    ax.set_title('Speed Time Series')
    ax.set_ylabel('Speed')
    ax.set_xlabel('Timestamp')
    ax.grid(True, alpha=0.3)
    ax.legend()

    # Serialise to PNG in memory, then release the figure.
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png')
    plt.close(fig)
    png_buffer.seek(0)

    return Image.open(png_buffer)

In [4]:
def create_window_image(window_df, width=800, height=400, dpi=100):
    """
    Render a detail chart of one window's 'speed' values with summary statistics.

    The chart shows the raw values, a rolling mean, a shaded band of two
    rolling standard deviations around that mean, a dashed line at the overall
    mean, and a text box listing mean, std, max and min.

    Parameters
    ----------
    window_df : pandas.DataFrame
        Rows of the window; must contain a 'speed' column.
    width, height : int
        Figure size in pixels (converted to inches using ``dpi``).
    dpi : int
        Figure resolution.

    Returns
    -------
    PIL.Image.Image
        The chart, opened from an in-memory PNG buffer.
    """
    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)

    speed = window_df['speed']

    # Rolling span is a tenth of the window length, but never fewer than 3 points.
    span = max(3, len(window_df) // 10)
    rolling_mean = speed.rolling(window=span).mean()
    rolling_std = speed.rolling(window=span).std()

    ax.plot(window_df.index, speed, color='blue', label='Speed')
    ax.plot(window_df.index, rolling_mean, color='green', label='Rolling Mean')
    lower_band = rolling_mean - 2*rolling_std
    upper_band = rolling_mean + 2*rolling_std
    ax.fill_between(window_df.index, lower_band, upper_band,
                    color='green', alpha=0.2, label='±2σ Range')

    # Whole-window summary statistics.
    mean_val = speed.mean()
    std_val = speed.std()
    max_val = speed.max()
    min_val = speed.min()

    ax.axhline(y=mean_val, color='r', linestyle='--', alpha=0.7, label=f'Mean: {mean_val:.2f}')

    ax.set_title('Window Analysis')
    ax.set_ylabel('Speed')
    ax.set_xlabel('Timestamp')
    ax.grid(True, alpha=0.3)

    stats_text = "\n".join([
        f"Mean: {mean_val:.2f}",
        f"Std: {std_val:.2f}",
        f"Max: {max_val:.2f}",
        f"Min: {min_val:.2f}",
    ])
    box_style = dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8)
    ax.annotate(stats_text, xy=(0.02, 0.95), xycoords='axes fraction', bbox=box_style)

    ax.legend()

    # Serialise to PNG in memory, then release the figure.
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png')
    plt.close(fig)
    png_buffer.seek(0)

    return Image.open(png_buffer)

In [5]:
def generate_vlm_prompt(window_df, full_df, window_index):
    """
    Build the text prompt that accompanies the two chart images for one window.

    Only ``window_index`` is interpolated into the text; ``window_df`` and
    ``full_df`` are accepted but not read.

    Returns the prompt as a single string ending in a newline.
    """
    prompt_lines = (
        f"Analyze these two time series visualizations for anomaly detection (window #{window_index}):",
        "",
        "The first image shows the entire time series with the current analysis window highlighted in yellow.",
        "The second image shows a detailed view of just the current window with statistical information.",
        "",
        "Task: Based on visual patterns, detect if there are any anomalies (unusual patterns, sudden changes, outliers) in the current window.",
        "Anomalies are very rare but may occur. Look at the entire series and by using it along with the current series make decision.",
        "",
        "Instruction : Don't misclassify any normal point as anomaly as this will be quite dangerous. Hence think step by step and critically before classifying.",
        "",
        "If you detect anomalies, provide the approximate start and end points within the window.",
        'Format your response as: "ANOMALY_DETECTED: true/false; START_POINT: timestamp or position; END_POINT: timestamp or position; REASON: brief explanation"',
        'If no anomalies, respond with: "ANOMALY_DETECTED: false"',
    )
    return "\n".join(prompt_lines) + "\n"

In [6]:
# Matches an explicit positive verdict. The negative lookahead rejects an echo
# of the prompt's own format template ("ANOMALY_DETECTED: true/false"), which a
# plain substring test would count as a detection.
_TRUE_VERDICT_RE = re.compile(r"ANOMALY_DETECTED:\s*true\b(?!\s*/)", re.IGNORECASE)


def _parse_point(raw_value, fallback):
    """Return ``raw_value`` parsed as a timestamp, or ``fallback`` if it is not a usable date."""
    try:
        parsed = pd.to_datetime(raw_value)
    except Exception:
        # Best-effort parse: free-text positions ("around the middle") land here.
        # `Exception` rather than a bare except, so Ctrl-C still interrupts the run.
        return fallback
    return fallback if pd.isna(parsed) else parsed


def _find_field(pattern, answer, response):
    """Search ``answer`` first, then the whole ``response``; return the stripped capture or None."""
    match = re.search(pattern, answer) or re.search(pattern, response)
    return match.group(1).strip() if match else None


def parse_vlm_response(response, window_start, window_end):
    """
    Parse the VLM response to extract anomaly information.

    Parameters
    ----------
    response : str
        Raw text returned by the model. ``None`` or an empty string is treated
        as "no anomaly".
    window_start, window_end :
        Bounds of the analysed window. They are copied into the result and
        used as the anomaly bounds when the model reports a start/end point
        that cannot be parsed as a timestamp.

    Returns
    -------
    dict
        Keys: ``window_start``, ``window_end``, ``anomaly_detected`` (bool),
        ``anomaly_start``, ``anomaly_end``, ``reason``. The last three stay
        ``None`` when no anomaly is reported or the field is absent.
    """
    result = {
        "window_start": window_start,
        "window_end": window_end,
        "anomaly_detected": False,
        "anomaly_start": None,
        "anomaly_end": None,
        "reason": None
    }

    response = response or ""
    verdict = _TRUE_VERDICT_RE.search(response)
    if verdict is None:
        return result

    result["anomaly_detected"] = True

    # Prefer fields that follow the verdict, so an echoed format template
    # earlier in the reply is not mistaken for the model's answer.
    answer = response[verdict.start():]

    start_point = _find_field(r"START_POINT:([^;\n]+)", answer, response)
    if start_point is not None:
        result["anomaly_start"] = _parse_point(start_point, window_start)

    end_point = _find_field(r"END_POINT:([^;\n]+)", answer, response)
    if end_point is not None:
        result["anomaly_end"] = _parse_point(end_point, window_end)

    reason = _find_field(r"REASON:([^;]+)", answer, response)
    if reason is not None:
        result["reason"] = reason

    return result

In [7]:
def _build_windows(start_time, end_time, window_size, step_size):
    """Return (start, end) pairs stepping from start_time up to end_time; each end is clipped to end_time."""
    windows = []
    current_start = start_time
    while current_start < end_time:
        windows.append((current_start, min(current_start + window_size, end_time)))
        current_start += step_size
    return windows


def detect_anomalies_with_vlm(df_path, window_size=None, output_path=None, overlap=0.5, model_name='OpenGVLab/InternVL2_5-8B'):
    """
    Slide a window over a speed time series and ask a vision-language model
    whether each window contains an anomaly.

    For every window, two charts are rendered (the full series with the window
    highlighted, and a detail view of the window) and sent to the model with a
    text prompt. The reply is parsed into a result dictionary.

    Parameters
    ----------
    df_path : str or path-like
        CSV file passed to ``load_time_series``.
    window_size : number or None
        Window length in minutes. When ``None``, 5% of the series' total time
        range (truncated to whole seconds) is used.
    output_path : str or None
        When given, the results are also written to this path as JSON.
    overlap : float
        Fraction of each window shared with the next one; must satisfy
        ``0 <= overlap < 1``.
    model_name : str
        Model identifier passed to the lmdeploy ``pipeline``.

    Returns
    -------
    list of dict
        One entry per analysed window, as produced by ``parse_vlm_response``
        plus ``window_index`` (1-based) and ``vlm_response`` (raw reply text).
        Windows with fewer than two rows are skipped.

    Raises
    ------
    ValueError
        If ``overlap`` is outside ``[0, 1)``, ``window_size`` is not positive,
        the file has no rows, or the time range is too short to derive a
        default window.
    """
    import json  # function-local: the notebook's import cell does not import json

    # A step that is zero or negative would never advance the window loop.
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in the range [0, 1), got {overlap!r}")

    if window_size is not None:
        window_size = pd.Timedelta(window_size, "min")
        if window_size <= pd.Timedelta(0):
            raise ValueError(f"window_size must be a positive number of minutes, got {window_size}")

    # Sorting makes index[0]/index[-1] the true bounds and keeps .loc slicing valid.
    df = load_time_series(df_path).sort_index()
    if len(df) == 0:
        raise ValueError(f"No rows found in {df_path}")

    start_time = df.index[0]
    end_time = df.index[-1]

    # If window_size is not specified, use 5% of the total time range
    if window_size is None:
        total_seconds = (end_time - start_time).total_seconds()
        window_seconds = int(total_seconds * 0.05)
        if window_seconds <= 0:
            raise ValueError("Time range is too short to derive a default window; pass window_size explicitly")
        window_size = pd.Timedelta(seconds=window_seconds)

    # Calculate window step size based on overlap
    step_size = window_size * (1 - overlap)

    windows = _build_windows(start_time, end_time, window_size, step_size)
    print(f"Created {len(windows)} windows for analysis")

    # Initialize the VLM pipeline
    # NOTE(review): the chat template name is fixed to 'internvl2_5' regardless of
    # model_name; confirm it is appropriate for other models passed in.
    pipe = pipeline(model_name, backend_config=TurbomindEngineConfig(session_len=32768), chat_template_config=ChatTemplateConfig(model_name='internvl2_5'))

    # Process each window
    results = []

    for i, (window_start, window_end) in enumerate(windows):
        print(f"Processing window {i+1}/{len(windows)}: {window_start} to {window_end}")

        # Get window data
        window_df = df.loc[window_start:window_end]

        # Skip windows without enough points to plot
        if len(window_df) < 2:
            print(f"Skipping window {i+1} because it contains insufficient data points")
            continue

        # Create images
        full_image = create_time_series_image(df, window=(window_start, window_end))
        window_image = create_window_image(window_df)

        # Generate prompt
        prompt = generate_vlm_prompt(window_df, df, i+1)

        # Run VLM inference on both images
        images = [full_image, window_image]
        response = pipe((prompt, images), gen_config=GenerationConfig(max_new_tokens=1024, temperature=0))

        # Parse results
        result = parse_vlm_response(response.text, window_start, window_end)
        result["window_index"] = i+1
        result["vlm_response"] = response.text

        results.append(result)

        print(f"Window {i+1} processed. Anomaly detected: {result['anomaly_detected']}")

    # Save results if output_path is provided
    if output_path:
        # Timestamps are not JSON-serialisable, so store them as strings.
        save_results = [
            {
                "window_index": result["window_index"],
                "window_start": str(result["window_start"]),
                "window_end": str(result["window_end"]),
                "anomaly_detected": result["anomaly_detected"],
                "anomaly_start": str(result["anomaly_start"]) if result["anomaly_start"] is not None else None,
                "anomaly_end": str(result["anomaly_end"]) if result["anomaly_end"] is not None else None,
                "reason": result["reason"],
                "vlm_response": result["vlm_response"]
            }
            for result in results
        ]

        with open(output_path, 'w') as f:
            json.dump(save_results, f, indent=2)

        print(f"Results saved to {output_path}")

    return results

In [8]:
if __name__ == "__main__":

    # Run configuration: model, input CSV, window length in minutes,
    # JSON output location, and fraction of overlap between windows.
    model = 'OpenGVLab/InternVL3-2B'
    input_path = "/content/AGC_Data.csv"
    window = 60
    output_path = "/content/result"
    overlap = 0

    results = detect_anomalies_with_vlm(
        df_path=input_path,
        window_size=window,
        output_path=output_path,
        overlap=overlap,
        model_name=model,
    )

    # Summarise how many of the analysed windows were flagged.
    anomaly_count = len([entry for entry in results if entry["anomaly_detected"]])
    print(f"\nAnalysis complete. Found {anomaly_count} windows with anomalies out of {len(results)} total windows.")

    if anomaly_count > 0:
        print("\nWindows with anomalies:")
        for r in results:
            if not r["anomaly_detected"]:
                continue
            # One report per flagged window, followed by a blank line.
            print(
                f"Window {r['window_index']}: {r['window_start']} to {r['window_end']}",
                f"  Anomaly: {r['anomaly_start']} to {r['anomaly_end']}",
                f"  Reason: {r['reason']}",
                "",
                sep="\n",
            )

Created 267 windows for analysis


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 20 files:   0%|          | 0/20 [00:00<?, ?it/s]

FlashAttention2 is not installed.




Processing window 1/267: 2024-09-03 20:49:32+07:00 to 2024-09-03 21:49:32+07:00
Window 1 processed. Anomaly detected: True
Processing window 2/267: 2024-09-03 21:49:32+07:00 to 2024-09-03 22:49:32+07:00
Window 2 processed. Anomaly detected: True
Processing window 3/267: 2024-09-03 22:49:32+07:00 to 2024-09-03 23:49:32+07:00
Window 3 processed. Anomaly detected: True
Processing window 4/267: 2024-09-03 23:49:32+07:00 to 2024-09-04 00:49:32+07:00
Window 4 processed. Anomaly detected: True
Processing window 5/267: 2024-09-04 00:49:32+07:00 to 2024-09-04 01:49:32+07:00
Window 5 processed. Anomaly detected: True
Processing window 6/267: 2024-09-04 01:49:32+07:00 to 2024-09-04 02:49:32+07:00
Window 6 processed. Anomaly detected: True
Processing window 7/267: 2024-09-04 02:49:32+07:00 to 2024-09-04 03:49:32+07:00
Window 7 processed. Anomaly detected: True
Processing window 8/267: 2024-09-04 03:49:32+07:00 to 2024-09-04 04:49:32+07:00
Window 8 processed. Anomaly detected: True
Processing windo

In [9]:
# Ask PyTorch to release the unused GPU memory its caching allocator is holding.
# NOTE(review): this only affects memory managed by PyTorch; whether it reclaims
# memory held by the lmdeploy engine created above is not shown here - confirm.
import torch
torch.cuda.empty_cache()