In [None]:
!pip install moviepy pydub openai-whisper easyocr transformers Pillow exifread


Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/803.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/803.2 kB[0m [31m39.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting exifread
  Downloading exifread-3.5.1-py3-none-any.whl.metadata (10 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.

In [None]:
import moviepy.editor as mp
from pydub import AudioSegment
import whisper
import easyocr
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import numpy as np

  IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-6.8.8-Q16\magick.exe"
  lines_video = [l for l in lines if ' Video: ' in l and re.search('\d+x\d+', l)]
  rotation_lines = [l for l in lines if 'rotate          :' in l and re.search('\d+$', l)]
  match = re.search('\d+$', rotation_line)
  if event.key is 'enter':

  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)

  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)

  elif re.match('(flt)p?( \(default\))?$', token):

  elif re.match('(dbl)p?( \(default\))?$', token):



In [4]:
# Pretrained models are loaded once here and shared by the analysis
# functions defined further down in this cell.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)          # image/text matching model
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)  # pre-processor for the same checkpoint
ocr_reader = easyocr.Reader(["en"])                               # OCR reader, English only
whisper_model = whisper.load_model("base")                        # speech-to-text model

# -----------------------------
# 1. Extract video frames
# -----------------------------
def extract_frames(video_path, every_n_seconds=5):
    """Sample still frames from a video at a fixed time interval.

    Args:
        video_path: Path of the video file, handed to ``mp.VideoFileClip``.
        every_n_seconds: Gap in seconds between sampled timestamps. Sampling
            starts at t=0 and stops before the clip's duration.

    Returns:
        A list of images (built with ``Image.fromarray``), one per sampled
        timestamp, in chronological order.

    Raises:
        ValueError: If ``every_n_seconds`` is zero or negative.
    """
    if every_n_seconds <= 0:
        # A zero step makes np.arange fail with an obscure error and a
        # negative step silently yields no frames; reject both up front.
        raise ValueError("every_n_seconds must be positive")

    clip = mp.VideoFileClip(video_path)
    try:
        return [
            Image.fromarray(clip.get_frame(t))  # get_frame returns a numpy array
            for t in np.arange(0, clip.duration, every_n_seconds)
        ]
    finally:
        # Always release the clip's reader, even if decoding a frame fails.
        clip.close()

# -----------------------------
# 2. Frame analysis (like image analyzer)
# -----------------------------
# Points each kind of finding contributes to the exposure score.
# The first five keys are the CLIP labels produced by analyze_frame; the
# scorer multiplies their weight by the label's probability.
# "ocr_text" is a flat amount added for a frame in which OCR found any text.
RISK_WEIGHTS = {
    # CLIP labels
    "face": 35,
    "office/logo": 20,
    "document": 40,
    "group": 15,
    "street sign": 15,
    # OCR
    "ocr_text": 25,
}

def analyze_frame(frame):
    """Run OCR and CLIP zero-shot classification on a single frame.

    Args:
        frame: The image to analyse. It is converted with ``np.array`` for
            OCR and passed as-is to the CLIP processor.

    Returns:
        A dict with two keys:
            "ocr_text": list of the text strings reported by the OCR reader.
            "clip_analysis": dict mapping each label ("face", "office/logo",
                "document", "group", "street sign") to its softmax
                probability as a Python float. The probabilities are a
                softmax over these five prompts only, so they sum to 1.
    """
    # Imported locally so this function does not depend on a file-level
    # torch import; torch is what backs the "pt" tensors requested below.
    import torch

    # Label -> prompt shown to CLIP. One mapping keeps the short label names
    # and the prompt texts in lock-step.
    prompt_by_label = {
        "face": "a person face",
        "office/logo": "an office with logo",
        "document": "a document",
        "group": "a group of people",
        "street sign": "a street sign",
    }

    # OCR text: element 1 of every detection is the recognised string.
    detections = ocr_reader.readtext(np.array(frame))
    ocr_text = [detection[1] for detection in detections]

    # CLIP classification
    inputs = clip_processor(
        text=list(prompt_by_label.values()),
        images=frame,
        return_tensors="pt",
        padding=True
    )
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = clip_model(**inputs)
    # .tolist() yields plain Python floats whatever device the model is on.
    probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()
    clip_analysis = dict(zip(prompt_by_label, probs))

    return {"ocr_text": ocr_text, "clip_analysis": clip_analysis}

# -----------------------------
# 3. Extract audio from video
# -----------------------------
def extract_audio_from_video(video_path, audio_path="temp_audio.wav"):
    """Write a video's audio track to a separate audio file.

    Args:
        video_path: Path of the video file, handed to ``mp.VideoFileClip``.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path``, unchanged, so the call can be chained.

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Without this guard a silent video fails with an opaque
            # AttributeError on None; report the actual cause instead.
            raise ValueError(f"{video_path!r} has no audio track to extract")
        clip.audio.write_audiofile(audio_path)
        return audio_path
    finally:
        # Always release the clip's readers, even if writing fails.
        clip.close()

def analyze_audio(audio_path):
    """Collect basic metadata and a transcript for an audio file.

    Args:
        audio_path: Path of the audio file; read once by pydub for the
            metadata and once by the Whisper model for transcription.

    Returns:
        A dict with:
            "metadata": dict holding "duration_seconds", "channels" and
                "frame_rate".
            "transcript": the "text" field of the Whisper result.
    """
    segment = AudioSegment.from_file(audio_path)

    # len() of a pydub segment is in milliseconds; convert to seconds.
    info = {}
    info["duration_seconds"] = len(segment) / 1000
    info["channels"] = segment.channels
    info["frame_rate"] = segment.frame_rate

    whisper_result = whisper_model.transcribe(audio_path)
    return {"metadata": info, "transcript": whisper_result["text"]}

# -----------------------------
# 4. Compute video exposure score
# -----------------------------
def compute_video_exposure_score(frames_analysis, audio_analysis):
    """Combine per-frame findings and the transcript into one exposure score.

    Scoring rules:
        * Every CLIP label with probability above 0.5 adds
          ``RISK_WEIGHTS[label] * probability`` (labels without a weight add 0).
        * Every frame with any OCR text adds ``RISK_WEIGHTS["ocr_text"]``.
        * Each occurrence of a sensitive keyword in the lower-cased
          transcript adds 10, capped at 50 for the audio as a whole.
        * The final score is rounded to 2 decimals and capped at 100.

    Args:
        frames_analysis: Iterable of dicts with "clip_analysis"
            (label -> probability) and "ocr_text" entries.
        audio_analysis: Dict with a "transcript" string.

    Returns:
        ``{"exposure_score": <number>, "details": [(name, contribution), ...]}``
        where ``details`` lists contributions in the order they were added.
    """
    running_total = 0
    breakdown = []

    for frame_result in frames_analysis:
        # Visual findings: only labels CLIP is reasonably sure about count.
        for category, confidence in frame_result["clip_analysis"].items():
            if not confidence > 0.5:
                continue
            weighted = RISK_WEIGHTS.get(category, 0) * confidence
            running_total += weighted
            breakdown.append((category, round(weighted, 2)))

        # Readable text in the frame: flat contribution, once per frame.
        if frame_result["ocr_text"]:
            text_points = RISK_WEIGHTS["ocr_text"]
            running_total += text_points
            breakdown.append(("ocr_text", text_points))

    # Spoken findings: count plain substring occurrences of each keyword.
    spoken = audio_analysis["transcript"].lower()
    hits = 0
    for keyword in ("password", "ssn", "credit card", "secret", "confidential"):
        hits += spoken.count(keyword)
    if hits:
        audio_points = min(50, hits * 10)
        running_total += audio_points
        breakdown.append(("audio_sensitive_keywords", audio_points))

    return {"exposure_score": min(100, round(running_total, 2)), "details": breakdown}

# -----------------------------
# 5. Full video analysis
# -----------------------------
def analyze_video(video_path):
    """Run the whole pipeline on one video and return every intermediate result.

    Steps, each announced with a progress message: sample a frame every
    5 seconds, analyse each frame, extract the audio track, analyse the
    audio, then compute the exposure score.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A dict with "frames_analysis" (list of per-frame results),
        "audio_analysis" (audio result) and "score" (scoring result).
    """
    print("Extracting frames...")
    sampled_frames = extract_frames(video_path, every_n_seconds=5)

    print("Analyzing frames...")
    per_frame_results = []
    for sampled in sampled_frames:
        per_frame_results.append(analyze_frame(sampled))

    print("Extracting audio...")
    wav_path = extract_audio_from_video(video_path)

    print("Analyzing audio...")
    audio_report = analyze_audio(wav_path)

    print("Computing exposure score...")
    exposure = compute_video_exposure_score(per_frame_results, audio_report)

    return dict(
        frames_analysis=per_frame_results,
        audio_analysis=audio_report,
        score=exposure,
    )

# -----------------------------
# 6. Colab file upload & run
# -----------------------------
# Colab-only entry point: ask the user to upload files, then analyse each
# uploaded file and print its score, the score breakdown and the transcript.
from google.colab import files

uploaded = files.upload()

for filename in uploaded:
    print(f"\nAnalyzing {filename}...\n")
    result = analyze_video(filename)

    score_report = result["score"]
    print(f"\nOverall Exposure Score: {score_report['exposure_score']}/100")

    print("Details:")
    for label, points in score_report["details"]:
        print(f"- {label} → +{points}")

    transcript = result["audio_analysis"]["transcript"]
    print(f"\nAudio Transcript:\n{transcript}")

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]



Saving 6247922-uhd_2160_3840_24fps.mp4 to 6247922-uhd_2160_3840_24fps.mp4

Analyzing 6247922-uhd_2160_3840_24fps.mp4...

Extracting frames...
Analyzing frames...
Extracting audio...
MoviePy - Writing audio in temp_audio.wav





MoviePy - Done.
Analyzing audio...
Computing exposure score...

Overall Exposure Score: 35.35/100
Details:
- group → +9.4
- face → +25.95

Audio Transcript:

