# Annotate Camera History with Flood Classification Prediction

In [1]:
cd ../

C:\Users\luisr\Desktop\Repositories\Data Science Projects\Hackaton COR IV - Centro de Operações do RJ\INCUBAÇÃO\Cameras


In [2]:
# Presumably the seed for any randomized step; no cell visible in this notebook reads it — TODO confirm it is still needed.
random_state = 0

#### Function to display train and test class counts

In [3]:
import pandas as pd

def split_class_count(y_train, y_test):
    """Display a table with the number of samples per class in each split.

    parameters:
        y_train - labels of the train set (anything accepted by `pd.Series`).
        y_test - labels of the test set (anything accepted by `pd.Series`).

    The table has one row per class and the columns `Train set` and `Test set`.
    Nothing is returned; the table is shown with `display`.
    """
    labels_by_split = {'Train set': y_train, 'Test set': y_test}
    count_columns = [
        pd.Series(labels).value_counts().to_frame(split_name)
        for split_name, labels in labels_by_split.items()
    ]
    display(pd.concat(count_columns, axis=1))

  from pandas.core.computation.check import NUMEXPR_INSTALLED


#### Class to load frames from labeled videos as labeled images

In [4]:
import os, cv2, numpy as np, pandas as pd
from datetime import datetime as dt
from IPython.display import clear_output as co

def timestamp_parser(path):
    """Parse the timestamp encoded in a video file name.

    The file name must look like `<prefix> YYYY-MM-DD HH-MM-SS.mp4`: everything
    up to the first space of the file name is discarded and the rest is parsed.

    parameters:
        path - path to the video file; both `/` and `\\` separators are accepted.
    returns:
        `datetime.datetime` parsed from the file name.
    raises:
        ValueError - if the file name does not match the expected pattern.
    """
    # Normalise Windows separators first, otherwise a directory containing a
    # space would be treated as part of the file name and break the parsing.
    file_name = path.replace('\\', '/').split('/')[-1]
    stamp = ' '.join(file_name.split(' ')[1:])
    return dt.strptime(stamp, '%Y-%m-%d %H-%M-%S.mp4')

class VideoProcessor:
    """Read videos frame by frame and hand every frame to a processing callback."""

    def __init__(self, fps=3, frame_dimension=1, frame_processing_function=None, filename_to_timestamp_function=timestamp_parser):
        """
        parameters:
            fps - frame rate used to advance the per-frame timestamp (one step of `1 / fps` seconds per frame).
            frame_dimension - 1: gray-scale frame flattened to 1D · 2: 2D gray-scale frame · any other value: frame exactly as read by OpenCV.
            frame_processing_function - callable invoked for every frame as `function(frame, metadata=<row as dict>, timestamp=<frame timestamp or None>)`.
            filename_to_timestamp_function - callable mapping a video path to a timestamp; `None` disables timestamps.
        """
        self.fps = fps
        self.frame_dimension = frame_dimension
        self.frame_processing_function = frame_processing_function
        self.filename_to_timestamp_function = filename_to_timestamp_function

    def process_labeled_videos(self, videos_dataframe, path_key, print_each=None):
        """
        Process every video listed in `videos_dataframe` and concatenate the per-frame results.

        parameters:
            videos_dataframe - pandas dataframe containing a column with name `path_key`, with paths to a set of videos.
            path_key - name of column containing path to videos in local file system
            print_each - print the progress every `print_each` videos; `None` disables progress output.
        returns:
            list with one entry per processed frame, in the order the videos and frames were read.
        """
        n = len(videos_dataframe)
        results = []
        for i, (_, video_object) in enumerate(videos_dataframe.iterrows(), start=1):
            results.extend(self.process_frames(video_object, path_key))
            if print_each is not None and i % print_each == 0:
                co(True); print(f'LABELED VIDEOS PROCESSING · PROGRESS: {i}/{n}')
        return results

    def process_frames(self, video_object, path_key):
        """
        Run `frame_processing_function` on every frame of a single video.

        parameters:
            video_object - dataframe row describing the video; it is passed to the callback as a dict.
            path_key - name of the field of `video_object` holding the video path.
        returns:
            list with the callback result of every frame; empty list if the video cannot be opened.
        raises:
            TypeError - if the video opens but no `frame_processing_function` was configured.
        """
        path = video_object[path_key]
        timestamp = None
        if self.filename_to_timestamp_function is not None:
            timestamp = self.filename_to_timestamp_function(path)
            offset = pd.offsets.Second() / self.fps
        cap = cv2.VideoCapture(path)
        frames_results = []
        try:
            if not cap.isOpened():
                print(f"CANNOT OPEN VIDEO CAPTURE · PATH: {path}")
                return []
            if self.frame_processing_function is None:
                # Fail with an explicit message instead of "'NoneType' object is not callable".
                raise TypeError('`frame_processing_function` must be set before processing frames')
            while True:
                ret, frame = cap.read()
                if not ret:
                    break # stream finished
                if self.frame_dimension == 1: # 1D flat frame
                    frame = np.reshape(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), -1)
                elif self.frame_dimension == 2: # 2D gray scale frame
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if self.filename_to_timestamp_function is not None:
                    # The timestamp is advanced before the callback runs, so the
                    # first frame is stamped one frame interval after the parsed time.
                    timestamp += offset
                result = self.frame_processing_function(frame, metadata=video_object.to_dict(), timestamp=timestamp)
                frames_results.append(result)
        finally:
            # Release the capture even when the callback raises, so the video file handle is not leaked.
            cap.release()
        cv2.destroyAllWindows()
        return frames_results

#### Class to write videos

In [5]:
import os, cv2

class Video:
    """Creates OpenCV video writers that share one codec, frame rate and frame shape."""

    def __init__(self, codec:str='mp4v', fps:int=3, shape:tuple=(854, 480), overwrite=False):
        """
        parameters:
            codec - four-character code, unpacked into `cv2.VideoWriter_fourcc`.
            fps - frame rate passed to `cv2.VideoWriter`.
            shape - frame shape passed to `cv2.VideoWriter`.
            overwrite - when False, `writer` refuses paths that already exist.
        """
        self.codec = codec
        self.fps = fps
        self.shape = shape
        self.overwrite = overwrite

    def writer(self, path):
        """
        Open a video writer for `path`.

        returns:
            a `cv2.VideoWriter`, or `False` (after printing a message) when the
            file already exists and overwriting is disabled.
        """
        refused = (not self.overwrite) and os.path.exists(path)
        if refused:
            print(f'ANNOTATE VIDEO TIMESTAMP FAILED. FILE ALREADY EXISTS · FILE-PATH: {path}')
            return False
        fourcc = cv2.VideoWriter_fourcc(*self.codec)
        return cv2.VideoWriter(path, fourcc, self.fps, self.shape)

---
## Reload labeled flood videos dataset

In [7]:
import pandas as pd

videos = pd.read_csv('Dados/Rotulos/1475_2023-02-07.csv')

# preprocessing - blob/file names · every `:` becomes `-`
videos['blob_name'] = videos['blob_name'].map(lambda blob_name: blob_name.replace(':', '-'))

# preprocessing - parse the timestamps, then index and order the rows by them
videos['timestamp'] = pd.to_datetime(videos['timestamp'])
videos = videos.set_index('timestamp').sort_index()

# preprocessing - keep only videos smaller than `video_max_bytes`
video_max_bytes = 5e6 # 5 MB
original_shape = videos.shape  # shape before the size filter
videos = videos.loc[videos['blob_size'] < video_max_bytes]

display(videos.loc[:, ['blob_name', 'tag']].head())
print(f'Shape: {original_shape}')
print(f'\n - Shape after processing: {videos.shape}')

Unnamed: 0_level_0,blob_name,tag
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-02-07 19:25:00,polygons/flood-unlabeled/1/1475/CODE1475 2023-...,alagamento
2023-02-07 19:30:00,polygons/flood-unlabeled/1/1475/CODE1475 2023-...,alagamento
2023-02-07 19:35:00,polygons/flood-unlabeled/1/1475/CODE1475 2023-...,alagamento
2023-02-07 19:40:00,polygons/flood-unlabeled/1/1475/CODE1475 2023-...,alagamento
2023-02-07 19:50:00,polygons/flood-unlabeled/1/1475/CODE1475 2023-...,alagamento


Shape: (165, 8)

 - Shape after processing: (163, 8)


---
## Video Metadata Preprocessing

#### Binarize target variable categories

In [8]:
# Binary label -> raw tags that are grouped under it
replace_tags = {
    'acúmulo': ['lâmina', 'bolsão', 'alagamento'],
    'normalidade': ['poça', 'normalidade'],
}

# Invert the grouping into a flat `raw tag -> binary label` lookup
tag_to_label = {tag: label for label, tags in replace_tags.items() for tag in tags}

y_true = videos['tag'].map(tag_to_label)

# A tag missing from `replace_tags` maps to NaN; stop here and name the
# offending tags instead of producing a label column with gaps.
unmapped_tags = videos.loc[y_true.isna(), 'tag'].unique().tolist()
if unmapped_tags:
    raise ValueError(f'Tags without a label in `replace_tags`: {unmapped_tags}')

videos['label'] = y_true

display(videos['label'].value_counts().to_frame('Video Samples'))

Unnamed: 0_level_0,Video Samples
label,Unnamed: 1_level_1
normalidade,128
acúmulo,35


---
## Load Flood Classifier Model

#### Load flood image classifier

In [9]:
import pickle, pandas as pd

filename = 'code1475.sav'

# SECURITY: unpickling can execute arbitrary code — only load model files from a trusted source.
# The `with` block closes the file handle as soon as the model is loaded.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

#### Image processing function

In [10]:
# Get image classification model prediction and metadata

def classify_image(frame, metadata, timestamp=None):
    """Classify one frame with the global `model` and bundle the result with the video metadata.

    parameters:
        frame - single sample passed to `model.predict` (wrapped in a one-element list).
        metadata - dict with the fields of the video the frame belongs to.
        timestamp - timestamp of the frame, or None.
    returns:
        `pd.Series` with `prediction` and `timestamp` followed by the metadata fields.
    """
    record = {
        'prediction': model.predict([frame])[0],
        'timestamp': timestamp,
    }
    # Frame-level values take precedence: a metadata field named `prediction`
    # or `timestamp` must not overwrite what was computed for this frame.
    record.update({key: value for key, value in metadata.items() if key not in record})
    return pd.Series(record)

---
## Process labeled videos from single camera

#### Processing pipeline:
1. Classify frame
1. Annotate frame with timestamp, label and prediction
1. Write frame to video file

#### Custom functions to process videos' frames individually

In [13]:
def add_timestamp_to_frame(frame, text):
    """Draw `text` on `frame` twice — a black copy first, then a coloured copy
    shifted 2 px up and to the left on top of it — and return the same frame.

    The frame is modified in place by `cv2.putText`.
    """
    layers = (
        ((542, 27), (0, 0, 0)),        # drawn first: black copy underneath
        ((540, 25), (40, 230, 230)),   # drawn second: coloured copy on top
    )
    for origin, colour in layers:
        cv2.putText(
            frame, text, org=origin, fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=0.67, color=colour, thickness=2, lineType=cv2.LINE_8,
        )
    return frame

def history_write(frame, metadata, timestamp):
    """Classify a frame, annotate it and append it to the global `history_writer`.

    The frame is annotated with the timestamp (when one is given), the video
    label and the prediction of the global `model`, then written to the video.

    parameters:
        frame - colour frame as read by OpenCV; it is annotated in place.
        metadata - dict with the fields of the source video; must contain `label`.
        timestamp - timestamp of the frame, or None to skip the timestamp overlay.
    returns:
        dict with `write`, `prediction` and `timestamp` followed by the metadata fields.
        `write` only records that `history_writer.write` returned without raising.
    """
    frame_1d = np.reshape(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), -1) # 1D from gray 2D image (reduces columns 3 times)
    label = metadata['label']
    prediction = model.predict([frame_1d])[0]

    # `Ú` is replaced by `U` before drawing; presumably the OpenCV font cannot render it — TODO confirm.
    text_label = f'Rotulo: {label.upper().replace("Ú", "U")}'
    text_prediction = f'Previsao (IA): {prediction.upper().replace("Ú", "U")}'

    if timestamp is not None:
        # The timestamp is None when the processor runs without a file-name parser.
        timestamp_str = timestamp.strftime('OCTA %d/%m/%Y %H:%M:%S')
        frame = add_timestamp_to_frame(frame, timestamp_str)  # put timestamp text
    cv2.putText(
        frame, text_label, org=(20, 425), fontFace=cv2.FONT_HERSHEY_COMPLEX,
        fontScale=0.67, color=(40, 230, 230), thickness=2
    )
    cv2.putText(
        frame, text_prediction, org=(20, 460), fontFace=cv2.FONT_HERSHEY_COMPLEX,
        fontScale=0.67, color=(40, 230, 230), thickness=2
    )
    history_writer.write(frame)

    record = {
        'write': 'success',
        'prediction': prediction,
        'timestamp': timestamp,
    }
    # Frame-level values take precedence: metadata fields with the same name
    # (e.g. a video-level `timestamp` column) must not overwrite them.
    record.update({key: value for key, value in metadata.items() if key not in record})
    return record

#### Video frames pipeline · Concatenate multiple videos

In [12]:
# Pipeline parameters
pre_sampled = False
process = history_write
folder = 'Dados/flood-video-collection'

# Video parameters
fps=15
overwrite=True
full_history_video_path = 'Dados/Simulação/history-labeled-code1475.mp4'

if pre_sampled:
    # NOTE(review): `xx` is not defined in any visible cell — it must exist before enabling `pre_sampled`.
    X = xx.copy()  # pre-sampled from loaded videos dataset
else:
    X = videos.copy()  # complete loaded videos dataset

X['path'] = f'{folder}/' + X['blob_name']
X = X.sort_index().reset_index()

# OPEN VIDEO FILE WRITER
video = Video(fps=fps, overwrite=overwrite)
history_writer = video.writer(path=full_history_video_path)
if history_writer is False:
    # `Video.writer` returns False when the file exists and `overwrite` is off.
    raise FileExistsError(f'Output video already exists: {full_history_video_path}')

try:
    # RUN FRAME PIPELINE
    # `frame_dimension=3` leaves the frames as read by OpenCV (no gray-scale conversion).
    # Keyword names must match the `VideoProcessor` signatures
    # (`frame_processing_function`, `videos_dataframe`).
    pipeline = VideoProcessor(frame_dimension=3, frame_processing_function=process)
    results = pipeline.process_labeled_videos(videos_dataframe=X, path_key='path', print_each=1)
finally:
    # CLOSE/RELEASE VIDEO FILE WRITER — also when the pipeline fails
    history_writer.release(); cv2.destroyAllWindows()

print(f'\nVideos Selected: {len(X)}')
print(f'Frames Loaded: {len(results)}')
print()

LABELED VIDEOS PROCESSING · PROGRESS: 163/163

Videos Selected: 163
Frames Loaded: 5419



#### Evaluate result

In [14]:
frames_concat = pd.DataFrame(results)

# Classification model report · simulation of real conditions

from sklearn.metrics import classification_report

# Frame-level labels and model predictions
y_true = frames_concat['label']
y_pred = frames_concat['prediction']

display(frames_concat['write'].value_counts().to_frame('Operation Success'))
print()

print(classification_report(y_true, y_pred))

# Share of frames predicted correctly, and number of frames predicted wrongly
frame_accuracy = round((y_true == y_pred).mean(), 2)
frame_errors = (y_true != y_pred).sum()
print(f'\n * Accuracy: {frame_accuracy}')
print(f'\n * Mislabeled data points: {frame_errors} / {len(y_true)}')

Unnamed: 0_level_0,Operation Success
write,Unnamed: 1_level_1
success,5419



              precision    recall  f1-score   support

     acúmulo       0.94      0.94      0.94       978
 normalidade       0.99      0.99      0.99      4441

    accuracy                           0.98      5419
   macro avg       0.96      0.97      0.96      5419
weighted avg       0.98      0.98      0.98      5419


 * Accuracy: 0.98

 * Mislabeled data points: 115 / 5419
