# Annotate Camera History with Flood Classification Prediction

In [1]:
cd ../

C:\Users\luisr\Desktop\Repositories\Data Science Projects\Hackaton COR IV - Centro de Operações do RJ\INCUBAÇÃO\Cameras


In [2]:
random_state = 0

#### Class to load frames from labeled videos as labeled images

In [3]:
from modules.video_frame_processing import VideoProcessor

#### Function to binarize target variable class labels * CHANGE FUNCTION NAME TO `replace_tags(y, tags)` or something else

In [4]:
import pandas as pd

def relabel(Y: pd.Series, labels: dict) -> pd.Series:
    """
    Relabels the given Pandas series with the corresponding label from the given dictionary.

    Parameters:
    Y (pd.Series): Pandas series to relabel
    labels (dict): Dictionary containing the corresponding labels for each tag

    Returns:
    pd.Series: Relabeled Pandas series
    """
    y_true = []
    for tag in Y:
        for key, values in labels.items():
            if tag in values:
                y_true.append(key)
                break
    return pd.Series(y_true, index=Y.index)

#### Function to display target variable class count of train and test sets

In [5]:
import pandas as pd

def split_class_count(y_train, y_test):
    display(pd.concat([
        pd.Series(y_train).value_counts().to_frame('Train set'),
        pd.Series(y_test).value_counts().to_frame('Test set')
    ], axis=1))

#### Class to write videos

In [6]:
import os, cv2

class Video:

    def __init__(self, codec:str='mp4v', fps:int=3, shape:tuple=(854, 480), overwrite=False):
        self.codec = codec; self.fps = fps; self.shape = shape
        self.overwrite = overwrite

    def writer(self, path):
        if not self.overwrite and os.path.exists(path):
            print(f'ANNOTATE VIDEO TIMESTAMP FAILED. FILE ALREADY EXISTS · FILE-PATH: {path}')
            return False
        return cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*self.codec), self.fps, self.shape)

### Define the camera code

In [7]:
code = 1487 # 1475 # 1649 # 1648 # 1487

dataset_date = '2023-04-29'
model_date = '2023-05-06'

---
### Reload labeled flood videos dataset

In [8]:
import pandas as pd

videos = pd.read_csv(f'Dados/Rotulados/videos_labeled_CODE{code}_{dataset_date}.csv')

# preprocessing - blob/file name fix · replace `:` for `-`
videos['blob_name'] = [blob_name.replace(':', '-') for blob_name in videos['blob_name']]

# preprocessing - datetime conversion
videos['timestamp'] = pd.to_datetime(videos['timestamp'])
videos = videos.set_index('timestamp', drop=True).sort_index()

# preprocessing - drop videos larger than `video_max_bytes`
video_max_bytes = 5e6 # 5 Mb
original_shape = videos.shape
# videos = videos[videos['blob_size'] < video_max_bytes]

display(videos[['blob_name', 'tag']].head())
print(f'Shape: {original_shape}')
print(f'\n - Shape after processing: {videos.shape}')

mega_bytes = round(videos["blob_size"].sum() / 1e6, 3)
print(f'\nTotal video bytes: {mega_bytes} Mb')

Unnamed: 0_level_0,blob_name,tag
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-02-07 19:25:00,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
2023-02-07 19:30:00,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
2023-02-07 19:35:00,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
2023-02-07 19:50:00,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,1.0
2023-02-07 19:54:00,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,1.0


Shape: (360, 8)

 - Shape after processing: (360, 8)

Total video bytes: 338.954 Mb


---
### Reload labeled flood images dataset

In [9]:
import pandas as pd

# code = 1648 # 1487

images = pd.read_csv(f'Dados/Rotulados/images_labeled_CODE{code}_{dataset_date}.csv')

# preprocessing
images.sort_values('image_timestamp', inplace=True)
# images['image_timestamp'] = pd.to_datetime(images['image_timestamp'])
# images = images.set_index('image_timestamp', drop=True).sort_index()

# drop images larger than `video_max_bytes`
image_max_bytes = 5e6 # 5 Mb
original_shape = images.shape
# images = images[images['blob_size'] < image_max_bytes]

display(images[['blob_name', 'tag']].head())
print(f'Shape: {original_shape}')
print(f'\n - Shape after processing: {images.shape}')

mega_bytes = round(images.drop_duplicates('blob_name')["blob_size"].sum() / 1e6, 3)
print(f'\nTotal video bytes: {mega_bytes} Mb')

Unnamed: 0,blob_name,tag
0,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
1,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
2,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
3,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2
4,polygons/flood-unlabeled/-1/1487/CODE1487 2023...,0.2


Shape: (11354, 10)

 - Shape after processing: (11354, 10)

Total video bytes: 265.168 Mb


#### Set image reference dataset · IMPORTANT

In [10]:
# replace_tags = {
#     'acúmulo': ['lâmina', 'bolsão', 'alagamento'],
#     'normalidade': ['poça', 'normalidade'],
# }

replace_tags = {
    0: [0.0],
    1: [0.2],
    2: [0.4],
    3: [0.6],
    4: [0.8],
    5: [1.0],
}

image_reference = images.set_index(['blob_name', 'image_timestamp'])
image_reference['label'] = relabel(images['tag'], replace_tags).values # overrides `label` field from videos dataset

---
## Video Metadata Preprocessing

#### Binarize target variable categories

In [11]:
# replace_tags = {
#     'acúmulo': ['lâmina', 'bolsão', 'alagamento'],
#     'normalidade': ['poça', 'normalidade'],
# }

replace_tags = {
    0: [0.0],
    1: [0.2],
    2: [0.4],
    3: [0.6],
    4: [0.8],
    5: [1.0],
}

videos['label'] = relabel(videos['tag'], replace_tags)  # update videos dataset

display(videos['label'].value_counts().to_frame('Video Samples'))

Unnamed: 0,Video Samples
0,276
1,70
3,6
5,5
2,3


---
## Load Flood Image Classifier Model

#### Class to wrap regression models with rounded prediction functionality

In [14]:
import numpy as np

class RounderRegressor:
    
    bins_default = np.array([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])

    def __init__(self, model=None, fitted_model=None, bins=bins_default):
        self.model = model
        self.fitted_model = fitted_model
        self.bins = bins
        
    def round_close(self, Y, bins=bins_default):
        Y_round = []
        for value in Y:
            diffs = np.abs(bins - value)
            rounded_value = bins[diffs==diffs.min()][0]
            Y_round.append(rounded_value)
        return np.array(Y_round)

    def fit(self, x, y):
        self.fitted_model = self.model.fit(x, y)
        return self
    
    def predict(self, x):
        y_pred = self.fitted_model.predict(x)
        y_pred_round = self.round_close(y_pred, self.bins)
        return y_pred_round

### Load flood image classifier

In [25]:
import pickle, pandas as pd

# code = 1648

file_name = f'models/water-level/code{code}_{model_date}.sav'
model = pickle.load(open(file_name, 'rb'))

# Image processing function

# Get image classification model prediction and metadata

def classify_image(frame, metadata, timestamp=None):
    prediction = model.predict([frame])[0]
    return pd.Series({
        'prediction': prediction,
        'timestamp': timestamp,
        **metadata
    })

### Load Histogram Dissimilarity Classifier

#### Function to load model

In [106]:
#### Import modules

import cv2
from modules.histcomparison import HistogramDissimilarityAnalysis

# Function to load HDA model

def get_hda(code, folder):
    code_ref = f'{folder}/{code}/reference'
    base_str = f'CODE{code}'
    base_filepath = f'{code_ref}/{base_str}_'
    code_back = cv2.imread(f"{base_filepath}MEAN.jpg")
    code_mask = cv2.imread(f"{base_filepath}MASK.jpg")
    code_day = cv2.imread(f"{base_filepath}day.jpg")
    code_night = cv2.imread(f"{base_filepath}night.jpg")
    code_puddle = cv2.imread(f"{base_filepath}puddle.jpg")
    code_flood = cv2.imread(f"{base_filepath}flood.jpg")
    return HistogramDissimilarityAnalysis(code, code_back, code_mask, code_day, code_night, code_puddle, code_flood)


# Load model instance

# code = 1648
model = get_hda(code, folder='Dados/images')

---
## Process labeled videos from single camera

#### Processing pipeline:
1. Classify frame
1. Annotate frame with timestamp, label and prediction
1. Write frame to video file

#### Custom functions to process videos' frames individually

In [26]:
import numpy as np

def blob_name_path_fix(blob_name):
    blob_info = blob_name.split('/')
    file_info = blob_info[-1].split(' ')
    file_time_fix = file_info[-1].replace('-', ':')
    blob_name_fix = '/'.join(blob_info[:-1] + [' '.join(file_info[:-1] + [file_time_fix])])
    return blob_name_fix

def round_ms(stamp):
    n_digits = len(stamp)
    if n_digits > 23:
        digits_out = - (n_digits - 23)
        stamp = stamp[:digits_out]
    elif n_digits < 23 and n_digits > 19:
        digits_in = 23 - n_digits
        stamp = stamp + '0' * digits_in
    elif n_digits <= 19:
        digits_in = 19 - n_digits
        stamp = stamp + '0' * digits_in + '.' + '000'
    return stamp

def add_timestamp_to_frame(frame, text):
    cv2.putText(frame, text, org=(542, 27), fontFace=cv2.FONT_HERSHEY_COMPLEX,
    fontScale=0.67, color=(0, 0, 0), thickness=2, lineType=cv2.LINE_8)
    cv2.putText(frame, text, org=(540, 25), fontFace=cv2.FONT_HERSHEY_COMPLEX,
    fontScale=0.67, color=(40, 230, 230), thickness=2, lineType=cv2.LINE_8)
    return frame

def history_write(frame, timestamp, metadata):
    
    """
    expects to be deefined outside of function:
        model - model with predict method
        history_writer
        image_reference
        LABEL_TYPE
        MODEL_TYPE
    """
        
    # get image`blob_name` and `image_timstamp` references
    blob_name = metadata['blob_name']
    
    # fix `blob_name` file name format
    blob_name_fix = blob_name_path_fix(blob_name)
    
    # format image timestamp to match the image dataset's format
    image_timestamp = timestamp.round('ms')  # round timestamp to miliseconds
    image_timestamp_str = round_ms(str(image_timestamp))  # round seconds of timestamp string to 3 decimal places
    
    # get image metadata
    image_metadata = {'label': ''}
    for name in (blob_name, blob_name_fix): 
        idx = (name, image_timestamp_str)
        try:
            image_metadata = image_reference.loc[idx]
        except:
            pass

    # format image timestamp string
    timestamp_str =  timestamp.strftime('OCTA %d/%m/%Y %H:%M:%S')
    frame = add_timestamp_to_frame(frame, timestamp_str)  # put timestamp text

    # get image label
    # label = metadata['label']  # video label
    label = image_metadata['label']  # image label
    
    # convert image dimension to 1D
    frame_1d = np.reshape(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), -1) # 1D from gray 2D image (reduces columns 3 times)

    # get prediction from 1D image
    if MODEL_TYPE in ['HDA']:
        prediction = model.predict(frame)        
    else:
        prediction = model.predict([frame_1d])[0]

    # format video text annotations
    label_text = label
    prediction_text = prediction
    if MODEL_TYPE in ['HDA', 'GNB']:
        label_text = label_text.upper().replace("Ú", "U")
        prediction_text = prediction_text.upper().replace("Ú", "U")
        label_text = f'Rotulo: {label_text}'
        prediction_text = f'Previsao (IA): {prediction_text}'
    elif MODEL_TYPE in ['LR']:
        label_text = str(round(label_text * 1/5 * 100, 1)) + ' %' if label_text != '' else 'NAO-ROTULADO'
        prediction_text = str(round(prediction_text * 100, 1)) + ' %'
        label_text = f'Nivel real: {label_text}'
        prediction_text = f'Nivel detectado (IA): {prediction_text}'
    
    
    # put text annotation on video
    cv2.putText(
        frame, label_text, org=(20, 425), fontFace=cv2.FONT_HERSHEY_COMPLEX,
        fontScale=0.67, color=(40, 230, 230), thickness=2
    )
    cv2.putText(
        frame, prediction_text, org=(20, 460), fontFace=cv2.FONT_HERSHEY_COMPLEX,
        fontScale=0.67, color=(40, 230, 230), thickness=2
    )
    
    # write edited image to video
    history_writer.write(frame)
    
    # return operation record
    return {
        'write': 'success',
        'prediction': prediction,
        'timestamp': timestamp,
        **metadata,
        **image_metadata,
        'label': label,
        'image_timestamp': image_timestamp_str,
    }

### Videos frames pipeline · Concatenate multiple videos

In [17]:
# '2023-02-14 11:45:00.333' in image_reference.index.to_frame()['image_timestamp'] # Error for code 1648

In [28]:
from datetime import datetime as dt

# Pipeline parameters

pre_sampled = False

process = history_write
folder = 'Dados/flood-video-collection'

LABEL_TYPE = 'IMAGE_LABEL'
MODEL_TYPE = 'LR' # HDA, GNB, LR

# Video parameters

fps = 15
overwrite = True

# Get current date as string
date = dt.now().date().isoformat()

# path to save labeled camera video history
full_history_video_path = f'Dados/Simulação/history-labeled-code{code}_{MODEL_TYPE}_{date}.mp4'

if pre_sampled:
    X = xx.copy()  # pre-sampled from loaded videos dataset
else:
    X = videos.copy()  # complete loaded videos dataset

X['path'] = f'{folder}/' + X['blob_name']
X = X.sort_index().reset_index() # IMPORTANT TO KEEP VIDEOS TIMESTAMP ORDER CORRECT · CHECK IF INDEX IS DATETIME INDEX

# OPEN VIDEO FILE WRITER
video = Video(fps=fps, overwrite=overwrite)
history_writer = video.writer(path=full_history_video_path)

# RUN FRAME PIPELINE
pipeline = VideoProcessor(frame_dimension=3, frame_processing_function=process)
results = pipeline.process_labeled_videos(videos_dataframe=X, path_key='path', print_each=1)

# CLOSE/RELEASE VIDEO FILE WRITER
history_writer.release(); cv2.destroyAllWindows()

print(f'\nVideos Selected: {len(X)}')
print(f'Frames Loaded: {len(results)}')
print()

LABELED VIDEOS PROCESSING · PROGRESS: 360/360

Videos Selected: 360
Frames Loaded: 11354



#### Evaluate result

In [29]:
frames_concat = pd.DataFrame(results)

#### Classification Model Real Conditions Simulation Report

from sklearn.metrics import classification_report

# Get frames labels and predictions
y_true = frames_concat['label']
y_pred = frames_concat['prediction']

display(frames_concat['write'].value_counts().to_frame('Video Frame Write Operation Success'))
print()

print(classification_report(y_true, y_pred))
# print(f'\n * average absolute difference: {round(np.abs(y_true - y_pred).mean(), 2)}')
print(f'\n * Accuracy: {round((y_true == y_pred).mean(), 2)}')
print(f'\n * Mislabeled data points: {(y_true != y_pred).sum()} / {len(y_true)}')

Unnamed: 0,Video Frame Write Operation Success
success,11354





ValueError: Classification metrics can't handle a mix of multiclass and continuous targets

#### Missing labels

In [None]:
frames_concat = pd.DataFrame(results)

frames_concat[frames_concat['label']=='']#[['blob_name', 'image_timestamp']].values