# Extraction des features vidéo

### Import des librairies

In [None]:
! pip install imageio
! pip install opencv-python


%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.image import imread, imsave
import cv2
from tqdm import tqdm
import os
import glob
import shutil
from sklearn import svm, grid_search, datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from random import shuffle
import imageio
from sklearn.decomposition import PCA
REP = 'data/images_10'

### Fonction pour visualiser les images

In [None]:
def _plot_image_grid(items, col, load=None):
    """Draw *items* on a new figure arranged as a grid of *col* columns.

    items: sequence of images, or of objects that *load* turns into images.
    col:   number of columns of the grid.
    load:  optional callable applied to each item to obtain the image
           (e.g. a file reader when *items* are paths).

    An item that cannot be loaded or displayed is reported and skipped so
    that one bad image does not prevent the others from being shown.
    """
    count = len(items)
    # Ceiling division: exactly as many rows as needed, and at least one so
    # that the figure height is never zero.
    rows = max(1, -(-count // col))
    fig = plt.figure(figsize=(col*8, rows*6))
    for i, item in enumerate(items):
        try:
            image = item if load is None else load(item)
            axes = fig.add_subplot(rows, col, i+1)
            axes.imshow(image)
            axes.grid(False)
            axes.axis('off')
            axes.set_title(str(i))
        except Exception as err:
            print(f"Could not display image {i}: {err}")


def show_images(*args, col=3):
    """
        Plot image(s)

        Take as param: list, str (for a folder's path) or np.ndarray.

        - list: every element is drawn in a grid of `col` columns.
        - str: every '*.jpg' file of that folder (sorted by name) is drawn
          in a grid of `col` columns.
        - np.ndarray: the array is drawn alone in its own figure.

        The current (i.e. most recently created) figure is then saved as
        'img/grille_images.pdf' and 'img/grille_images.png' and displayed.
    """
    for arg in args:
        if isinstance(arg, list):
            _plot_image_grid(arg, col)
        elif isinstance(arg, str):
            folder = arg
            paths = sorted(glob.glob(f"{folder}/*.jpg"))
            if not paths:
                print(f"The folder '{folder}' does not contain any JPG image.")
            else:
                _plot_image_grid(paths, col, load=imread)
        elif isinstance(arg, np.ndarray):
            plt.figure(figsize=(8, 6))
            plt.imshow(arg)
            plt.grid(False)
            plt.axis('off')
        else:
            print("Invalid type of argument (must be 'list', 'str' or 'np.ndarray')")
    # savefig fails when the target directory is missing, so create it first.
    os.makedirs("img", exist_ok=True)
    plt.savefig("img/grille_images.pdf")
    plt.savefig("img/grille_images.png", dpi=100)
    plt.show()
    
# Preview the JPG frames stored in one sequence folder; show_images also
# writes the resulting grid to img/grille_images.pdf and img/grille_images.png.
show_images("data/image_10/SEQ_001_VIDEO")

### Transformer une vidéo en images

In [0]:
def video_to_frames(videopath):
    """Sample roughly one frame per second from a video file.

    videopath: path of the video to read.

    Returns a tuple (frames, framerate, name):
      - frames: list of the sampled frames converted from BGR to RGB
        (one frame every `framerate` frames, starting with the first one);
      - framerate: frame rate reported by OpenCV, truncated to an int;
      - name: file name of the video without directory nor extension.

    If the video cannot be read, `frames` is empty.
    """
    name = os.path.splitext(os.path.basename(videopath))[0]
    vidcap = cv2.VideoCapture(videopath)
    try:
        framerate = int(vidcap.get(cv2.CAP_PROP_FPS))
        # A reported frame rate below 1 would make the modulo below divide
        # by zero; fall back to keeping every frame in that case.
        step = max(framerate, 1)
        frames = []
        frame_number = 0
        success, frame = vidcap.read()
        while success:
            if frame_number % step == 0:
                # OpenCV decodes to BGR; the rest of the notebook works in RGB.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            success, frame = vidcap.read()
            frame_number += 1
    finally:
        # Always free the decoder, even if reading or converting failed.
        vidcap.release()
    return frames, framerate, name


def seq_to_3_frames(images):
    """Pick the three items located at 1/4, 1/2 and 3/4 of a sequence.

    images: indexable sequence (at least one element).

    Returns (frames, duration) where `frames` is the list of the three
    selected items and `duration` is the length of the input sequence.
    """
    total = len(images)
    picked = []
    for quarter in (1, 2, 3):
        picked.append(images[quarter * total // 4])
    return picked, total

# show_images(f, col=3)

### Parse all videos to extract frames


In [0]:
def build_image_folder(start=None, end=None):
    """Export the frames of every video of data/video as JPG files.

    start, end: optional slice bounds applied to the sorted list of
                'data/video/*.mp4' files, to process only a subset.

    For each video named <name>:
      - data/image_sec/<name>/frame_XXX.jpg receives the frames returned by
        video_to_frames;
      - data/image_3/<name>/frame_X.jpg receives the three frames selected
        by seq_to_3_frames.

    A video from which no frame could be read is reported and skipped.
    """
    videopaths = sorted(glob.glob("data/video/*.mp4"))[start:end]
    for videopath in tqdm(videopaths):
        frames_per_sec, framerate, name = video_to_frames(videopath)
        if not frames_per_sec:
            # seq_to_3_frames cannot index into an empty list.
            print(f"No frame could be read from '{videopath}', skipping.")
            continue

        folder = f"data/image_sec/{name}"
        os.makedirs(folder, exist_ok=True)
        for i, frame in enumerate(frames_per_sec):
            imsave(f"{folder}/frame_{i:03}.jpg", frame)

        frames_3, duration = seq_to_3_frames(frames_per_sec)
        folder = f"data/image_3/{name}"
        os.makedirs(folder, exist_ok=True)
        for i, frame in enumerate(frames_3):
            imsave(f"{folder}/frame_{i}.jpg", frame)

In [0]:
def folder_to_list(folder):
    """Load every JPG image of a folder.

    folder: path of the directory to scan for '*.jpg' files.

    Returns the list of images, ordered by file name. When the folder
    contains no JPG file, a message is printed and an empty list is
    returned.
    """
    paths = sorted(glob.glob(f"{folder}/*.jpg"))
    if not paths:
        print(f"The folder '{folder}' does not contain any JPG image.")
        return []
    return [imread(path) for path in paths]

def folder_to_list_grey(folder):
    """Load every JPG image of a folder as a greyscale image.

    folder: path of the directory to scan for '*.jpg' files.

    Returns the list of single-channel images, ordered by file name. When
    the folder contains no JPG file, a message is printed and an empty list
    is returned.
    """
    paths = sorted(glob.glob(f"{folder}/*.jpg"))
    if not paths:
        print(f"The folder '{folder}' does not contain any JPG image.")
        return []
    # The second argument of matplotlib's imread is a file-format hint, not
    # a colour flag, so OpenCV is used to actually decode in greyscale.
    return [cv2.imread(path, cv2.IMREAD_GRAYSCALE) for path in paths]

### Récupérer 10 images par séquence

In [0]:
def seq_to_10_frames(images):
    """Pick ten items evenly spread over a sequence.

    images: indexable sequence (at least one element).

    The selected positions are k * len(images) // 11 for k = 1..10, so the
    same item may be returned several times for short sequences.

    Returns (frames, duration) where `frames` is the list of the ten
    selected items and `duration` is the length of the input sequence.
    """
    total = len(images)
    picked = [images[k * total // 11] for k in range(1, 11)]
    return picked, total

In [0]:
def build_image_folder_10(start=None, end=None):
    """Export ten frames per video of data/video as JPG files.

    start, end: optional slice bounds applied to the sorted list of
                'data/video/*.mp4' files, to process only a subset.

    For each video named <name>, the ten frames chosen by seq_to_10_frames
    among those returned by video_to_frames are written to
    data/image_10/<name>/frame_XXX.jpg.
    """
    selected_videos = sorted(glob.glob("data/video/*.mp4"))[start:end]
    for videopath in tqdm(selected_videos):
        sampled, _framerate, name = video_to_frames(videopath)
        ten_frames, _duration = seq_to_10_frames(sampled)

        folder = f"data/image_10/{name}"
        os.makedirs(folder, exist_ok=True)
        i = 0
        for frame in ten_frames:
            imageio.imwrite(f"{folder}/frame_{i:03}.jpg", frame)
            i += 1

## Calcul descripteurs

### Calcul des histogrammes de couleurs

In [0]:
# # def quantification(img, nbits = 2):
# #     num = 0
# #     for i in range(nbits):
# #         num += 128 / (2**i)  # on determine la valeur correspondant à la quantification
# #     Rouge = np.bitwise_and(img[:,:,0], int(num))  # en fonction du nombre de bits choisits
# #     Vert = np.bitwise_and(img[:,:,1], int(num))
# #     Bleu = np.bitwise_and(img[:,:,2], int(num))
# #     Rouge = np.floor(Rouge / (2**(8-3*nbits)))
# #     Vert = np.floor(Vert / (2**(8-2*nbits)))
# #     Bleu = np.floor(Bleu / (2**(8-nbits)))
# #     return Rouge + Vert + Bleu

# def histogramme(img):
#     M = img.shape[0]
#     N = img.shape[1]
#     list_histo = []
#     val =1/(M*N)
#     for color in range(3):
#         histo = np.zeros(256)    
#         for i in range(M):
#             for j in range(N):
#                 histo[int(img[i,j, color])] += val
#         list_histo.append(histo)
#     return list_histo
        
# def dist_Manhattan(hist1, hist2):
#     return sum(np.abs(np.array(hist1) - np.array(hist2)))

In [None]:
def process_histo(start=None, end=None):
    """Build colour-histogram features for every folder of data/image_10.

    start, end: optional slice bounds applied to the sorted list of
                'data/image_10/*' entries, to process only a subset.

    For each image of a folder, the 256-bin histogram of each of its three
    channels is computed; the bin counts of all channels of all images are
    concatenated into one feature row per folder.

    Side effects: each image and its histogram plot are displayed, the
    image is saved as img/<j>.jpg and the plot as
    img/histo_couleur_02-<j>.pdf/.png, where j is the index of the image in
    its folder (files are therefore overwritten from one folder to the
    next).

    Returns a DataFrame indexed by folder name, one column per bin.
    """
    # Images loaded by folder_to_list are read with matplotlib's imread;
    # channel 0 is assumed to be red (RGB order) — TODO confirm. Each curve
    # is drawn in the colour of the channel it describes.
    couleur = ('r', 'g', 'b')
    # imsave/savefig fail when the target directory is missing.
    os.makedirs("img", exist_ok=True)
    dic = {}
    for name in tqdm(sorted(glob.glob("data/image_10/*"))[start:end]):
        images_10 = folder_to_list(name)
        features = []
        for j, frame in enumerate(images_10):
            plt.figure(figsize=(8, 6))
            plt.imshow(frame)
            plt.grid(False)
            plt.axis('off')
            plt.title(str(j))
            plt.show()
            imsave(f"img/{j:02}.jpg", frame)
            for i in [0,1,2]:
                histo = cv2.calcHist([frame], [i], None, [256], [0, 256])
                plt.plot(histo, color=couleur[i])
                plt.xlabel('Intensité (RGB)')
                plt.ylabel("Nombre d'occurrence")
                # calcHist returns one row per bin; flatten before casting so
                # that int() receives scalars rather than 1-element arrays.
                features.extend(int(count) for count in histo.ravel())
            # `i` still holds the last channel index (2) here, which is what
            # ends up in the file name.
            plt.savefig(f'img/histo_couleur_{i:02}-{j:02}.pdf',bbox_inches='tight')
            plt.savefig(f'img/histo_couleur_{i:02}-{j:02}.png',bbox_inches='tight')
            plt.show()
        # Key the row by the folder name alone, without its parent path.
        dic[os.path.basename(name)] = features
    return pd.DataFrame.from_dict(dic, orient="index")

In [0]:
# Read df_histo.csv using '§' as the column separator (presumably the
# exported output of process_histo — TODO confirm, the export is not shown here).
pd.read_csv("df_histo.csv", sep="§")

In [None]:
def process_cuts(start=None, end=None, seuil=700, coef=0.6):
    """Count the shot cuts of every video of data/video.

    start, end: optional slice bounds applied to the sorted list of
                'data/video/*.mp4' files, to process only a subset.
    seuil:      absolute threshold a histogram distance must exceed to be
                counted as a cut.
    coef:       a distance must also exceed `coef` times the largest
                distance of the video to be counted as a cut.

    For each video, one histogram is computed per frame, then the Manhattan
    distance between the histograms of consecutive frames. The first and
    last five distances are ignored when counting cuts. The number of cuts
    is printed for each video.

    Returns a DataFrame indexed by video file name with columns:
      - nb_cuts: number of detected cuts;
      - moy_seq: mean shot duration, i.e. number of distances / (cuts + 1)
        / frame rate (NaN when the frame rate is reported as 0).
    """
    dic = {}
    for videopath in tqdm(sorted(glob.glob("data/video/*.mp4"))[start:end]):
        list_histo = []
        cap = cv2.VideoCapture(videopath)
        try:
            rate = int(cap.get(cv2.CAP_PROP_FPS))
            # frame is a valid image if and only if success is true
            success, frame = cap.read()
            while success:
                # NOTE(review): the sibling process_histo passes `[frame]` to
                # calcHist while the frame itself is passed here. The call is
                # kept unchanged because wrapping it would change the scale
                # of the distances and `seuil` would need retuning — confirm.
                histo = [cv2.calcHist(frame, [color], None, [256], [0, 256]) for color in [0, 1, 2]]
                # Average the three histograms into one flat 256-bin vector.
                list_histo.append(np.mean(histo, axis=0).reshape(len(histo[0])))
                success, frame = cap.read()
        finally:
            # Free the decoder even if reading a frame failed.
            cap.release()

        # Manhattan distance between each pair of consecutive histograms.
        distance = [
            sum(np.abs(np.array(previous) - np.array(current)))
            for previous, current in zip(list_histo, list_histo[1:])
        ]

        cut = 0
        if distance:
            # Loop invariant: compute the relative threshold only once.
            relative_limit = coef * max(distance)
            for i in range(5, len(distance) - 5):
                if distance[i] > seuil and distance[i] > relative_limit:
                    cut += 1
        print(cut)

        # Guard against a frame rate of 0 (e.g. unreadable video).
        moy = (len(distance)/(cut+1))/rate if rate else float("nan")

        dic[os.path.basename(videopath)] = [cut, moy]
    # Passing the column names here also works when no video was found.
    return pd.DataFrame.from_dict(dic, orient="index", columns=['nb_cuts', 'moy_seq'])

### Calcul de la quantité de mouvement

In [None]:
import cv2
import numpy as np
from tqdm import tqdm


# def optical_flow_smart(videopath):
#     cam = cv2.VideoCapture(videopath)
#     ret, img = cam.read()
#     prevgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     res = []
#     while ret:
#         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#         flow = cv2.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
#         prevgray = gray
#         res.append(np.sum(flow))
#         ret, img = cam.read()
#     return np.sum(res)

def quant(img, flow, step=16):
    """Measure how much the motion varies over a regular grid of points.

    img:  array whose first two dimensions give the height and width used
          to build the sampling grid.
    flow: array indexed as flow[y, x] and yielding an (fx, fy) displacement
          for each grid point.
    step: spacing of the grid; the first point is at step/2 on each axis.

    Each grid point and its displaced position are rounded to integer
    coordinates, and the Euclidean length of the resulting segment is
    taken. The returned value is the sum over the grid of the absolute
    deviation of these lengths from their mean.
    """
    height, width = img.shape[:2]
    grid = np.mgrid[step/2:height:step, step/2:width:step]
    y, x = grid.reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    # One (start, end) pair of integer points per grid position.
    segments = np.int32(np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2) + 0.5)
    deltas = segments[:, 1] - segments[:, 0]
    # Euclidean distance between the start and end point of each segment.
    lengths = np.sqrt(deltas[:, 0]**2 + deltas[:, 1]**2)
    magnitudes = np.abs(lengths)
    return np.sum(np.abs(magnitudes - np.mean(magnitudes)))


def optical_flow_smart(videopath):
    """Compute a motion score between consecutive images of a folder.

    videopath: path of a directory containing the '*.jpg' frames of one
               sequence (processed in file-name order).

    For each pair of consecutive images, the dense Farneback optical flow
    is computed on their greyscale versions and summarised by quant().

    Returns the list of scores, one per pair of consecutive images (empty
    when the folder holds fewer than two JPG images).
    """
    paths = sorted(glob.glob(f"{videopath}/*.jpg"))
    if len(paths) < 2:
        # At least two images are needed to estimate a motion.
        return []
    res = []
    prevgray = cv2.imread(paths[0], 0)
    for path in paths[1:]:
        gray = cv2.imread(path, 0)
        flow = cv2.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        res.append(quant(gray, flow))
        # Advance the reference so the next flow is measured against the
        # previous image rather than always against the first one.
        prevgray = gray
    return res

def process_momentum(start=None, end=None):
    """Build the motion features of every folder of data/image_200.

    start, end: optional slice bounds applied to the sorted list of
                'data/image_200/*' entries, to process only a subset.

    Returns a DataFrame with one row per folder, indexed by the folder path
    stripped of its first 15 characters, whose columns are the scores
    returned by optical_flow_smart for that folder.
    """
    rows = {}
    folders = sorted(glob.glob("data/image_200/*"))[start:end]
    for name in tqdm(folders):
        features = []
        # Re-globbing the folder path yields the path itself when it exists.
        for path in glob.glob(name):
            features = list(optical_flow_smart(path))
        rows[name[15:]] = features
    return pd.DataFrame.from_dict(rows, orient="index")


# prompt histograms
# for path in tqdm((sorted(glob.glob("data/image_sec/*")))):
#     plot data
#     plt.plot(optical_flow_smart(path))
#     plt.title(path)
#     plt.show()
    
# Compute the motion features of every folder under data/image_200 and
# export them to df_momentum.csv with '§' as the column separator.
df_momentum = process_momentum()
df_momentum.to_csv("df_momentum.csv", sep="§")