In [49]:
!pip install facenet_pytorch
!pip install transformers
!pip install deepface

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [50]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
import cv2
import torch
import numpy as np
import pandas as pd
from facenet_pytorch import MTCNN
from PIL import Image
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

from transformers import AutoFeatureExtractor, AutoModelForImageClassification

from deepface import DeepFace
from glob import glob

In [52]:
'cuda' if torch.cuda.is_available() else 'cpu'

'cpu'

In [53]:
# video_path =  '/content/drive/MyDrive/05_pytorch/HACKATON_emotions/titanic.mp4'
# video_path = '/content/drive/MyDrive/05_pytorch/HACKATON_emotions/project.mp4'
# video_path = '/content/drive/MyDrive/05_pytorch/HACKATON_emotions/sad.mp4'

In [60]:
class FaceDetector(object):
    """
    Face detector class
    """

    def __init__(self, mtcnn, video_path):
        self.v_cap = cv2.VideoCapture(video_path)
        self.v_len = int(self.v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        self.mtcnn = mtcnn
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.extractor = AutoFeatureExtractor.from_pretrained("Rajaram1996/FacialEmoRecog")
        self.emotion_model = AutoModelForImageClassification.from_pretrained("Rajaram1996/FacialEmoRecog")
        self.emotion_model.eval()
        self.soft_max = torch.nn.Softmax(dim=0)
        self.faces_list = []
        self.face_image_limit = 70
        self.frames = []
        self.frame_nums = []


    def predict_emotion(self, image_num):

        inputs = self.extractor(self.faces_list[image_num][0], return_tensors="pt")

        with torch.no_grad():
            logits = self.emotion_model(**inputs).logits

        variances = self.soft_max(logits[0])

        # model predicts one of the 1000 ImageNet classes
        predicted_label = variances.argmax(-1).item()
        
        print(plt.imshow(self.faces_list[image_num][0]))
        print(f'\nResult {self.emotion_model.config.id2label[predicted_label]} {variances[predicted_label]*100:.4}%')

        for i in range(len(variances)):
            print(f'{self.emotion_model.config.id2label[i]} {variances[i]*100:.4}%')


    def predict_deepFace(self):
        '''Returns the most frequent emotion.'''
        emotions = []

        for img, frame_num in self.faces_list:
            emotions.append(DeepFace.analyze(img_path = np.array(img), actions = ['emotion'], enforce_detection=False)['dominant_emotion'])
        
        return max(set(emotions), key = emotions.count)


    def get_df(self):
        '''Returns dataframe from video.'''
        df = pd.DataFrame(columns=['photo', 'frame_num', 'person_id', 'angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise', 'dominant_emotion'])

        person_list = []
        compare_model = 'ArcFace'

        for img, frame_num in self.faces_list:
            new_record = DeepFace.analyze(img_path = np.array(img), actions = ['emotion'], enforce_detection=False)['emotion']
            new_record['dominant_emotion'] = DeepFace.analyze(img_path = np.array(img), actions = ['emotion'], enforce_detection=False)['dominant_emotion']
            new_record['photo'] = np.array(img)
            new_record['frame_num'] = frame_num
            
            if person_list:
                # if not empty person_list
                added_person = 0
                for idx, exist_person in enumerate(person_list):
                    # сравниваем img с людьми в person_list (true если одинаковый)
                    if DeepFace.verify(img1_path = np.array(exist_person), img2_path = np.array(img), enforce_detection=False, model_name=compare_model)['verified']:
                        # значит уже есть в базе - заносим img как уже существующего
                        new_record['person_id'] = idx # присваиваем id существующего 
                        df = df.append(new_record, ignore_index=True)
                        break
                    else:
                        added_person += 1
                if added_person == len(person_list):
                    # add another person into DB
                    person_list.append(img)
                    new_record['person_id'] = len(person_list) # присваиваем id только что созданного чела 
                    print('Уникальных людей:', len(person_list))
                    # значит еще в базе - заносим img как нового
                    df = df.append(new_record, ignore_index=True)
                    pass

            else:
                # add first if no persons
                person_list.append(img)
                # заносим в БД первого
                new_record['person_id'] = 0
                df = df.append(new_record, ignore_index=True)
                print('Уникальных людей:', len(person_list))

        return df


    def run(self):
        batch_size = 16

        for frame_num in tqdm(range(self.v_len)):
        # for frame_num in tqdm(range(300)):

            if frame_num % 6 == 0:
                # Load frame
                success, frame = self.v_cap.read()
                if not success:
                    continue
                    
                # Add to batch
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = Image.fromarray(frame)
                self.frames.append(frame)
                self.frame_nums.append(frame_num)

                
                # When batch is full, detect faces and reset batch list
                if len(self.frames) >= batch_size:
                    
                    try:
                        batch_boxes, _, batch_landmarks = self.mtcnn.detect(self.frames, landmarks=True)

                        for boxes, landmarks, frame, real_frame_number in zip(batch_boxes, batch_landmarks, self.frames, self.frame_nums):
                            
                            # several faces in one frame
                            for box, landmark in zip(boxes, landmarks):
                                face_image = frame.crop((int(box[0]), int(box[1]), int(box[2]), int(box[3])))

                                # take faces more than <face_image_limit> pixels in one of the dims
                                if face_image.size[0] > self.face_image_limit and face_image.size[1] > self.face_image_limit:
                                    self.faces_list.append((face_image, real_frame_number))
                        
                    except:
                        pass
                        
                    # обнуляем данные батча
                    self.frames = []
                    self.frame_nums = []

        self.v_cap.release()
        cv2.destroyAllWindows()

# Папка с видеофайлами

In [61]:
video_path = '/content/drive/MyDrive/05_pytorch/HACKATON_emotions/video_for_test/'

video_paths_list = glob(f'{video_path}*.mp4')

In [62]:
video_paths_list[0]

'/content/drive/MyDrive/05_pytorch/HACKATON_emotions/video_for_test/test_video4.mp4'

In [63]:
def get_emotions_by_videos(video_paths_list):

    df_emo = pd.DataFrame(columns=['filename', 'dominant_emotion'])
    for video_path in video_paths_list:
        try:
            mtcnn = MTCNN()
            fcd = FaceDetector(mtcnn, video_path)
            fcd.run()
            emotion = fcd.predict_deepFace()

            df_emo = df_emo.append({'filename': video_path.split('/')[-1], 'dominant_emotion': emotion}, ignore_index=True)

        except:
            pass

    return df_emo

In [64]:
df_emotions = get_emotions_by_videos(video_paths_list)

  0%|          | 0/960 [00:00<?, ?it/s]

  0%|          | 0/527 [00:00<?, ?it/s]

  0%|          | 0/240 [00:00<?, ?it/s]

  0%|          | 0/426 [00:00<?, ?it/s]

  0%|          | 0/330 [00:00<?, ?it/s]

  0%|          | 0/579 [00:00<?, ?it/s]

  0%|          | 0/265 [00:00<?, ?it/s]

  0%|          | 0/357 [00:00<?, ?it/s]

  0%|          | 0/360 [00:00<?, ?it/s]

In [None]:
df_emotions.to_csv('video_emotion.csv', index=False)

In [65]:
df_emotions

Unnamed: 0,filename,dominant_emotion
0,test_video4.mp4,sad
1,test_video8.mp4,neutral
2,test_video10.mp4,angry
3,test_video5.mp4,happy
4,test_video9.mp4,sad
5,test_video2.mp4,happy
6,test_video1.mp4,neutral
7,test_video3.mp4,neutral
8,test_video6.mp4,neutral


In [59]:
df_emotions

Unnamed: 0,filename,dominant_emotion
0,test_video4.mp4,sad
1,test_video8.mp4,neutral
2,test_video10.mp4,angry
3,test_video5.mp4,sad
4,test_video9.mp4,sad
5,test_video2.mp4,sad
6,test_video1.mp4,neutral
7,test_video3.mp4,neutral
8,test_video6.mp4,neutral


# Папка с фотками

In [66]:
photo_path = '/content/drive/MyDrive/05_pytorch/HACKATON_emotions/emotions_jpg/'

photo_paths_list = glob(f'{photo_path}*.jpg')

In [67]:
extractor = AutoFeatureExtractor.from_pretrained("Rajaram1996/FacialEmoRecog")
emotion_model = AutoModelForImageClassification.from_pretrained("Rajaram1996/FacialEmoRecog")

soft_max = torch.nn.Softmax(dim=0)
df_photo = pd.DataFrame(columns = ['photo_name', 'emotion'])

for photo_path in photo_paths_list:

    inputs = extractor(Image.open(photo_path), return_tensors="pt")

    with torch.no_grad():
        logits = emotion_model(**inputs).logits

    variances = soft_max(logits[0])
    predicted_label = variances.argmax(-1).item()

    real_emotion = photo_path.split('/')[-1].split('.')[0]
    emotion = emotion_model.config.id2label[predicted_label]
    photo_name = photo_path.split('/')[-1]

    df_photo = df_photo.append({'photo_name': photo_name,
                                'real_emotion': real_emotion,
                                'emotion': emotion}, ignore_index=True)

df_photo

Unnamed: 0,photo_name,emotion,real_emotion
0,sadness.jpg,surprise,sadness
1,contempt.jpg,contempt,contempt
2,fear.jpg,surprise,fear
3,anger.jpg,disgust,anger
4,surprise.jpg,surprise,surprise
5,disgust.jpg,surprise,disgust
6,neutral.jpg,surprise,neutral
7,happy.jpg,happy,happy


In [68]:
(df_photo.emotion == df_photo.real_emotion).sum() / len(df_photo.emotion)

0.375

In [None]:
# len(fcd.faces_list)

In [None]:
# df_ = fcd.get_df()

In [None]:
# df_

In [None]:
# pic_box = plt.figure(figsize=(25,100))

# for i, picture in enumerate(fcd.faces_list):
#     pic_box.add_subplot(len(fcd.faces_list)//5+2,5,i+1)
#     plt.imshow(picture[0])
#     plt.title(picture[0].size, x=1.5, y=0)

#     plt.axis('off')

# plt.show()

In [None]:
# df.to_csv('12345.csv', index=False)

In [None]:
# pic_box = plt.figure(figsize=(25,100))

# for i, picture in enumerate(person_list):
#     pic_box.add_subplot(len(person_list)//5+2,5,i+1)
#     plt.imshow(picture)
#     plt.title(picture.size, x=1.5, y=0)

#     plt.axis('off')

# plt.show()

In [None]:
# plt.imshow(Image.open(foto1))

In [None]:
# !unzip /content/drive/MyDrive/05_pytorch/HACKATON_emotions/fer2013.zip

In [None]:
# def test_model(images, labels):
#     pred_labels = []

#     for image_path in images:
#         pred_labels.append(DeepFace.analyze(img_path = image_path,
#                                             actions = ['emotion'],
#                                             enforce_detection=False)['dominant_emotion'])

#         if len(pred_labels) % 1000 == 0:
#             print(len(pred_labels), 'from 7200')

#     return pred_labels