## STT(Speach to text)

In [10]:
!pip3 install whisper-timestamped
!pip install torch
!pip install moviepy

Collecting whisper-timestamped
  Using cached whisper_timestamped-1.15.4-py3-none-any.whl.metadata (1.2 kB)
Collecting Cython (from whisper-timestamped)
  Using cached Cython-3.0.11-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting dtw-python (from whisper-timestamped)
  Downloading dtw_python-1.5.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (48 kB)
Collecting openai-whisper (from whisper-timestamped)
  Downloading openai-whisper-20240927.tar.gz (800 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.0/800.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m[31m1.4 MB/s[0m eta [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting numba (from openai-whisper->whisper-timestamped)
  Downloading numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.7 kB)
Collecting more-itertools (from openai-whisper->whisper-time

In [11]:
import whisper_timestamped as whisper     # For convert to text, STT (Speach to text)
import torch                              # For whisper
from moviepy.editor import VideoFileClip  # For convert MP4 to MP3
import uuid                               # For generate id to user video data
import pickle                             # Save user video data on file
import os                                 # Work with file

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [12]:
class STT:
    def __init__(self, modelType="base"):
        self.modelType = modelType
        self.batch_size = 16 # reduce if low on GPU mem
        self.divice = "cuda" if torch.cuda.is_available() else "cpu"
        self.compute_type = "float16" if torch.cuda.is_available() else "int8"
        self.model = whisper.load_model(self.modelType, device=self.divice)

    def convertMP3ToText(self, pathToMP3: str) -> (str, str):
        audio = whisper.load_audio(pathToMP3)
        result = whisper.transcribe(self.model, audio, language="ru")

        text: str = result["text"]
        word_data: list[dict] = list()
        for segment in result["segments"]:
            for word in segment["words"]:
                word_data.append({
                    "text": word["text"],
                    "startTime": word["start"] * 1000,
                    "endTime": word["end"] * 1000
                })
        return text, word_data

    def saveData(self, text: str, word_data: str) -> str:
        absPath = os.path.abspath("")
        
        # Saving data
        saveData = {
            "text": text,
            "word_data": word_data
        }
        
        # Setting save directory
        pathDict = f"{absPath}/user"
        if not os.path.exists(pathDict):
            os.makedirs(pathDict)

        # Create id user video
        _id = uuid.uuid4().hex
        while os.path.isfile(f"{pathDict}/{_id}.pickle"):
            _id = uuid.uuid4().hex
    
        # Save data in pickle
        with open(f"{pathDict}/{_id}.pickle", 'wb') as file:
            pickle.dump(saveData, file, protocol=pickle.HIGHEST_PROTOCOL)
        return _id

    def loadData(self, _id: str) -> (str, list[dict]):
        absPath = os.path.abspath("")
        pathDict = f"{absPath}/user"
        
        # Check created file 
        if not os.path.isfile(f"{pathDict}/{_id}.pickle"):
            print("File does not exist")
            return "", list()

        # Open file
        with open(f"{pathDict}/{_id}.pickle", 'rb') as file:
            data = pickle.load(file)

        return data["text"], data["word_data"]

    def convertMP3(self, pathToMP3: str) -> str:
        text, word_data = self.convertMP3ToText(pathToMP3)
        _id = self.saveData(text, word_data)
        return _id

    def convertMP4(self, pahtToMP4: str) -> str:
        absPath = os.path.abspath("")
        pathTempMP3 = f"{absPath}/temp/mp3"
        fileName = os.path.splitext(os.path.basename(pahtToMP4))[0]
        tempFilePath = f"{pathTempMP3}/{fileName}.mp3"

        # Setting temp directory
        if not os.path.exists(pathTempMP3):
            os.makedirs(pathTempMP3)
        
        # Convert MP4 to MP3
        video = VideoFileClip(pahtToMP4)
        video.audio.write_audiofile(tempFilePath, logger=None)
        
        # Convert MP3 to data text
        _id = self.convertMP3(tempFilePath)

        # Delete temp file MP3
        if os.path.exists(tempFilePath):
            os.remove(tempFilePath)
        
        return _id

In [94]:
converter = STT("medium")

In [95]:
test_audio = "/Users/odner/GitHub/hack20_09_27/viral_video/test_audio/test.mp3"

In [96]:
test_video = "/Users/odner/GitHub/hack20_09_27/viral_video/test/1c6bc481dd52a9938e78e755f1e5c90e.mp4"

In [97]:
converter.convertMP3(test_audio)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 874/874 [00:04<00:00, 188.74frames/s]


'11680960e1504d1cac0e07c2d70ec9cf'

In [98]:
converter.loadData("11680960e1504d1cac0e07c2d70ec9cf")

(' Привет, как дела? Hello, Bonjour',
 [{'text': 'Привет,', 'startTime': 1660.0, 'endTime': 2180.0},
  {'text': 'как', 'startTime': 3040.0, 'endTime': 3280.0},
  {'text': 'дела?', 'startTime': 3280.0, 'endTime': 3680.0},
  {'text': 'Hello,', 'startTime': 4740.0, 'endTime': 5340.0},
  {'text': 'Bonjour', 'startTime': 7240.0, 'endTime': 7700.0}])

In [99]:
converter.convertMP4(test_video)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

'dba07d847fda464e87204b3624989304'

In [31]:
text, wordData = converter.loadData("e73879291c38478fa7747b375782cdf6")

In [33]:
del converter

## Класификация тональности

In [100]:
import numpy as np
from transformers import pipeline

In [138]:
class SentimentAnalysis:
    def __init__(self):
        self.classifier = pipeline('sentiment-analysis', model="cointegrated/rubert-tiny-sentiment-balanced")

    def _sentimentText(self, text: str) -> float:
        result = self.classifier(text)[0]
        # Преобразование результатов в числовую шкалу
        if result['label'] == 'positive':
            return result['score']  # Позитивные эмоции — от 0 до 1
        elif result['label'] == 'negative':
            return -result['score']  # Негативные эмоции — от 0 до -1
        else:
            return 0.0  # Нейтральный тон = 0

    def sentimentWordData(self, wordData: list[dict], lenWords: int = 6) -> list[dict]:
        # Data scope with sentiment for ever word
        dataScope = np.zeros(len(wordData) + lenWords * 2)
        # Create zero value form start and end array
        zeroWordData = [{"text": ""} for idx in range(lenWords)]
        # New word data 
        wordData = zeroWordData + wordData + zeroWordData
    
        for idx in range(len(wordData) - lenWords):
            # Create text to sentiment score
            text = "".join([word["text"] for word in wordData[idx:idx+lenWords]])
            sentimentScore = self._sentimentText(text)
    
            # Add score
            dataScope[idx:idx+lenWords] += sentimentScore
        
        dataScope = dataScope / lenWords

        for idx in range(len(dataScope)):
            wordData[idx]["sentiment_analysis"] = dataScope[idx]
        
        return wordData[lenWords:(len(wordData) - lenWords)]

In [139]:
sentimet_analysis = SentimentAnalysis()

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [140]:
len(wordData)

1666

In [142]:
sentimet_analysis.sentimentWordData(wordData, 4)

[{'text': 'Наташа,',
  'startTime': 140.0,
  'endTime': 420.0,
  'sentiment_analysis': 0.0},
 {'text': 'ты',
  'startTime': 480.0,
  'endTime': 560.0,
  'sentiment_analysis': 0.0},
 {'text': 'эту',
  'startTime': 560.0,
  'endTime': 640.0,
  'sentiment_analysis': 0.0},
 {'text': 'кухню',
  'startTime': 640.0,
  'endTime': 900.0,
  'sentiment_analysis': 0.0},
 {'text': 'видела?',
  'startTime': 900.0,
  'endTime': 1220.0,
  'sentiment_analysis': 0.0},
 {'text': 'Это',
  'startTime': 1420.0,
  'endTime': 1620.0,
  'sentiment_analysis': 0.12090060114860535},
 {'text': '90.',
  'startTime': 1620.0,
  'endTime': 1960.0,
  'sentiment_analysis': 0.12090060114860535},
 {'text': 'Я',
  'startTime': 2460.0,
  'endTime': 2520.0,
  'sentiment_analysis': 0.2640693783760071},
 {'text': 'удивляюсь,',
  'startTime': 2520.0,
  'endTime': 2860.0,
  'sentiment_analysis': 0.45377182960510254},
 {'text': 'как',
  'startTime': 2960.0,
  'endTime': 2980.0,
  'sentiment_analysis': 0.3328712284564972},
 {'text