# **Sentiment analysis**

In [None]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import ast
import nltk

# Download the VADER lexicon that NLTK's SentimentIntensityAnalyzer needs
nltk.download('vader_lexicon')

# Load the data from the CSV file.
# NOTE: this is an absolute path under /content, not a relative one.
file_path = '/content/youtube_data_vf.csv'
df = pd.read_csv(file_path)

# Replace missing values with the string '[]' so that the later
# ast.literal_eval call turns them into empty lists
df['comments'] = df['comments'].fillna('[]')

# Show the raw (still string-encoded) comments before running sentiment analysis
print("Comments before sentiment analysis:")
print(df['comments'])

# Create the sentiment analyzer instance used by get_sentiment
sid = SentimentIntensityAnalyzer()

def get_sentiment(comment, analyzer=None, threshold=0.05):
    """Classify one comment as 'Positive', 'Negative' or 'Neutral'.

    The label is derived from the ``compound`` entry of the analyzer's
    ``polarity_scores`` result.

    Args:
        comment: Text of a single comment.
        analyzer: Object exposing ``polarity_scores(text)`` that returns a
            mapping with a ``'compound'`` key. Defaults to the notebook-level
            ``sid`` analyzer, which keeps existing one-argument calls working.
        threshold: Compound scores at or above ``threshold`` are 'Positive',
            at or below ``-threshold`` are 'Negative', anything in between is
            'Neutral'. Defaults to the original 0.05 cut-off.

    Returns:
        One of the strings 'Positive', 'Negative' or 'Neutral'.
    """
    # Fall back to the global analyzer only when none is supplied, so the
    # function can be exercised without the notebook's hidden state.
    scorer = sid if analyzer is None else analyzer
    compound = scorer.polarity_scores(comment)['compound']
    if compound >= threshold:
        return 'Positive'
    if compound <= -threshold:
        return 'Negative'
    return 'Neutral'

# Parse each string-encoded Python list (e.g. "['a', 'b']") into a real list of comments
df['comments'] = df['comments'].apply(ast.literal_eval)

# Label each row with the sentiment of its FIRST comment only; rows without
# any comment are scored on the empty string.
df['sentiment'] = [
    get_sentiment(row_comments[0] if len(row_comments) > 0 else '')
    for row_comments in df['comments']
]

# Show the parsed comments next to their sentiment label
print("\nSentiment analysis results:")
print(df[['comments', 'sentiment']])


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Comments before sentiment analysis:
0      ['Thanks for all the support over the years ga...
1      ['20-30 minutes a day 4 days a week. I believe...
2      ['❤❤❤', 'انا لست ربوت حتى استمر فقط في العمل و...
3      ['As an author myself, I\'ve found it difficul...
4      ['What was the most useful/interesting part of...
                             ...                        
908    ['The nostalgia', 'love it. the high pitched n...
909    ['Love ❤ Ali Abdaal', "watching Ali's videos a...
910    ['Wow ali ❤️', 'Ali 3rd viedeo', 'Where is ali...
911    ["Assalamualaikum sir with due respect I wants...
912    ['The beginnings of a productive legend ❤ even...
Name: comments, Length: 913, dtype: object

Sentiment analysis results:
                                              comments sentiment
0    [Thanks for all the support over the years gan...  Positive
1    [20-30 minutes a day 4 days a week. I believe ...  Positive
2    [❤❤❤, انا لست ربوت حتى استمر فقط في العمل وامض...   Neutral
3    

# **Comments Translation**

In [None]:
pip install googletrans==4.0.0-rc1

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2024.5.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90

In [None]:
from transformers import MarianMTModel, MarianTokenizer

# Load the MarianMT tokenizer and model. The "opus-mt-fr-en" checkpoint name
# indicates French -> English translation (not English -> French).
# NOTE(review): comments in other languages are passed through this same
# fr->en model - confirm that is intended.
# NOTE: `model_name`, `tokenizer` and `model` are rebound by the T5 and
# text-to-speech cells further down; translate_to_english reads these globals
# at call time, so re-run this cell before translating again.
model_name = "Helsinki-NLP/opus-mt-fr-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

def translate_to_english(comments):
    """Translate every comment in ``comments`` and return the results as a list.

    Each comment is tokenized, passed through ``model.generate`` and decoded on
    its own, using the notebook-level ``tokenizer`` and ``model``. The output
    list has one translated string per input comment, in the same order.
    """
    def _translate_one(text):
        # Encode a single comment as PyTorch tensors, truncating over-long input.
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        generated = model.generate(**encoded)
        # Only one sequence was submitted, so the first output is the translation.
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    return [_translate_one(text) for text in comments]

# Show the comments before translation (no data is loaded here; `df` comes
# from the sentiment-analysis cell)
print("Comments before translation:", df['comments'], sep="\n")


def _translate_row(row_comments):
    """Translate one row's comment list; anything that is not a list maps to []."""
    if not isinstance(row_comments, list):
        return []
    return translate_to_english(row_comments)


# Translate every comment of every row, one row at a time
df['translated_comments'] = df['comments'].apply(_translate_row)

# Show the comments after translation
print("\nComments after translation:", df['translated_comments'], sep="\n")


# **Summarize Videos**

In [None]:
!pip install -q transformers

In [None]:
!pip install youtube_transcript_api



In [None]:
# importing libraries
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

In [None]:
# URL of the YouTube video whose transcript is summarized below.
# NOTE(review): the name `btc_video` does not describe the content (the
# transcript printed below is about focus and distraction); it is kept because
# the next cell reads it.
btc_video = "https://www.youtube.com/watch?v=5Rqiba5mqLk"

In [None]:
from urllib.parse import parse_qs, urlparse

# Read the `v` query parameter instead of splitting on "=": a URL carrying any
# other parameter (e.g. "...watch?v=ID&t=10s") would otherwise yield "ID&t".
vid_id = parse_qs(urlparse(btc_video).query)["v"][0]

In [None]:
vid_id

'5Rqiba5mqLk'

In [None]:
# Fetch the transcript and display it in full as the cell output.
# NOTE: the result is not stored; the next cell requests the transcript again.
YouTubeTranscriptApi.get_transcript(vid_id)


[{'text': 'being able to not get distracted is',
  'start': 0.04,
  'duration': 5.239},
 {'text': 'genuinely one of the most life-changing',
  'start': 2.639,
  'duration': 4.321},
 {'text': 'skills if we want to improve our lives',
  'start': 5.279,
  'duration': 3.201},
 {'text': 'in any way if we want to make progress',
  'start': 6.96,
  'duration': 3.16},
 {'text': 'on our studies or in our work or in our',
  'start': 8.48,
  'duration': 3.52},
 {'text': 'side hustle or even like being present',
  'start': 10.12,
  'duration': 4.24},
 {'text': 'with our families for goodness sake that',
  'start': 12.0,
  'duration': 4.199},
 {'text': 'all requires this magical ability called',
  'start': 14.36,
  'duration': 3.64},
 {'text': 'focus it requires the ability to focus',
  'start': 16.199,
  'duration': 4.0},
 {'text': 'on one thing and one thing only and to',
  'start': 18.0,
  'duration': 3.96},
 {'text': 'be fully present in just doing that one',
  'start': 20.199,
  'duration': 3.

In [None]:
# Fetch the transcript of the selected video. As the output above shows, it is
# a list of dicts with 'text', 'start' and 'duration' keys.
transcript = YouTubeTranscriptApi.get_transcript(vid_id)


In [None]:
# Sanity check: show the first five caption segments to confirm the transcript
# actually contains text
transcript[0:5]

[{'text': 'being able to not get distracted is',
  'start': 0.04,
  'duration': 5.239},
 {'text': 'genuinely one of the most life-changing',
  'start': 2.639,
  'duration': 4.321},
 {'text': 'skills if we want to improve our lives',
  'start': 5.279,
  'duration': 3.201},
 {'text': 'in any way if we want to make progress',
  'start': 6.96,
  'duration': 3.16},
 {'text': 'on our studies or in our work or in our',
  'start': 8.48,
  'duration': 3.52}]

In [None]:
# Concatenate all caption segments into one string. Every segment is prefixed
# with a single space, so the text also starts with a leading space.
result = "".join(" " + segment["text"] for segment in transcript)
print(len(result))

23817


In [None]:
result

" being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way if we want to make progress on our studies or in our work or in our side hustle or even like being present with our families for goodness sake that all requires this magical ability called focus it requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing the thing is we are bombarded by distractions all the time we have our social media notifications pinging right left and Center we have messages and emails and whatsapps and text messages and it's like it's so easy to get distracted these days with all of the Endless Options that we have available to us and the reason why I think it's so incredibly important to cultivate the ability to focus and this is the thing that I've really realized over the years that the quality of my life is directly proportional to my ability to focus and I've had perio

### **Generating Summary**
### Facebook Bart Large CNN Model


In [None]:
# Build a Hugging Face summarization pipeline backed by the
# facebook/bart-large-cnn checkpoint.
summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")
# Single-call summarization of the whole transcript, kept for reference only;
# the next cell summarizes the text in pieces of about 1000 characters instead.
#sumd_text = summarizerfb(result, max_length=130, min_length=30, do_sample=False)

In [None]:
import textwrap

# BART accepts at most 1024 tokens per input. The transcript is therefore cut
# into pieces of at most CHUNK_CHARS *characters*, used as a conservative
# stand-in for that token limit.
CHUNK_CHARS = 1000
# A final piece shorter than this is merged into the previous one: forcing
# min_length=30 on a near-empty input makes the model invent text.
MIN_TAIL_CHARS = 200

# textwrap.wrap breaks only at whitespace, so no word is cut in half, and it
# never yields an empty piece (an empty transcript gives an empty list).
chunks = textwrap.wrap(result, width=CHUNK_CHARS, break_on_hyphens=False)
if len(chunks) > 1 and len(chunks[-1]) < MIN_TAIL_CHARS:
  short_tail = chunks.pop()
  chunks[-1] = chunks[-1] + " " + short_tail

# Summarize each piece and collect the partial summaries in order.
summarized_text = []
for chunk in chunks:
  print("input text \n" + chunk)
  out = summarizerfb(chunk, max_length=130, min_length=30, do_sample=False)
  out = out[0]['summary_text']
  print("Summarized text\n" + out)
  summarized_text.append(out)

# Combined summary, built once after the loop; always a str.
summarized_text2 = ' '.join(summarized_text)

In [None]:
len(result)

In [None]:
result

" this is the best piece of advice that I got recently if you're new here hello my name is Ali I'm a doctor turned entrepreneur and I'm the author of feel-good productivity which is a book about how to be more productive but in a way that's actually enjoyable meaningful and sustainable and it's that there is no achievement that you can possibly achieve that will make you any happier than you are right now that's not to say don't go for goals and achievements because it gives a sense of progress and momentum and it's you know something to pass the time but it is to say that we want to focus on enjoying the journey along the way there's this thing in Psychology called the arrival fallacy which is the idea that we believe that oh when I get to Point X or Y or Zed then I'll be happy but actually there is no happiness to be found in those points the happiness at those points is fairly fleeting doesn't last very long and so focus on enjoying the journey while at the same time pursuing your g

In [None]:
len(str(summarized_text2))

297

In [None]:
str(summarized_text2)

'There is no achievement that you can possibly achieve that will make you any happier than you are right now. Focus on enjoying the journey while at the same time pursuing your goals.  als. als als in the United States. In the U.S. they are called Americans. In Europe, they are known as Americans.'

### **T5-Base Model**


In [None]:
# defining the model from Hugging Face Transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the t5-base tokenizer and seq2seq model.
# NOTE: this rebinds `model_name`, `tokenizer` and `model`, which the
# translation section bound to the MarianMT checkpoint; translate_to_english
# reads those globals at call time and will use T5 if called after this cell.
model_name = "t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
import textwrap

# Encoding & decoding in T5. T5 reads at most 512 tokens, so every piece is
# tokenized with max_length=512 and truncation=True.
# The pieces themselves are at most CHUNK_CHARS characters long. The piece
# count now comes from the actual split; it used to be derived from
# len(result)/512 while the slices advanced by 1000 characters, so roughly half
# of the iterations summarized an empty string.
CHUNK_CHARS = 1000
# A final piece shorter than this is merged into the previous one so that
# min_length=40 is never forced onto a near-empty input.
MIN_TAIL_CHARS = 200

# textwrap.wrap breaks only at whitespace and never yields an empty piece.
chunks = textwrap.wrap(result, width=CHUNK_CHARS, break_on_hyphens=False)
if len(chunks) > 1 and len(chunks[-1]) < MIN_TAIL_CHARS:
  short_tail = chunks.pop()
  chunks[-1] = chunks[-1] + " " + short_tail

summarized_text = []
for chunk in chunks:
  print("input text \n" + chunk)
  # "summarize: " is the task prefix this T5 checkpoint is prompted with here.
  inp = tokenizer("summarize: " + chunk, return_tensors="pt", max_length=512, truncation=True)
  out = model.generate(inp["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
  # skip_special_tokens drops markers such as <pad> and </s> from the summary.
  output = tokenizer.decode(out[0], skip_special_tokens=True)
  summarized_text.append(output)

In [None]:
len(result)

23817

In [None]:
result

" being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way if we want to make progress on our studies or in our work or in our side hustle or even like being present with our families for goodness sake that all requires this magical ability called focus it requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing the thing is we are bombarded by distractions all the time we have our social media notifications pinging right left and Center we have messages and emails and whatsapps and text messages and it's like it's so easy to get distracted these days with all of the Endless Options that we have available to us and the reason why I think it's so incredibly important to cultivate the ability to focus and this is the thing that I've really realized over the years that the quality of my life is directly proportional to my ability to focus and I've had perio

In [None]:
len(summarized_text)

20

In [None]:
summarized_text

["Being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way. It requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing. It's so easy to get distracted these days with all of the Endless Options.",
 "There are a bunch of different techniques that we can learn to help us focus better. If we can just learn and apply those techniques it massively boosts the quality of our life. We're going to go over the five key mistakes that cause us to get distracted.",
 " distraction is when we are intending to do one thing and then we find ourselves doing something else. In order to actually know what the thing is you want to do you have to actually have a plan cuz if you don't have aPlan you can't be distracted.",
 '"I\'m Outsourcing the decision of how I\'m going to spend my time which is the single most valuable resource that I have" "I like the team behind YouTube 

### **Distilbart Model**

In [None]:
# Name the checkpoint and revision explicitly. Without them the pipeline falls
# back to a library default (this exact model and revision, per the warning
# printed by the original cell), which may change between library versions.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    revision="a4f8f3e",
)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [None]:
import textwrap

# Summarize the transcript in pieces of at most CHUNK_CHARS characters so each
# input stays well inside the model's input limit.
CHUNK_CHARS = 1000
# A final piece shorter than this is merged into the previous one: forcing
# min_length=30 on a near-empty input makes the model invent text.
MIN_TAIL_CHARS = 200

# textwrap.wrap breaks only at whitespace, so no word is cut in half, and it
# never yields an empty piece (an empty transcript gives an empty list).
chunks = textwrap.wrap(result, width=CHUNK_CHARS, break_on_hyphens=False)
if len(chunks) > 1 and len(chunks[-1]) < MIN_TAIL_CHARS:
  short_tail = chunks.pop()
  chunks[-1] = chunks[-1] + " " + short_tail

# Summarize each piece and collect the partial summaries in order.
summarized_text = []
for chunk in chunks:
  print("input text \n" + chunk)
  out = summarizer(chunk, min_length = 30, max_length = 100)
  out = out[0]['summary_text']
  print("Summarized text\n" + out)
  summarized_text.append(out)

#print(summarized_text)

input text 
 being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way if we want to make progress on our studies or in our work or in our side hustle or even like being present with our families for goodness sake that all requires this magical ability called focus it requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing the thing is we are bombarded by distractions all the time we have our social media notifications pinging right left and Center we have messages and emails and whatsapps and text messages and it's like it's so easy to get distracted these days with all of the Endless Options that we have available to us and the reason why I think it's so incredibly important to cultivate the ability to focus and this is the thing that I've really realized over the years that the quality of my life is directly proportional to my ability to focus and I'v

In [None]:
len(result)

23817

In [None]:
result

" being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way if we want to make progress on our studies or in our work or in our side hustle or even like being present with our families for goodness sake that all requires this magical ability called focus it requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing the thing is we are bombarded by distractions all the time we have our social media notifications pinging right left and Center we have messages and emails and whatsapps and text messages and it's like it's so easy to get distracted these days with all of the Endless Options that we have available to us and the reason why I think it's so incredibly important to cultivate the ability to focus and this is the thing that I've really realized over the years that the quality of my life is directly proportional to my ability to focus and I've had perio

In [None]:
len(str(summarized_text))

5986

In [None]:
str(summarized_text)

'["Being able to not get distracted is genuinely one of the most life-changing skills if we want to improve our lives in any way. It requires the ability to focus on one thing and one thing only and to be fully present in just doing that one bloody thing. It\'s so easy to get distracted these days with all of the Endless Options.", "There are a bunch of different techniques that we can learn to help us focus better. If we can just learn and apply those techniques it massively boosts the quality of our life. We\'re going to go over the five key mistakes that cause us to get distracted.", " distraction is when we are intending to do one thing and then we find ourselves doing something else. In order to actually know what the thing is you want to do you have to actually have a plan cuz if you don\'t have aPlan you can\'t be distracted.", \'"I\\\'m Outsourcing the decision of how I\\\'m going to spend my time which is the single most valuable resource that I have" "I like the team behind Y

### **Text to speech**

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed dataset

In [None]:
import torch


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from datasets import load_dataset
import torch
import soundfile as sf
from datasets import load_dataset

# Load the SpeechT5 text-to-speech processor and model, plus the HiFi-GAN
# vocoder that is handed to generate_speech below.
# NOTE: `model` is rebound here; earlier cells used the same name for the
# MarianMT and T5 models.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Convert the text to synthesize into PyTorch tensors (a fixed demo sentence,
# not one of the summaries produced above).
inputs = processor(text="Hello, how are you", return_tensors="pt")

# load xvector containing speaker's voice characteristics from a dataset
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Take the "xvector" field of row 7306 and add a leading dimension with unsqueeze(0).
# NOTE(review): 7306 is a hard-coded speaker choice - confirm it is the intended voice.
speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# Generate the speech tensor for the input ids, conditioned on the speaker embedding.
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

# Write the audio to speech.wav in the working directory.
# NOTE(review): 16000 is presumably the sample rate the model produces - confirm.
sf.write("speech.wav", speech.numpy(), samplerate=16000)
