In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#open the tweets
with open("french.txt", "r", encoding="utf-8") as files:
    lines = files.readlines()


for line in lines:
    print(line.strip())

Bonjour à tous ! J'espère que vous passez une merveilleuse journée ! ☀️ #Bonjour #BonneJournée
2. Ce soir, prévu de regarder un film bien au chaud sous la couette. 🍿🎥 #SoiréeFilm #Cocooning
3. Aujourd'hui est une journée gourmande ! Le gâteau au chocolat est divin ! 🍰🍫 #Gourmandise #GâteauAuChocolat
4. Le temps automnal me donne envie d'une promenade dans la forêt. 🍂🍁 #Automne #Promenade
5. J'adore les weekends ! Au programme : détente et plaisir. 🎉🛋️ #Weekend #Détente
6. Aujourd'hui, je retrouve enfin mes amis. J'ai tellement hâte ! 👭❤️ #Retrouvailles #Amis
7. Je suis si reconnaissant(e) pour ma famille. Ils sont toujours là pour moi. ❤️ #Reconnaissance #Famille
8. J'ai trouvé un nouveau livre à lire ! J'ai hâte de m'y plonger. 📚🤓 #NouveauLivre #Lecture
9. Ce matin, j'ai été témoin d'un magnifique lever de soleil. La nature est superbe ! 🌅🏞️ #LeverDeSoleil #Nature
10. Bienvenue aux nouveaux abonnés ! Merci de partager ce voyage avec moi. 🙏🌟 #NouveauxAbonnés #Merci
11. C'est l'heure de

In [3]:
#Create an empty list
french_list = []

#Iterate over the txt file
for line in lines:
    french_list.append(line)
    
#get into data frame
french_df = pd.DataFrame({"french_comments":french_list})

In [4]:
import re
# Function to remove numbers and punctuation using regular expressions
def remove_numbers_and_punctuation(text):
    # Replace all numbers and punctuation with an empty string
    cleaned_text = re.sub(r'[0-9!@#$%^&*(),.?":{}|<>]', '', text)
    return cleaned_text

# Apply the function to the 'Text' column in the DataFrame
french_df['Cleaned_Text'] = french_df["french_comments"].apply(remove_numbers_and_punctuation)

#removeemoji
import emoji
import unicodedata

# Function to remove emojis from text
def remove_emojis(text):
    # Replace emojis with their names using the demojize() function
    cleaned_text = emoji.demojize(text)
    return cleaned_text

# Apply the function to the 'Text' column in the DataFrame
french_df['Cleaned_Text'] = french_df['Cleaned_Text'].apply(remove_emojis)


In [5]:
#translate to english
from googletrans import Translator
import time

# Function to translate text from spanish to English with a retry mechanism
def translate_to_english(text, max_retry=5, sleep_duration=1.0):
    retry_count = 0
    translator = Translator()
    while retry_count < max_retry:
        try:
            translated_text = translator.translate(text, src='fr', dest='en').text
            return translated_text
        except Exception as e:
            print(f"Translation failed. Retrying... ({retry_count + 1}/{max_retry})")
            retry_count += 1
            time.sleep(sleep_duration)
    print("Translation failed after maximum retries.")
    return None

# Apply the translation function to the 'French_Text' column in the DataFrame
french_df['English_Text'] = french_df['Cleaned_Text'].apply(translate_to_english)

french_df.drop(columns=["french_comments", "Cleaned_Text"], inplace=True)
french_df

Unnamed: 0,English_Text
0,Hello everyone I hope you have a wonderful day...
1,Tonight planned to watch a warm movie under th...
2,Today is a gourmet day The chocolate cake is d...
3,Fall time makes me want a walk in the forest: ...
4,I love weekends on the relaxation and pleasure...
5,Today I finally find my friends I can't wait: ...
6,I am so grateful to my family they are still t...
7,I found a new book to read I can't wait to imm...
8,This morning I witnessed a magnificent sunrise...
9,Welcome to the new subscribers thank you for s...


In [6]:
# Download the stopwords for English from NLTK
import nltk
nltk.download('stopwords')
nltk.download('punkt')

# to remove english stopwords
!pip install textblob
from textblob import TextBlob
from nltk.corpus import stopwords # get stopwords from NLTK library
from nltk.tokenize import word_tokenize # to create word tokens
from nltk.stem import WordNetLemmatizer # to reduce words to orginal form
from nltk.corpus import words # Get all words in english language

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\johns\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\johns\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!




In [7]:
#sentiment analysis
# Function to perform sentiment analysis using TextBlob
def get_sentiment(text):
    if text is not None:  # Check if the value is not None
        analysis = TextBlob(text)
        polarity = analysis.sentiment.polarity
        if polarity > 0:
            return 'Positive'
        elif polarity < 0:
            return 'Negative'
        else:
            return 'Neutral'
    return 'Neutral'  # Return 'Neutral' for None values


# Apply the function to the 'Text' column in the DataFrame
french_df['Sentiment'] = french_df['English_Text'].apply(get_sentiment)

# Create a new DataFrame to store the results
french_df = pd.DataFrame(french_df, columns=['English_Text', 'Sentiment'])

# See quick results of the Sentiment Analysis in a table format
print(french_df)


                                         English_Text Sentiment
0   Hello everyone I hope you have a wonderful day...  Positive
1   Tonight planned to watch a warm movie under th...  Positive
2   Today is a gourmet day The chocolate cake is d...   Neutral
3   Fall time makes me want a walk in the forest: ...   Neutral
4   I love weekends on the relaxation and pleasure...  Positive
5   Today I finally find my friends I can't wait: ...   Neutral
6   I am so grateful to my family they are still t...   Neutral
7   I found a new book to read I can't wait to imm...  Positive
8   This morning I witnessed a magnificent sunrise...  Positive
9   Welcome to the new subscribers thank you for s...  Positive
10  It's time to put music and store the house for...   Neutral
11  Thank you all for the birthday wishes I am tou...  Positive
12  The fall evening is so beautiful I will take a...  Positive
13  Today is a day of relaxation I will savor a go...  Positive
14  Sunday morning quiet in bed I will s

In [8]:
# See quick results of the Sentiment Analysis
frenchsentiment_stored = french_df['Sentiment'].value_counts()
french_df['Sentiment'].value_counts()

Sentiment
Positive    20
Neutral     10
Name: count, dtype: int64

In [9]:
#performing back translation and sentiment analysis

#drop the sentiment column in the german_df dataframe
french_df.drop(columns=["Sentiment"], inplace=True)

In [10]:
#translate back into french and perform sentiment analysis

# Function to translate text from english to french with a retry mechanism
def translate_to_french(text, max_retry=5, sleep_duration=1.0):
    retry_count = 0
    translator = Translator()
    while retry_count < max_retry:
        try:
            translated_text = translator.translate(text, src='en', dest='fr').text
            return translated_text
        except Exception as e:
            print(f"Translation failed. Retrying... ({retry_count + 1}/{max_retry})")
            retry_count += 1
            time.sleep(sleep_duration)
    print("Translation failed after maximum retries.")
    return None
# Apply the translation function to the 'french_Tweet' column in the DataFrame
french_df['French_Translation'] = french_df['English_Text'].apply(translate_to_french)


# drop DataFrame with English translations
french_df.drop(columns=["English_Text"], inplace=True)
french_df

Unnamed: 0,French_Translation
0,"Bonjour à tous, j'espère que vous passez une m..."
1,Ce soir prévu de regarder un film chaud sous l...
2,Aujourd'hui est une journée gastronomique le g...
3,Le temps d'automne me donne envie d'une promen...
4,J'adore les week-ends sur le programme de déte...
5,"Aujourd'hui, je trouve enfin mes amis, je ne p..."
6,Je suis tellement reconnaissant à ma famille q...
7,"J'ai trouvé un nouveau livre à lire, j'ai hâte..."
8,"Ce matin, j'ai été témoin d'un magnifique leve..."
9,Bienvenue chez les nouveaux abonnés merci d'av...


In [11]:
#to perform sentiment analysis
!pip install transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline





  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Load the tokenizer and model for French sentiment analysis
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


In [13]:
# Create a sentiment analysis pipeline using the loaded model
sentiment_analyzer = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)

# convert dataframe to list and filter out any none values
french_tweets = [text for text in french_df['French_Translation'].tolist() if text is not None]

# Perform sentiment analysis on the French tweets
results = sentiment_analyzer(french_tweets)

# Create a list to store the results in a table format
sentiment_table = []

# Set the threshold for considering a sentiment as positive or negative
threshold = 0.3

# Process the results and store them in the table
for result in results:
    score = result["score"]
    sentiment = "Positive" if score > threshold else "Negative"
    sentiment_table.append({"Sentiment": sentiment, "Score": score})

# Convert the list to a pandas DataFrame
df = pd.DataFrame(sentiment_table)

# Print the DataFrame (table)
print(df)

    PyTorch 2.0.1+cu118 with CUDA 1108 (you have 2.0.1+cpu)
    Python  3.11.3 (you have 3.11.4)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


   Sentiment     Score
0   Positive  0.540816
1   Positive  0.307856
2   Positive  0.313858
3   Positive  0.386552
4   Positive  0.465478
5   Positive  0.431574
6   Positive  0.763481
7   Positive  0.314949
8   Positive  0.744787
9   Positive  0.583507
10  Positive  0.456393
11  Positive  0.664719
12  Positive  0.372038
13  Positive  0.352988
14  Positive  0.320223
15  Positive  0.343338
16  Positive  0.323851
17  Negative  0.269405
18  Negative  0.252331
19  Positive  0.414432
20  Positive  0.490786
21  Positive  0.379723
22  Positive  0.329717
23  Positive  0.452521
24  Positive  0.373027
25  Negative  0.286848
26  Positive  0.431570
27  Positive  0.360648
28  Positive  0.365383
29  Positive  0.400756


In [14]:
print(frenchsentiment_stored)

Sentiment
Positive    20
Neutral     10
Name: count, dtype: int64


In [15]:
# Group by 'Sentiment_Group' and calculate the count of each group
summary_df = df.groupby('Sentiment').size().reset_index(name='Count')

# Display the summary DataFrame
print(summary_df)

  Sentiment  Count
0  Negative      3
1  Positive     27
