In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
#read txt file into notebook
with open("spanish.txt", "r", encoding="utf-8") as files:
    lines = files.readlines()

for line in lines:
    print(line.strip())

1. ¡Buenos días a todos! ¡Espero que tengan un día maravilloso! ☀️ #BuenosDías #FelizDía
2. Esta noche, planifico una velada de películas acogedora bajo la manta. 🍿🎥 #NocheDePelículas #Acogedor
3. ¡Hoy es un día para disfrutar de los placeres! ¡La tarta de chocolate es divina! 🍰🍫 #DíaDeDisfrute #TartaDeChocolate
4. El clima otoñal invita a dar un paseo por el bosque. 🍂🍁 #Otoño #PaseoPorElBosque
5. ¡Me encantan los fines de semana! Relax y diversión en la agenda. 🎉🛋️ #FinDeSemana #Relax
6. Hoy por fin voy a reencontrarme con mis amigos. ¡Estoy emocionado(a)! 👭❤️ #Amigos #Reencuentro
7. Estoy tan agradecido(a) por mi familia. Siempre están ahí para mí. ❤️ #Agradecido #Familia
8. ¡Encontré un nuevo libro para leer! No puedo esperar a sumergirme en él. 📚🤓 #NuevoLibro #Lectura
9. Esta mañana presencié un hermoso amanecer. ¡La naturaleza es grandiosa! 🌅🏞️ #Amanecer #Naturaleza
10. ¡Bienvenidos a mis nuevos seguidores! Gracias por compartir este viaje conmigo. 🙏🌟 #NuevosSeguidores #Gracias
11

In [3]:
#Create an empty list
spanish_list = []

#Iterate over the txt file
for line in lines:
    spanish_list.append(line)
    
#get into data frame
spanish_df = pd.DataFrame({"spanish_texts":spanish_list})

In [4]:
import re
# Function to remove numbers and punctuation using regular expressions
def remove_numbers_and_punctuation(text):
    # Replace all numbers and punctuation with an empty string
    cleaned_text = re.sub(r'[0-9!@#$%^&*(),.?":{}|<>]', '', text)
    return cleaned_text

# Apply the function to the 'Text' column in the DataFrame
spanish_df['Cleaned_Text'] = spanish_df["spanish_texts"].apply(remove_numbers_and_punctuation)

#removeemoji
import emoji
import unicodedata

# Function to remove emojis from text
def remove_emojis(text):
    # Replace emojis with their names using the demojize() function
    cleaned_text = emoji.demojize(text)
    return cleaned_text

# Apply the function to the 'Text' column in the DataFrame
spanish_df['Cleaned_Text'] = spanish_df['Cleaned_Text'].apply(remove_emojis)


In [5]:
#translate spanish texts to english
from googletrans import Translator
import time

# Function to translate text from spanish to English with a retry mechanism
def translate_to_english(text, max_retry=5, sleep_duration=1.0):
    retry_count = 0
    translator = Translator()
    while retry_count < max_retry:
        try:
            translated_text = translator.translate(text, src='es', dest='en').text
            return translated_text
        except Exception as e:
            print(f"Translation failed. Retrying... ({retry_count + 1}/{max_retry})")
            retry_count += 1
            time.sleep(sleep_duration)
    print("Translation failed after maximum retries.")
    return None

# Apply the translation function to the 'Spanish_Text' column in the DataFrame
spanish_df['English_Text'] = spanish_df['Cleaned_Text'].apply(translate_to_english)

spanish_df.drop(columns=["spanish_texts", "Cleaned_Text"], inplace=True)
spanish_df

Unnamed: 0,English_Text
0,Good morning to all I hope you have a wonderfu...
1,Tonight I plan a welcoming movies under the bl...
2,Today is a day to enjoy the pleasures The choc...
3,The autumnal climate invites you to take a wal...
4,I love relax weekends and fun on the agenda: P...
5,"Today I will finally meet with my friends, I'm..."
6,I am so grateful for my family are always ther...
7,I found a new book to read I can't wait to imm...
8,This morning I witnessed a beautiful dawn. Nat...
9,Welcome to my new followers thank you for shar...


In [6]:
# Download the stopwords for English from NLTK
import nltk
nltk.download('stopwords')
nltk.download('punkt')

# to remove english stopwords
!pip install textblob
from textblob import TextBlob
from nltk.corpus import stopwords # get stopwords from NLTK library
from nltk.tokenize import word_tokenize # to create word tokens
from nltk.stem import WordNetLemmatizer # to reduce words to orginal form
from nltk.corpus import words # Get all words in english language

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\johns\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\johns\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!




In [7]:
# Function to remove English stopwords from text
def remove_english_stopwords(text):
    if pd.notna(text):  # Check if the value is not None
        stop_words = set(stopwords.words('english'))
        word_tokens = word_tokenize(text)
        filtered_text = [word for word in word_tokens if word.lower() not in stop_words]
        return ' '.join(filtered_text)
    return text  # Return the original text if it's None

# Apply the function to the 'Text' column in the DataFrame
spanish_df['English_Text'] = spanish_df['English_Text'].apply(remove_english_stopwords)

In [8]:
#sentiment analysis
# Function to perform sentiment analysis using TextBlob
def get_sentiment(text):
    if text is not None:  # Check if the value is not None
        analysis = TextBlob(text)
        polarity = analysis.sentiment.polarity
        if polarity > 0:
            return 'Positive'
        elif polarity < 0:
            return 'Negative'
        else:
            return 'Neutral'
    return 'Neutral'  # Return 'Neutral' for None values


# Apply the function to the 'Text' column in the DataFrame
spanish_df['Sentiment'] = spanish_df['English_Text'].apply(get_sentiment)

# Create a new DataFrame to store the results
sentiment_df = pd.DataFrame(spanish_df, columns=['English_Text', 'Sentiment'])

# See quick results of the Sentiment Analysis in a table format
print(sentiment_df)



                                         English_Text Sentiment
0   Good morning hope wonderful day : Sun : Good H...  Positive
1   Tonight plan welcoming movies blanket : Popcor...  Negative
2   Today day enjoy pleasures chocolate cake divin...  Positive
3   autumnal climate invites take walk forest : Fa...   Neutral
4   love relax weekends fun agenda : Party_popper ...  Positive
5   Today finally meet friends , 'm excited : Wome...  Positive
6   grateful family always : Red_heart : grateful ...   Neutral
7   found new book read ca n't wait immerse : Book...  Positive
8   morning witnessed beautiful dawn . Nature grea...  Positive
9   Welcome new followers thank sharing trip : Fol...  Positive
10  Music cleaning time weekend : Musical_notes : ...   Neutral
11  Thank birthday messages move much love : two_h...  Positive
12  autumal night beautiful enjoy time outdoors : ...  Positive
13  Today day relax enjoy good book cup tea : Open...  Positive
14  Quiet Sunday morning bed stay : Slee

In [9]:
# See quick results of the Sentiment Analysis
spanishsentiment_stored = sentiment_df['Sentiment'].value_counts() 
sentiment_df['Sentiment'].value_counts()

Sentiment
Positive    20
Neutral      9
Negative     1
Name: count, dtype: int64

In [10]:
#performing back translation and sentiment analysis

#drop the sentiment column in the spanish_df dataframe
spanish_df.drop(columns=["Sentiment"], inplace=True)

In [11]:
#translate back into spanish 
# Function to translate text from english to spanish with a retry mechanism
def translate_to_spanish(text, max_retry=5, sleep_duration=1.0):
    retry_count = 0
    translator = Translator()
    while retry_count < max_retry:
        try:
            translated_text = translator.translate(text, src='en', dest='es').text
            return translated_text
        except Exception as e:
            print(f"Translation failed. Retrying... ({retry_count + 1}/{max_retry})")
            retry_count += 1
            time.sleep(sleep_duration)
    print("Translation failed after maximum retries.")
    return None
# Apply the translation function to the 'Spanish_Tweet' column in the DataFrame
spanish_df['Spanish_Translation'] = spanish_df['English_Text'].apply(translate_to_spanish)

#drop english column after translation
spanish_df.drop(columns=["English_Text"], inplace=True)
spanish_df

Unnamed: 0,Spanish_Translation
0,Buenos días la esperanza maravillosa Día: Sol:...
1,Plan de esta noche Belling Movies Blanket: Pop...
2,Hoy Day Disfrute de placeres pastel de chocola...
3,Invitaciones del clima otoñal Take Walk Forest...
4,Love Relá Relájese los fines de semana Agenda ...
5,"Hoy finalmente se encuentran con amigos, 'M em..."
6,Familia agradecida Siempre: Red_heart: Familia...
7,Encontrado nuevo libro Leer Ca n't Wait Inmers...
8,La mañana presenció el hermoso amanecer.Natura...
9,Bienvenidos nuevos seguidores Agradezca el via...


In [12]:
#to perform sentiment analysis
!pip install sentiment-analysis-spanish
from sentiment_analysis_spanish import sentiment_analysis


# convert dataframe to list and filter out any none values
texts = [text for text in spanish_df['Spanish_Translation'].tolist() if text is not None]

# Create the SentimentAnalysisSpanish instance
analyzer = sentiment_analysis.SentimentAnalysisSpanish()

# Perform sentiment analysis on the list of Spanish tweets
results = [analyzer.sentiment(text) for text in texts]

# Create a DataFrame to store the tweets and their sentiment scores
sentiment_df = pd.DataFrame({'Text': texts, 'Sentiment Score': results})

# Define a function to get the sentiment label based on the sentiment score
def get_sentiment_label(score):
    if score > 0.2:
        return 'Positive'
    elif score < -0.2:
        return 'Negative'
    else:
        return 'Neutral'

# Add a new column to the DataFrame for the sentiment label
sentiment_df['Sentiment'] = sentiment_df['Sentiment Score'].apply(get_sentiment_label)

# Print the DataFrame to view the results in a table format
print(sentiment_df)

                                                 Text  Sentiment Score  \
0   Buenos días la esperanza maravillosa Día: Sol:...         0.838647   
1   Plan de esta noche Belling Movies Blanket: Pop...         0.002205   
2   Hoy Day Disfrute de placeres pastel de chocola...         0.021948   
3   Invitaciones del clima otoñal Take Walk Forest...         0.573458   
4   Love Relá Relájese los fines de semana Agenda ...         0.973651   
5   Hoy finalmente se encuentran con amigos, 'M em...         0.002233   
6   Familia agradecida Siempre: Red_heart: Familia...         0.777788   
7   Encontrado nuevo libro Leer Ca n't Wait Inmers...         0.290222   
8   La mañana presenció el hermoso amanecer.Natura...         0.796403   
9   Bienvenidos nuevos seguidores Agradezca el via...         0.467995   
10  Fin de semana de tiempo de limpieza de música:...         0.375616   
11  Gracias mensajes de cumpleaños Mover mucho amo...         0.818316   
12  Noche de otoñal Hermosa Disfrute d

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [13]:
print(spanishsentiment_stored)

Sentiment
Positive    20
Neutral      9
Negative     1
Name: count, dtype: int64


In [14]:
# Group the DataFrame by sentiment and calculate the count for each sentiment group
sentiment_summary = sentiment_df.groupby('Sentiment').size().reset_index(name='Count')

# Print the summarized results in a table format
print(sentiment_summary)

  Sentiment  Count
0   Neutral      9
1  Positive     21
