### 0 - packages

In [1]:
# warnings
import warnings
warnings.filterwarnings("ignore")

# general packages
import pandas as pd
import numpy as np
import pymongo, yaml, re, string
from datetime import datetime

# nlp
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk import bigrams

# spacy
import spacy

# gensim
from gensim.models import Word2Vec
from gensim import corpora
from gensim.models import LdaModel
from gensim.models import CoherenceModel
from gensim.test.utils import common_texts
from pprint import pprint
from collections import Counter
from gensim.models import Phrases
from gensim.models.phrases import Phraser
from gensim.corpora import Dictionary
import gensim.downloader as api

# scikit learn
from sklearn.decomposition import PCA

#lda model
from gensim.models import LdaModel

# matplot
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

### 1 - Functions

In [2]:
# ==================================================================================================
# ==================================================================================================
# YAML
def read_yaml_file(yaml_file):
    """ load yaml cofigurations """

    config = None
    try:
        with open(yaml_file, 'r') as f:
            config = yaml.safe_load(f)
    except:
        raise FileNotFoundError('Couldnt load the file')

    return config

# ==================================================================================================
# ==================================================================================================
# Mongo DB
def get_conn_mongo(creds_file, collection):
    creds = read_yaml_file(creds_file)['mongo']
    url=creds['url']
    db=creds['db']
    collection=collection

    return url, db, collection

# ==================================================================================================
# ==================================================================================================
# clean text
def clean_text(text, tokenizer, stopwords):
    text = str(text).lower()                            # lowercase words
    text = text.replace('_', ' ')                       # replace character
    text = text.replace('-', ' ')                       # replace character
    text = text.replace(',', ' ')                       # replace character
    text = text.replace('á', 'a')                       # replace character
    text = text.replace('é', 'e')                       # replace character
    text = text.replace('í', 'i')                       # replace character
    text = text.replace('ó', 'o')                       # replace character
    text = text.replace('ú', 'u')                       # replace character
    text = re.sub(r"\[(.*?)\]", "", text)               # remove [+XYZ chars] in content
    text = re.sub(r"\s+", " ", text)                    # remove multiple spaces in content
    text = re.sub(r"\w+…|…", "", text)                  # remove ellipsis (and last word)
    text = re.sub(r"(?<=\w)-(?=\w)", " ", text)         # replace dash between words
    text = re.sub(r'(www\.|http://)', '', text)
    text = re.sub(r'\b\w*\d\w*\b', '', text)
    text = re.sub(
        f"[{re.escape(string.punctuation)}]", "", text
    )                                                   # remove punctuation

    tokens = tokenizer(text)                            # get tokens from text
    tokens = [t for t in tokens if not t in stopwords]  # remove stopwords
    tokens = ["" if t.isdigit() else t for t in tokens] # remove digits
    tokens = [t for t in tokens if len(t) > 1]          # remove short tokens
    return tokens

### 2 - Load the pre-trained Word2Vec model

In [3]:
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nlp_spacy = spacy.load("es_core_news_sm")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\migue\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\migue\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\migue\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\migue\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\migue\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


### 3 - Preprocessing text

In [4]:
# ==================================================================================================
# ==================================================================================================
# Convert NLTK POS tags to WordNet-compatible format
def get_wordnet_pos(word):    
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": nltk.corpus.wordnet.ADJ,
                "N": nltk.corpus.wordnet.NOUN,
                "V": nltk.corpus.wordnet.VERB,
                "R": nltk.corpus.wordnet.ADV}

    return tag_dict.get(tag, nltk.corpus.wordnet.NOUN)

# ==================================================================================================
# ==================================================================================================
# Perform text lemmatization with Spacy
def lemma_tokens(text):
    doc = nlp_spacy(" ".join(text))
    return [token.lemma_ for token in doc]
    
# ==================================================================================================
# ==================================================================================================
# Extract n-grams from the text
def get_ngrams(df, column):    
    sentences_with_bigrams_list=[]
    sentences=df[column]    
    bigram_model=Phrases(sentences, min_count=20, threshold=1)                    # train the model Phrases
    bigram_phraser=Phraser(bigram_model)                                          # create the object Phraser
    sentences_with_bigrams=[bigram_phraser[sentence] for sentence in sentences]   # apply the ngrams to the sentences
    
    for sentence in sentences_with_bigrams:
        sentences_with_bigrams_list.append(sentence)
    return sentences_with_bigrams_list

In [5]:
from IPython.display import display, HTML

image_path = "img\preprocess-text.png"
html_code = f"""
<center>
    <img src="{image_path}" width="800">
</center>
"""
display(HTML(html_code))

## LDA Topic Modeling: A Step-by-Step Guide

When working with Latent Dirichlet Allocation (LDA) for topic modeling, the quality of your results often hinges on how well you prepare your text data. Raw text is messy and filled with noise that can mislead your analysis. To make your data LDA-ready, you need a solid preprocessing workflow. In the next stage, I’ll walk you through a simple yet effective preprocessing pipeline and explain why each step matters.

**What is LDA, and Why Does Preprocessing Matter?**

LDA is a powerful algorithm for uncovering hidden topics in text data. It groups words that frequently co-occur into topics, helping you make sense of large text datasets. However, LDA is only as good as the data you feed it. Without proper preprocessing, irrelevant or noisy data can muddy your results, making topics less meaningful or harder to interpret.

This is where preprocessing comes in: it cleans, standardizes, and structures your data, ensuring LDA focuses on what truly matters.

### 4 - LDA process

In [6]:
def compute_coherence_values(docs, dictionary, corpus, texts, start, limit, step, id2word):

    # ==================================================================================================
    # ==================================================================================================
    # LDA params    
    chunksize=2000
    passes=20
    iterations=500
    eval_every=None    # do not evaluate model perplexity, takes too much time

    # ==================================================================================================
    # ==================================================================================================
    coherence_values = []
    model_list = []

    # ==================================================================================================
    # ==================================================================================================
    for num_topics in range(start, limit, step):            
        
        # ==================================================================================================
        # LDA model
        model=LdaModel(corpus=corpus, 
                       id2word=id2word,
                       chunksize=chunksize,
                       alpha='auto',
                       eta='auto',
                       iterations=iterations,
                       num_topics=num_topics,
                       passes=passes, 
                       eval_every=eval_every)
        
        # ==================================================================================================
        # append each model
        model_list.append(model)

        # ==================================================================================================
        # compute the model coherence
        coherence_model = CoherenceModel(model=model, 
                                         texts=docs, 
                                         dictionary=dictionary, 
                                         coherence='c_v')
    
        coherence_values.append(coherence_model.get_coherence())
    
    return model_list, coherence_values

### 5 - Define the context and problem

`Define the stopwords`

In [7]:
stopwords = set(stopwords.words("spanish") + ['app', 'banco', 'si', 'mas', 'gracias', 'aplicacion', 'santander', 'bbva', 'banco azteca', 'citiBanamex', 'hsbc', 'banorte', 'banamex'])

`Get the reviews from MongoDB`

In [8]:
# ====================================================================
# ====================================================================
# load data
url_mg, db_mg, collection_mg = get_conn_mongo("key.yaml", 'bank_reviews')

# ====================================================================
# ====================================================================
# create the client and get the documents
client = pymongo.MongoClient(url_mg)
db = client[db_mg]
collection = db[collection_mg]

# ====================================================================
# ====================================================================
data=collection.find({}) 
documents=list(data)

# ====================================================================
# ====================================================================
# create the pandas
df=pd.DataFrame(documents)
df.head(2)

Unnamed: 0,_id,at,score,content,bank
0,670ac0db151b1cffe5ea06a4,2024-09-07 19:04:07,3,"Buena pero tiene muchos detalles, uno de los m...",Santander
1,670ac0db151b1cffe5ea06a5,2024-09-09 18:06:53,1,La interfaz es lenta y la cuestión del superto...,Santander


`Remove unnecessary columns`

In [9]:
df.drop(columns=["_id", "at", "score"], inplace=True)

In [10]:
df.isnull().sum()

content    0
bank       0
dtype: int64

`Get the unique keys`

In [11]:
banks=df["bank"].unique()
print(banks)

['Santander' 'BBVA' 'Banco-Azteca' 'CitiBanamex' 'HSBC' 'Banorte']


**Clean the Text**
- *What It Is:* Text cleaning involves removing unnecessary characters like punctuation, numbers, special symbols, and stop words (e.g., "and," "is," "the"). It also converts all text to lowercase.
- *Why It Matters:* Cleaning eliminates noise and ensures the algorithm isn't distracted by irrelevant details. For example, "Machine Learning" and "machine learning" should be treated as the same word, not two different entities.
- *Example:*
    - Raw text: "The quick brown fox jumps over 2 lazy dogs!! 🦊🐶"
    - Cleaned text: "quick brown fox jumps lazy dogs"

In [12]:
df["clean-reviews"]=df["content"].map(lambda x: clean_text(x, word_tokenize, stopwords))
df.head(2)

Unnamed: 0,content,bank,clean-reviews
0,"Buena pero tiene muchos detalles, uno de los m...",Santander,"[buena, detalles, frecuentes, visto, solo, usu..."
1,La interfaz es lenta y la cuestión del superto...,Santander,"[interfaz, lenta, cuestion, supertoken, tedios..."


**Lemmatize the Text**
- *What It Is:* Lemmatization reduces words to their base or dictionary form. For instance, "running," "ran," and "runs" all become "run."
- *Why It Matters:* By grouping variations of the same word, LDA can better identify patterns and relationships. Without lemmatization, the algorithm might treat these variations as separate words, weakening the coherence of topics.
- *Example:*
  - Original: "Dogs are running in the park."
  - Lemmatized: "dog be run in the park."

In [13]:
df["lemmatize-reviews"]=df["clean-reviews"].map(lambda x: lemma_tokens(x))
df.head(2)

Unnamed: 0,content,bank,clean-reviews,lemmatize-reviews
0,"Buena pero tiene muchos detalles, uno de los m...",Santander,"[buena, detalles, frecuentes, visto, solo, usu...","[buen, detalle, frecuente, visto, solo, usuari..."
1,La interfaz es lenta y la cuestión del superto...,Santander,"[interfaz, lenta, cuestion, supertoken, tedios...","[interfaz, lento, cuestion, supertoken, tedios..."


**Find N-grams**
- *What It Is:* N-grams are sequences of N words that appear together in text. Common n-grams include:
    - Bigrams (2-grams): "data science"
    - Trigrams (3-grams): "machine learning model"
- Why It Matters: Some ideas are better represented as phrases rather than individual words. For example, "New York" as a bigram carries more meaning than "New" and "York" separately.
- Example:
    - Text: "I love machine learning models."
    - Bigrams: ["machine learning," "learning models"]

In [14]:
df['ngrams-lem-reviews']=pd.DataFrame({'data':list(get_ngrams(df, 'lemmatize-reviews'))})
df.head(2)

Unnamed: 0,content,bank,clean-reviews,lemmatize-reviews,ngrams-lem-reviews
0,"Buena pero tiene muchos detalles, uno de los m...",Santander,"[buena, detalles, frecuentes, visto, solo, usu...","[buen, detalle, frecuente, visto, solo, usuari...","[buen, detalle, frecuente, visto, solo, usuari..."
1,La interfaz es lenta y la cuestión del superto...,Santander,"[interfaz, lenta, cuestion, supertoken, tedios...","[interfaz, lento, cuestion, supertoken, tedios...","[interfaz, lento, cuestion, supertoken, tedios..."


In [15]:
banks

array(['Santander', 'BBVA', 'Banco-Azteca', 'CitiBanamex', 'HSBC',
       'Banorte'], dtype=object)

In [16]:
df.head(1)

Unnamed: 0,content,bank,clean-reviews,lemmatize-reviews,ngrams-lem-reviews
0,"Buena pero tiene muchos detalles, uno de los m...",Santander,"[buena, detalles, frecuentes, visto, solo, usu...","[buen, detalle, frecuente, visto, solo, usuari...","[buen, detalle, frecuente, visto, solo, usuari..."


Before running the LDA algorithm, it’s crucial to filter out certain words. This step involves removing terms that are either too common (like "great" or "people") or too rare to add value. By doing so, we ensure that the algorithm focuses on words that truly matter, leading to more meaningful and coherent topics.

**Filter Out Words**
- *What It Is:* Filtering involves removing extremely common words (e.g., "good," "people") or extremely rare words that don't contribute meaningful insights.
- *Why It Matters:* Common words can dominate topics, making them less specific, while rare words add noise. Filtering ensures that only relevant and impactful words are included.
- *Example:*
    - Original list: ["machine," "learning," "data," "great," "obscuretermxyz"]
    - Filtered list: ["machine," "learning," "data"]

**Why This Workflow is Crucial**
- Without proper preprocessing, raw text can mislead the LDA algorithm, resulting in topics that are incoherent or irrelevant. By cleaning, lemmatizing, finding n-grams, and filtering your text, you ensure the algorithm focuses on the most meaningful patterns.<br><br>
  Preprocessing makes your topics more interpretable and actionable, whether you're analyzing customer feedback, scientific papers, or social media conversations.

### 6 - LDA(Latent Dirichlet Allocation)

**What is the LDA Algorithm?**
- Latent Dirichlet Allocation (LDA) is a popular algorithm used for topic modeling, which helps uncover hidden themes or topics within a large collection of text. Think of it as a way to automatically group words into clusters that represent the main ideas in your dataset.

**How Does LDA Work?**
- LDA assumes that:
    - Each document in your dataset is a mix of different topics. For example, a news article might be 60% about politics and 40% about technology.
    - Each topic is made up of certain words that are likely to appear together. For instance, a "sports" topic might include words like "game," "team," and "score."<br><br>
*The algorithm analyzes the data and assigns a probability for each word belonging to a topic, then assigns a mix of topics to each document.*

In [17]:
image_path = "img\lda.jpg"
html_code = f"""
<center>
    <img src="{image_path}" width="800">
</center>
"""
display(HTML(html_code))

In [18]:
# =========================================================================================
# define the dataframe for save all the models
all_models=pd.DataFrame(columns=['bank', 'model', 'topics', 'coherence-value'])
corpus_dict=dict()

for b in banks:

    # =========================================================================================
    # define the condition
    condition=df['bank']==b
    
    # =========================================================================================
    # get the data by bank
    df_slice=df[condition]

    # =========================================================================================
    # =========================================================================================
    # create the corpus and the dictionary
    docs = df_slice['ngrams-lem-reviews']
    dictionary=Dictionary(docs)                                # create a dictionary representation of the documents
    dictionary.filter_extremes(no_below=20, no_above=0.5)      # filter out words that occur less than 20 documents or more than 50% of the documents
    corpus = [dictionary.doc2bow(doc) for doc in docs]         # bag of owrds representation of the documents
    corpus_dict[b]=corpus

    # =========================================================================================
    # =========================================================================================
    # create the LDA model
    print('='*50)
    print('bank: ', b)
    print('number of unique token: %d' % len(dictionary))
    print('number of documents: %d' % len(corpus))    

    start = 2    # Número mínimo de temas
    limit = 8    # Número máximo de temas a probar
    step = 2     # Incremento del número de temas a probar

    # Make an index to word dictionary.
    temp=dictionary[0]           # This is only to "load" the dictionary.
    id2word=dictionary.id2token

    model_list, coherence_values = compute_coherence_values(docs=docs,
                                                            dictionary=dictionary, 
                                                            corpus=corpus, 
                                                            texts=docs, 
                                                            id2word=id2word,
                                                            start=start, 
                                                            limit=limit, 
                                                            step=step)
    # =========================================================================================
    # =========================================================================================
    # get the summary form all the models
    models=pd.DataFrame(columns=['bank', 'model', 'topics', 'coherence-value'])

    print('='*50)
    print('bank: ', b)
    
    for m, model, cv in zip(range(start, limit, step), model_list, coherence_values):
        print(f"Num Topics = {m}, Coherence Value = {cv}")
        models=pd.concat([models, 
                          pd.DataFrame(data=[b, model, m, cv], index=['bank', 'model', 'topics', 'coherence-value']).T], 
                          ignore_index=True)
        
    all_models=pd.concat([all_models, models], ignore_index=True)

all_models

bank:  Santander
number of unique token: 684
number of documents: 7233
bank:  Santander
Num Topics = 2, Coherence Value = 0.40943621592681284
Num Topics = 4, Coherence Value = 0.40821300673091765
Num Topics = 6, Coherence Value = 0.42557485098860354
bank:  BBVA
number of unique token: 1622
number of documents: 13854
bank:  BBVA
Num Topics = 2, Coherence Value = 0.3364297833376455
Num Topics = 4, Coherence Value = 0.4044771197146468
Num Topics = 6, Coherence Value = 0.4195420020957279
bank:  Banco-Azteca
number of unique token: 564
number of documents: 7774
bank:  Banco-Azteca
Num Topics = 2, Coherence Value = 0.46448844550853774
Num Topics = 4, Coherence Value = 0.47384827755227366
Num Topics = 6, Coherence Value = 0.44147786681322176
bank:  CitiBanamex
number of unique token: 1015
number of documents: 10736
bank:  CitiBanamex
Num Topics = 2, Coherence Value = 0.38224633922207996
Num Topics = 4, Coherence Value = 0.44410978375420623
Num Topics = 6, Coherence Value = 0.4117370470738067


Unnamed: 0,bank,model,topics,coherence-value
0,Santander,"LdaModel<num_terms=684, num_topics=2, decay=0....",2,0.409436
1,Santander,"LdaModel<num_terms=684, num_topics=4, decay=0....",4,0.408213
2,Santander,"LdaModel<num_terms=684, num_topics=6, decay=0....",6,0.425575
3,BBVA,"LdaModel<num_terms=1622, num_topics=2, decay=0...",2,0.33643
4,BBVA,"LdaModel<num_terms=1622, num_topics=4, decay=0...",4,0.404477
5,BBVA,"LdaModel<num_terms=1622, num_topics=6, decay=0...",6,0.419542
6,Banco-Azteca,"LdaModel<num_terms=564, num_topics=2, decay=0....",2,0.464488
7,Banco-Azteca,"LdaModel<num_terms=564, num_topics=4, decay=0....",4,0.473848
8,Banco-Azteca,"LdaModel<num_terms=564, num_topics=6, decay=0....",6,0.441478
9,CitiBanamex,"LdaModel<num_terms=1015, num_topics=2, decay=0...",2,0.382246


Now that we’ve generated multiple models with various topics and evaluated their coherence scores to determine which models are the most meaningful for each bank, the next step is to select the best models and their corresponding topics. This ensures that the chosen models provide the most accurate and insightful representation of the data for each specific case.

In [19]:
best_models=pd.DataFrame()

for b in banks:
    condition=all_models['bank']==b
    best_models=pd.concat([best_models,
                           pd.DataFrame(all_models.iloc[all_models[condition]['coherence-value'].idxmax()]).T],
                          ignore_index=True)
best_models

Unnamed: 0,bank,model,topics,coherence-value
0,Santander,"LdaModel<num_terms=684, num_topics=6, decay=0....",6,0.425575
1,BBVA,"LdaModel<num_terms=1622, num_topics=6, decay=0...",6,0.419542
2,Banco-Azteca,"LdaModel<num_terms=564, num_topics=4, decay=0....",4,0.473848
3,CitiBanamex,"LdaModel<num_terms=1015, num_topics=4, decay=0...",4,0.44411
4,HSBC,"LdaModel<num_terms=359, num_topics=4, decay=0....",4,0.395419
5,Banorte,"LdaModel<num_terms=572, num_topics=4, decay=0....",4,0.445642


In [20]:
top_topics_dict=dict()
for k in range(len(best_models)):

    print('='*90)
    print('Bank:', best_models['bank'].iloc[k])
    # =============================================================================================
    # get the model for each bank
    top_topics=best_models['model'].iloc[k].top_topics(corpus_dict[best_models['bank'].iloc[k]])

    # =============================================================================================
    # get the top topics
    top_topics_array=[]
    for t in top_topics:
        top_topics_array.append(np.array(t[0])[:, 1])
        print(np.array(t[0])[:, 1])
        
    print('='*90)        
    # =============================================================================================
    # ad to the dictionary
    top_topics_dict[best_models['bank'].iloc[k]]=top_topics_array

Bank: Santander
['poder' 'sucursal' 'solo' 'hacer' 'token' 'ir' 'dinero' 'transferencia'
 'decir' 'tarjeta' 'cuenta' 'pedir' 'poder_hacer' 'servir' 'pesimo'
 'dejar' 'dejar_hacer' 'contar' 'super_token' 'peor']
['transferencia' 'problema' 'ubicacion' 'decir' 'abrir' 'poder_hacer'
 'cerrar' 'actualizacion' 'solo' 'entrar' 'funcionar' 'vez' 'fallar'
 'poder' 'pasar' 'hacer' 'ultimo_actualizacion' 'ahora'
 'hacer_transferencia' 'mismo']
['telefono' 'nuevo' 'celular' 'pedir' 'proceso' 'vez' 'tardar'
 'complicado' 'dato' 'demasiado' 'super_token' 'hacer' 'informacion'
 'actualizar' 'token' 'cambio' 'decir' 'mismo' 'supertoken' 'realizar']
['buen' 'mejor' 'util' 'servicio' 'bien' 'momento' 'bastante' 'facil_usar'
 'lento' 'ahora' 'mejorado' 'uso' 'exelente' 'rapido' 'verdad' 'opción'
 'mejorar' 'eficiente' 'interfaz' 'adema']
['usar' 'bien' 'problema' 'pago' 'hacer' 'instalar' 'utilizar'
 'complicado' 'seguridad' 'facil' 'movimiento' 'activar' 'bloqueo' 'falla'
 'excelente_facil' 'sistema' '

Now that we have evaluated the top-performing models based on coherence scores derived from customer reviews for each bank, we can uncover valuable insights into how customers perceive and associate with their financial institutions. This analysis not only sheds light on the strengths and weaknesses of each bank but also helps identify specific issues that customers face. Armed with this understanding, we can develop tailored strategies to address these challenges effectively, improve customer satisfaction, and strengthen brand loyalty. For instance, if the analysis highlights recurring concerns about digital services, the bank can prioritize enhancing its app functionality or user experience. By aligning strategies with customer feedback, we can ensure a data-driven approach to solving key issues and driving growth.

#### **`LDA - Insights`**

In the next stage, we delve into uncovering insights specifically related to the customer experience (CX) of banking applications. This phase focuses on understanding how users interact with the app, identifying its strengths, and pinpointing areas for improvement. By analyzing customer feedback and behavioral patterns, we can gain a comprehensive view of what resonates with users and where the app may fall short. For example, recurring themes around navigation issues or slow response times could signal areas needing immediate attention. These insights will guide us in refining the application to enhance user satisfaction, boost engagement, and ultimately solidify the app as a competitive advantage for the bank.

**`Santander`**

- *Topic 1:*
    - Dependency on Branches and Token Issues
    - *Key Terms:* 'access', 'branch', 'token', 'transfer', 'account', 'terrible'.
    - *Description:*  Users express frustration about the need to visit branches to complete certain operations, particularly those involving tokens or authentication. These limitations lead to dissatisfaction, as customers expect a fully digital experience.
    - *Recommendation:* Enable critical functions (such as transfers) directly through the app, eliminating the need for physical visits and improve the stability and ease of use of the token, ensuring it is reliable and accessible.
<br><br>
- *Topic 2:*
    - Recurring Technical Issues
    - *Key Terms:* 'transfer', 'problem', 'update', 'failure', 'function'.
    - *Description:* Users report frequent technical issues, particularly following updates. These failures affect transfers and other basic functions, negatively impacting the perception of reliability.
    - *Recommendation:* Implement thorough testing processes before releasing updates and establish a dedicated support channel to quickly address problems caused by technical failures.
<br><br>
- *Topic 3:*
    - Complications When Changing Devices
    - *Key Terms:* 'phone', 'new', 'process', 'complicated', 'update', 'super_token'.
    - *Description:* Changing devices is perceived as cumbersome and confusing. Issues with token setup and data updates are common.
    - *Recommendations:* Simplify the device change process with interactive guides and a more intuitive experience and offer dedicated support to address questions during the transition.
<br><br>
- *Topic 4:*
    - Perception of Improvement and Suggestions
    - *Key Terms:* 'good', 'better', 'slow', 'interface', 'option'.
    - *Description:* Some users acknowledge improvements in the app but still experience slowness in certain functionalities and feel the interface could be more efficient.
    - *Recommendations:* Continue optimizing the speed and design of the interface and proactively communicate implemented improvements to reinforce positive perceptions.
<br><br>
- *Topic 5:*
    - Specific Issues and Functionality
    - *Key Terms:* 'use', 'payment', 'complicated', 'security', 'movement'.
    - *Description:* While basic functionality is appreciated, users face issues with payments, activations, and security blocks. Some processes are seen as overly complex or unintuitive.
    - *Recommendations:* Simplify key processes, such as payments and activations, by reducing unnecessary steps and ensure that security measures do not compromise user experience.
<br><br>
- *Topic 6:*
    - Positive Experiences
    - *Key Terms:* 'excellent', 'fast', 'secure', 'easy', 'intuitive'.
    - *Description:* Many users highlight positive aspects of the app, such as its speed, security, and ease of use. These experiences can serve as a differentiator against competitors.
    - *Recommendations:* Identify the elements driving these positive experiences and use them as pillars for marketing campaigns and maintain these high standards and replicate them in areas that need improvement.
<br><br>
- *`Conclusion:`*
    - The thematic analysis reveals a mixed landscape, with highly valued aspects alongside significant areas for improvement. By addressing critical issues and leveraging strengths, Santander has the opportunity to transform the perception of its app and strengthen customer relationships. This analysis serves as an example of how processing customer feedback can be a powerful tool for improving digital products in the financial industry.
---

**`BBVA`**
- *Topic 1:*
    - Issues with Login and Basic Usage
    - *Key Terms:* 'allow_login', 'access', 'install', 'problem', 'function', 'mobile'.
    - *Description:* Users frequently face challenges when trying to access the app, caused by technical issues or additional requirements (such as location or device data). This negatively impacts the basic user experience.
    - *Recommendations:* Improve login stability by removing unnecessary barriers and provide clear support for resolving technical issues related to accessing the app.
<br><br>
- *Topic 2:*
    - Issues with Biometric Login and Authentication
    - *Key Terms:* 'login', 'fingerprint', 'password', 'biometric', 'allow_access', 'failure'.
    - *Description:* Biometric features (fingerprint and facial recognition) are unreliable, disrupting smooth access for users. Frequent authentication failures are a source of frustration.
    - *Recommendations:* Optimize the biometric system to enhance consistency and offer accessible alternative login options in case the biometric system fails.
<br><br>
- *Topic 3:*
    - Polarized Opinions on Updates
    - *Key Terms:* 'better', 'excellent', 'previous', 'terrible', 'new_update', 'previous_version'.
    - *Description:* App updates evoke mixed reactions: some users appreciate improvements, while others prefer earlier versions. Interface changes and performance appear to be contentious points.
    - *Recommendations:* Conduct usability testing with users before launching new versions and communicate the benefits of updates and provide tutorials to facilitate adaptation.
<br><br>
- *Topic 4:*
    - Issues with Services and Branch Dependency
    - *Key Terms:* 'access', 'branch', 'card', 'money', 'payment', 'facial_recognition'.
    - *Description:* Persistent issues require users to rely on branches for tasks such as card management, payments, or facial recognition-related processes, highlighting barriers to a fully digital experience.
    - *Recommendations:* Digitize critical processes, such as payments and card management, to reduce branch dependency. Ensure facial recognition functions seamlessly as part of the digital experience.
<br><br>
- *Topic 5:*
    - Transfer Issues Post-Updates
    - *Key Terms:* 'transfer', 'enable_transfer', 'allow_transfer', 'error', 'new_update'.
    - *Description:* Transfers are one of the functionalities most affected by recent updates, causing significant frustration as this is an essential feature.
    - *Recommendations:* Prioritize the stability of transfer functionalities during updates. Implement rigorous testing mechanisms to ensure transfers remain unaffected.
<br><br>
- *Topic 6:*
    - Negative Perception of Updates
    - *Key Terms:* 'perform', 'worse', 'update', 'function', 'bad', 'garbage'.
    - *Descriptions:* Many updates are associated with negative experiences, as users perceive them as regressions rather than improvements. This harms the app's overall reputation.
    - *Recommendations:* Focus on addressing reported issues rather than adding new features without optimizing existing ones. Implement a feedback system to better understand user needs before rolling out updates.
<br><br>
- `General Insights:`
    - Critical Areas:
        - Access issues (login and authentication).
        - Transfer failures, particularly after updates.
        - Dependency on branches for basic services.
    - Perception of Updates:
        - Polarized views exist, with some users appreciating improvements while others believe updates worsen the experience.
    - Positive Aspects:
        - Some users value specific improvements in speed and design, but these cases are less frequent.
---
**`Banco Azteca`**
- *Topic 1:*
    - Basic Banking Operations and Branch Use
    - *Key Terms:* 'payment,' 'power,' 'money,' 'do,' 'branch,' 'credit,' 'collect,' 'card.'
    - *Interpretation:* Users mention basic operations such as payments, money management, and card usage, as well as their interaction with physical branches. There seems to be frustration about the need to visit branches to complete certain tasks, along with issues related to credit and collections. The word "truth" may indicate frustration or distrust in the information provided.
    - *Recommendation:* Digitize key processes to reduce dependency on branches. Improve communication with users to avoid misunderstandings related to credit and collections.
<br><br>
- *Topic 2:*
    - Service Perception and Performance
    - *Key Terms:* 'good,' 'excellent,' 'fast,' 'problem,' 'transfer,' 'reliable,' 'recharge,' 'simple.'
    - *Interpretation:* Some users appreciate positive aspects like speed, reliability, and ease of use. However, terms like "problem" and "failure" indicate that some users experience technical issues, especially with transfers and recharges.
    - *Recommendation:* Ensure the stability of critical features such as transfers and recharges. Highlight positive feedback in marketing campaigns to reinforce the perception of reliability.
<br><br>
- *Topic 3:*
    - Technical Issues and Updates
    - *Key Terms:* 'update,' 'work,' 'problem,' 'log_in,' 'error,' 'internet.'
    - *Interpretation:* App updates seem to be a recurring source of technical issues, such as difficulties accessing the app or functional errors. Terms like "log_in" and "error" reflect access problems, possibly related to connectivity or authentication.
    - *Recommendation:* Conduct thorough testing before releasing updates. Implement a communication channel to inform users about known issues and solutions. Provide a smoother and more reliable access experience, even under limited connectivity conditions.
<br><br>
- *Topic 4:*
    - User Experience and Functionality
    - *Key Terms:* 'easy_to_use,' 'excellent,' 'secure,' 'fast,' 'practical,' 'efficient,' 'functional.'
    - *Interpretation:* This topic highlights positive feedback on the app's ease of use, security, and functionality. Users value aspects such as speed, practicality, and efficiency, suggesting these are strong points of the app.
    - *Recommendation:* Maintain and enhance these positive features. Leverage this feedback to craft marketing messages that emphasize the app's key benefits.
<br><br>
- `General Insights:`
    - *Critical Areas:*
        - Technical Issues: Updates, errors, and limited access are frequent complaints.
        - Branch Dependency: Users seek greater digital autonomy for managing critical operations.
    - *Positive Aspects:*
        - Users appreciate ease of use, security, and speed, which can be a competitive advantage over other banking apps.
    - *Opportunities for Improvement:*
        - Enhance technical stability, especially after updates.
        - Focus on digitalizing key processes to reduce friction and increase customer autonomy.
        - Strengthen communication with users regarding the app's status and resolutions to technical issues.
---
**`CitiBanamex`**
- *Topic 1:*
    - Issues with Viewing Transactions and Balances
    - *Key Terms:* 'transaction,' 'view,' 'appear,' 'period,' 'balance,' 'screenshot,' 'view_transaction.'
    - *Interpretation:* Users report difficulties in viewing transactions or balances correctly in the app. The term "annoying" indicates frustration with the functionality or data presentation. Issues with screenshots suggest users attempt to record information due to app limitations.
    - *Recommendation:* Improve the clarity and availability of transaction and balance information, ensuring real-time updates. Include options to export or share account statements directly from the app.
<br><br>
- *Topic 2:*
    - Technical Issues and Slowness
    - *Key Terms:* 'transfer,' 'slow,' 'work,' 'fail,' 'login,' 'make_transfer,' 'update,' 'fingerprint.'
    - *Interpretation:* Transfers are a critical feature but are affected by technical issues and slowness. The use of fingerprint authentication appears inconsistent. Terms like "fail," "worse," and "bad" reflect a negative perception of the technical experience.
    - *Recommendation:* Prioritize technical stability for transfers and the biometric authentication system. Optimize the app's overall performance to reduce loading times and improve responsiveness.
<br><br>
- *Topic 3:*
    - Branch Dependency and Basic Service Issues
    - *Key Terms:* 'ability,' 'account,' 'branch,' 'do,' 'bank,' 'money,' 'service,' 'payment,' 'security.'
    - *Interpretation:* Users mention tasks that still require branch visits, limiting digital autonomy. While security is valued, it might be affecting the ease of performing basic transactions.
    - *Recommendation:* Digitize key processes that currently require physical visits. Balance security with ease of use, reducing unnecessary friction in transactions.
<br><br>
- *Topic 4:*
    - Positive Experiences
    - *Key Terms:* 'good,' 'excellent,' 'fast,' 'secure,' 'easy,' 'practical,' 'functional,' 'moment,' 'operation.'
    - *Interpretation:* Some users highlight positive aspects of the app, such as speed, security, and ease of use. Despite criticisms, there are strengths that drive satisfaction among certain users.
    - *Recommendation:* Identify which features generate these positive experiences and enhance them further. Communicate the highlighted benefits in marketing campaigns to strengthen the app's perceived value.
<br><br>
- `General Insights:`
    - *Critical Areas:*
        - Technical Issues: Transfers and biometric access face recurring failures that impact user experience.
        - Transaction Visibility: Users seek clearer, more accessible information on transactions and balances.
        - Branch Dependency: A clear desire for greater digital autonomy exists.
    - *Positive Aspects:*
        - Ease of use, security, and functionality are well-regarded by some users.
    - *Opportunities for Improvement:*
        - Technical Stability: Address slowness and technical issues in key features.
        - UX/UI Optimization: Simplify the visualization of transactions and account statements.
        - Digitalization: Reduce the need for branch visits for basic services.
---
**`HSBC`**
- *Topic 1:*
    - Issues with Accounts, Transfers, and Customer Service
    - *Key Terms:* 'account,' 'worst,' 'terrible,' 'transfer,' 'service,' 'branch.'
    - *Interpretation:* Users express frustration with service quality and issues related to transfers and account management. Terms like "terrible," "terrible_service," and "working" indicate a generally negative perception of functionality and support. Dependency on branches remains a pain point.
    - *Recommendation:* Improve the stability of transfers and account-related functions. Strengthen customer support both in-app and at branches to efficiently resolve issues.
<br><br>
- *Topic 2:*
    - Technical Issues and User Experience
    - *Key Terms:* 'fast,' 'token,' 'login,' 'fail,' 'complicated,' 'log_in.'
    - *Interpretation:* Token-based authentication presents issues, making it difficult to access the app. While some users describe the app as "fast" and "excellent," terms like "bad" and "complicated" suggest an inconsistent user experience.
    - *Recommendation:* Optimize token functionality to ensure a smooth authentication experience. Simplify complicated processes to facilitate access and overall app usability.
<br><br>
- *Topic 3:*
    - Time Restrictions and Limited Functionality
    - *Key Terms:* 'open,' 'new,' 'hour,' 'night,' 'transfer,' 'difficult.'
    - *Interpretation:* Users perceive restrictions in app functionality during certain hours (e.g., at night), which causes frustration. There are mentions of prolonged times to complete tasks, such as transfers or accessing the app.
    - *Recommendation:* Ensure the app and its features are available 24/7, particularly for transfers. Reduce wait times for executing critical operations.
<br><br>
- *Topic 4:*
    - Mixed Experiences: Positive and Negative
    - *Key Terms:* 'good,' 'excellent,' 'slow,' 'secure,' 'garbage,' 'useful.'
    - *Interpretation:* Some users appreciate the app's security and ease of use, highlighting terms like "excellent" and "good_service." However, others describe the experience with negative terms like "slow" and "garbage," indicating inconsistent service quality.
    - *Recommendation:* Identify and replicate elements that generate positive experiences, such as security and ease of use. Continue working on optimizing speed and problem resolution to deliver a consistent experience.
<br><br>
- `General Insights:`
    - *Critical Areas:*
        - Technical Issues: Authentication with tokens, time restrictions, and prolonged operational delays are common complaints.
        - Branch Dependency: Complaints persist about needing to visit branches to resolve issues or complete tasks.
        - Service Perception: There is a polarization between users who appreciate certain aspects and those who find the service deficient.
    - *Positive Aspects:*
        - Users value the app's security, good service in some cases, and ease of use when it works correctly.
    - *Opportunities for Improvement:*
        - Technical Stability: Optimize critical functions such as transfers and authentication.
        - Availability: Eliminate time restrictions and ensure a seamless 24/7 experience.
        - Communication: Inform users about changes or updates to reduce frustration.
---
**`Banorte`**
- *Topic 1:*
    - Positive Experiences and Ease of Use
    - *Key Terms:* 'good,' 'excellent,' 'fast,' 'secure,' 'easy_to_use,' 'efficient,' 'simple,' 'intuitive.'
    - *Interpretation:* Many users highlight positive aspects of the app, such as its ease of use, speed, security, and intuitive design. This suggests the app meets the functionality and usability expectations for a significant portion of its users.
    - *Recommendation:* Maintain and enhance standards for ease of use and speed. Emphasize these positive aspects in communication campaigns to reinforce the app's value perception.
<br><br>
- *Topic 2:*
    - Technical Issues and Authentication
    - *Key Terms:* 'able,' 'user,' 'data,' 'problem,' 'account,' 'error,' 'mobile,' 'appear,' 'phone.'
    - *Interpretation:* Users face technical issues related to account access and authentication. Terms like "error" and "appear" suggest inconsistencies in functionality, possibly linked to device configurations or personal data handling.
    - *Recommendation:* Optimize the login and authentication experience, including support for quick technical issue resolution. Improve synchronization between user data and the app system to prevent errors.
<br><br>
- *Topic 3:*
    - Issues with Basic Banking Operations
    - *Key Terms:* 'card,' 'account,' 'money,' 'transfer,' 'ATM,' 'payment,' 'branch.'
    - *Interpretation:* Basic operations like transfers, payments, and card management occasionally cause frustration. Terms like "branch" and "ATM" indicate that some operations may be limited or require physical interaction, posing a barrier for users seeking digital autonomy.
    - *Recommendation:* Digitize more operations to reduce dependency on branches and ATMs. Ensure the stability of critical functionalities like transfers and payments.
<br><br>
- *Topic 4:*
    - Slowness and Wait Times
    - *Key Terms:* 'slow,' 'make_transfer,' 'delay,' 'update,' 'password,' 'interface,' 'improved.'
    - *Interpretation:* Users report slowness in certain operations, especially transfers and updates. While the user interface has improved, it appears not fully optimized for all scenarios.
    - *Recommendation:* Focus efforts on reducing loading times and improving the fluidity of key operations. Continue optimizing the user interface to make it more agile and efficient.
<br><br>
- `General Insights:`
    - *Highlighted Positive Aspects:*
        - The app is highly valued for its ease of use, security, speed, and intuitive interface.
        - There is a user base that considers it efficient and reliable.
    - *Critical Improvement Areas:*
        - Technical Issues: Errors in authentication and access generate frustration.
        - Basic Operations: Transactions like payments and transfers need greater stability and simplicity.
        - Slowness: Wait times affect the overall experience, particularly for transfers and updates.
    - *Opportunities for Improvement:*
        - Technical Optimization: Enhance stability and speed of critical functionalities.
        - Digitalization: Expand the offering of fully digital services to reduce reliance on physical interactions.
        - Communication: Highlight positive aspects in user communication and proactively address common complaints.

### 7 - General Conclusion

The thematic analysis of user reviews across major banking apps (Santander, BBVA, Banco Azteca, CitiBanamex, HSBC, and Banorte) reveals several converging issues and areas where improvements can enhance user experience. It also provides insights into which app might be the best-rated and highlights shared opportunities for all competitors in the digital banking space.

- **Common Problems Across Banking Apps**
    - Technical Issues and Stability:
        - Problems with updates often lead to new errors or worsen existing functionalities.
        - Slowness in executing critical operations, such as transfers or loading account information, is a common complaint.
    - Authentication and Login Challenges:
        - Issues with biometric authentication (e.g., fingerprint and facial recognition) and token systems disrupt user access.
        - Errors in login processes, including device compatibility and synchronization problems, frustrate users.
    - Dependency on Branches:
        - Despite offering digital platforms, many users report needing to visit branches for basic operations, such as managing cards, payments, or resolving account issues.
    - Inconsistent User Experience:
        - While some users highlight positive aspects like ease of use, security, and speed, others experience frequent failures and a lack of reliability.
    - Perception of Updates:
        - App updates often result in polarized reactions: some users welcome improvements, while others perceive updates as detrimental, especially when they disrupt critical features like transfers.

- **Best-Performing App Based on Reviews**
    - From the reviews, **Banorte** appears to be the best-rated app due to:
        - Strong positive feedback on its ease of use, speed, and intuitive interface.
        - High marks for security and efficiency in its core functionalities.
        - Fewer complaints about branch dependency compared to competitors.

However, Banorte still has areas to improve, such as reducing slowness in updates and transfers and addressing authentication issues.

- **Key Considerations for All Competitors**
    - To stay competitive and enhance user satisfaction, all banking apps should focus on the following:
        - Technical Optimization:
            - Implement rigorous testing and quality assurance for app updates to prevent the introduction of bugs.
            - Improve the stability of critical features such as transfers, authentication, and account management.
        - Enhanced Digital Experience:
            - Fully digitalize key banking operations to eliminate the need for branch visits.
            - Ensure 24/7 availability of features like transfers, reducing operational restrictions.
        - User-Centric Design:
            - Simplify interfaces to make them more intuitive and efficient for users of all technical abilities.
            - Provide real-time updates on account balances, transactions, and other critical data.
        - Transparent Communication:
            - Clearly communicate the benefits of updates and provide tutorials to ease user transitions.
            - Establish proactive support channels to address technical issues and user concerns quickly.
        - Leverage Positive Feedback:
            - Identify features that users value most (e.g., security, speed, and ease of use) and highlight them in marketing efforts.
            - Use positive user experiences as benchmarks to replicate and improve across all functionalities.

**Conclusion**
<br>
While all apps share similar challenges, Banorte sets itself apart with a more consistent positive user experience, particularly in terms of ease of use and functionality. To maintain competitive edges, all banking apps must address their common pain points, prioritize digitalization, and ensure stable, reliable, and user-friendly platforms. These measures will not only improve customer satisfaction but also position these apps as leaders in the increasingly competitive digital banking market.