In [None]:
import pandas as pd
import os
from textblob import TextBlob
from transformers import pipeline
from tqdm import tqdm

# 1. Initialize Sentiment Analyzers
def setup_analyzers():
    """Create the DistilBERT sentiment classifier used by the analysis loop.

    TextBlob is instantiated per text and needs no setup, so the only object
    built here is the Hugging Face "sentiment-analysis" pipeline.

    Returns:
        The pipeline object on success, or None if building it raised an
        exception (the error is printed instead of being propagated).
    """
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    try:
        print("Initializing sentiment analyzers...")
        classifier = pipeline("sentiment-analysis", model=model_name)
    except Exception as e:
        # Callers treat None as "analyzer unavailable" and stop early.
        print(f"Error initializing analyzers: {str(e)}")
        return None
    return classifier

In [3]:
# 2. Sentiment Analysis Functions
def analyze_with_textblob(text):
    """Score `text` with TextBlob and label it by the sign of its polarity.

    Args:
        text: The review text handed to TextBlob.

    Returns:
        dict with three keys:
            'textblob_polarity'     - polarity reported by TextBlob
            'textblob_subjectivity' - subjectivity reported by TextBlob
            'textblob_sentiment'    - 'positive' when polarity > 0,
                                      'negative' when polarity < 0,
                                      otherwise 'neutral'
    """
    sentiment = TextBlob(text).sentiment
    polarity = sentiment.polarity

    if polarity > 0:
        label = 'positive'
    elif polarity < 0:
        label = 'negative'
    else:
        label = 'neutral'

    return {
        'textblob_polarity': polarity,
        'textblob_subjectivity': sentiment.subjectivity,
        'textblob_sentiment': label
    }

def analyze_with_distilbert(text, analyzer):
    """Score `text` with the supplied sentiment classifier.

    Args:
        text: The review text. Only its first 512 characters are passed on.
        analyzer: Callable that takes a string and returns a sequence whose
            first item is a mapping with 'score' and 'label' keys.

    Returns:
        dict with 'distilbert_score' (the classifier's score) and
        'distilbert_sentiment' (the classifier's label, lower-cased). If the
        classifier call or result parsing fails, a neutral placeholder
        (score 0.5, 'neutral') is returned so one bad review does not abort
        the whole run.
    """
    try:
        # NOTE: this is a character-level cut intended to keep the input
        # within the model's limit; it is not a token count.
        result = analyzer(text[:512])[0]
        return {
            'distilbert_score': result['score'],
            'distilbert_sentiment': result['label'].lower()
        }
    except Exception:
        # Deliberately best-effort, but limited to Exception so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        return {
            'distilbert_score': 0.5,
            'distilbert_sentiment': 'neutral'
        }

In [4]:
# 3. Main Analysis Function
def analyze_reviews(input_file='cleaned_reviews.csv', output_file='analyzed_reviews.csv'):
    """Run the complete sentiment analysis pipeline over a CSV of reviews.

    Each row's text is read from the 'clean_review' column when the row has
    one, otherwise from 'review'. Rows whose text is missing (NaN) or blank
    are skipped. Every remaining row is scored with TextBlob and DistilBERT,
    and the original columns plus the sentiment columns are written to
    `output_file`.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write the scored rows to.

    Returns:
        DataFrame of the scored rows, or None when the input file does not
        exist, the analyzer cannot be initialized, or any step raises (the
        error is printed instead of being propagated).
    """
    # File validation
    if not os.path.exists(input_file):
        print(f"Error: File {os.path.abspath(input_file)} not found")
        return None

    try:
        # Load data
        df = pd.read_csv(input_file)
        print(f"✅ Loaded {len(df)} reviews from {input_file}")

        # Initialize analyzer
        distilbert = setup_analyzers()
        if distilbert is None:
            return None

        # Analyze each review
        results = []
        print("Analyzing sentiments...")
        for _, row in tqdm(df.iterrows(), total=len(df)):
            raw_text = row.get('clean_review', row.get('review', ''))
            # A missing cell is read as NaN; str(NaN) would be the non-empty
            # string 'nan' and would be scored as if it were a real review.
            if pd.isna(raw_text):
                continue
            text = str(raw_text)
            if not text.strip():
                continue

            # Get TextBlob results
            textblob_results = analyze_with_textblob(text)

            # Get DistilBERT results
            distilbert_results = analyze_with_distilbert(text, distilbert)

            # Combine the original columns with both sets of scores
            results.append({
                **row.to_dict(),
                **textblob_results,
                **distilbert_results
            })

        # Create analyzed DataFrame
        analyzed_df = pd.DataFrame(results)

        # Save results
        analyzed_df.to_csv(output_file, index=False)
        print(f"✅ Saved {len(analyzed_df)} analyzed reviews to {output_file}")
        return analyzed_df

    except Exception as e:
        print(f"❌ Analysis failed: {str(e)}")
        return None

In [6]:
# 4. Aggregation Function
def aggregate_sentiments(analyzed_df, output_file='aggregated_sentiments.csv'):
    """Aggregate sentiment metrics per (bank, rating) group and save them.

    For every sentiment column present in `analyzed_df` the following is
    computed per group:
        textblob_polarity     - mean and count
        distilbert_score      - mean
        textblob_sentiment    - {label: count} dict
        distilbert_sentiment  - {label: count} dict

    Args:
        analyzed_df: DataFrame with 'bank' and 'rating' columns plus any of
            the sentiment columns listed above.
        output_file: Path of the CSV file the aggregated table is written to.

    Returns:
        The aggregated DataFrame indexed by (bank, rating), or None when the
        input is None/empty, a grouping column is missing, or none of the
        sentiment columns are present.
    """
    if analyzed_df is None or len(analyzed_df) == 0:
        return None

    # Without both grouping keys groupby() would raise KeyError; report it
    # the same way the other "nothing to do" cases are reported.
    group_cols = ['bank', 'rating']
    missing_cols = [col for col in group_cols if col not in analyzed_df.columns]
    if missing_cols:
        print(f"Cannot aggregate: missing grouping column(s) {missing_cols}")
        return None

    print("\nAggregating results by bank and rating...")

    # Define aggregations
    aggregations = {
        'textblob_polarity': ['mean', 'count'],
        'distilbert_score': ['mean'],
        'textblob_sentiment': lambda x: x.value_counts().to_dict(),
        'distilbert_sentiment': lambda x: x.value_counts().to_dict()
    }

    # Keep only the metrics whose column actually exists
    aggregations = {
        col: how for col, how in aggregations.items()
        if col in analyzed_df.columns
    }

    if not aggregations:
        print("No sentiment metrics available for aggregation")
        return None

    # Group and aggregate
    grouped = analyzed_df.groupby(group_cols).agg(aggregations)

    # Flatten (column, function) pairs into 'column_function' names. When
    # only single-function aggregations remain the columns are already flat
    # strings, and joining their characters would corrupt the names.
    if isinstance(grouped.columns, pd.MultiIndex):
        grouped.columns = ['_'.join(col).strip() for col in grouped.columns.values]

    # Save and return
    grouped.to_csv(output_file)
    print(f"✅ Saved aggregated results to {output_file}")
    return grouped


In [9]:
# 5. Run Complete Analysis
if __name__ == "__main__":
    # TextBlob is already imported by the first cell; this is only a fallback
    # for an environment where the package is not installed yet.
    try:
        from textblob import TextBlob
    except ImportError:
        import subprocess
        import sys

        print("Installing TextBlob...")
        # Use the running interpreter's own pip so the package lands in the
        # environment this code executes in, and fail loudly if the install
        # does not succeed (os.system would ignore a non-zero exit status).
        subprocess.check_call([sys.executable, "-m", "pip", "install", "textblob"])
        from textblob import TextBlob

    # Run analysis
    analyzed_data = analyze_reviews()

    # Aggregate results; both steps return None on failure
    if analyzed_data is not None:
        aggregated = aggregate_sentiments(analyzed_data)
        if aggregated is not None:
            print("\nSample Aggregated Results:")
            print(aggregated.head(15))

✅ Loaded 987 reviews from cleaned_reviews.csv
Initializing sentiment analyzers...


Device set to use cpu


Analyzing sentiments...


100%|███████████████████████████████| 987/987 [00:47<00:00, 20.89it/s]


✅ Saved 987 analyzed reviews to analyzed_reviews.csv

Aggregating results by bank and rating...
✅ Saved aggregated results to aggregated_sentiments.csv

Sample Aggregated Results:
               textblob_polarity_mean  textblob_polarity_count  \
bank   rating                                                    
BOA    1                    -0.092426                      162   
       2                     0.078333                       10   
       3                     0.079761                       26   
       4                     0.194597                       13   
       5                     0.301560                      116   
CBE    1                    -0.036426                       46   
       2                     0.087803                       14   
       3                     0.107333                       18   
       4                     0.362444                       37   
       5                     0.397172                      206   
Dashen 1                    

In [17]:
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download required NLTK data.
# 'punkt_tab' is the tokenizer resource that newer NLTK releases look up for
# word_tokenize; 'punkt' is kept for older releases. It is listed last so the
# three original resources are always requested first.
nltk.download(['punkt', 'stopwords', 'wordnet', 'punkt_tab'])

# Shared helpers used by extract_keywords below
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Theme name -> trigger terms. Insertion order is the order in which matched
# themes are listed in the output, so keep it stable.
themes = {
    'Account Access': [
        'login',
        'password',
        'account',
    ],
    'Transactions': [
        'transfer',
        'payment',
        'transaction',
    ],
    'App Issues': [
        'crash',
        'error',
        'bug',
        'slow',
    ],
    'UI/UX': [
        'interface',
        'design',
        'layout',
    ],
}

def clean_text(text):
    """Return `text` as a lower-cased string with punctuation removed.

    The value is coerced with str() first, so non-string inputs are accepted.
    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space).
    """
    lowered = str(text).lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return without_punctuation

def extract_keywords(text):
    """Tokenize the cleaned text and return its lemmatized keywords.

    Tokens that are in `stop_words` or are two characters or shorter are
    dropped; the remaining tokens are lemmatized and returned in their
    original order (duplicates are kept).
    """
    keywords = []
    for token in word_tokenize(clean_text(text)):
        if token in stop_words:
            continue
        if len(token) <= 2:
            continue
        keywords.append(lemmatizer.lemmatize(token))
    return keywords

def assign_themes(text):
    """Return the themes whose trigger terms occur among the text's keywords.

    Matched theme names are joined with ', ' in the order they appear in
    `themes`; 'Other' is returned when no theme matches.
    """
    keyword_set = set(extract_keywords(text))

    matched = []
    for theme_name, trigger_terms in themes.items():
        # A theme matches as soon as one of its terms is a keyword.
        if not keyword_set.isdisjoint(trigger_terms):
            matched.append(theme_name)

    if not matched:
        return 'Other'
    return ', '.join(matched)

# Load the reviews, tag each one with its themes, and persist the result
df = pd.read_csv('cleaned_reviews.csv')
df = df.assign(themes=df['review'].map(assign_themes))
df.to_csv('thematic_analysis_results.csv', index=False)

# Show a short preview of the tagged reviews
print("Analysis complete. Sample results:")
print(df.loc[:, ['review', 'themes']].head())

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Analysis complete. Sample results:
                                              review themes
0                                           20 years  Other
1  A great app. It's like carrying a bank in your...  Other
2                      More than garrantty bank EBC.  Other
3  really am happy to this app it is Siple to use...  Other
4  I liked this app. But the User interface is ve...  UI/UX
