In [39]:
import pandas as pd

In [40]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Inclusive upper bounds of the compound-score buckets for ratings 0..9:
# -9/11, -7/11, ..., 7/11, 9/11. A score above the last bound is rated 10.
_NPS_UPPER_BOUNDS = tuple(k / 11 for k in range(-9, 10, 2))

# Columns appended to the DataFrame, in the order they are added.
_SCORE_COLUMNS = (
    'positive_scores',
    'negative_scores',
    'neutral_scores',
    'compound_scores',
    'nps_indiv',
    'nps_category',
)


def _compound_to_nps(compound):
    """Map a compound sentiment score (expected in [-1, 1]) to a 0-10 rating.

    Counting the bucket bounds strictly below the score is equivalent to a
    chain of ``lower < compound <= upper`` tests, but it also keeps values
    below the first bound at 0 instead of letting them fall through to 10.
    """
    return sum(compound > bound for bound in _NPS_UPPER_BOUNDS)


def _nps_category(nps_score):
    """Return the NPS label for a 0-10 rating: 9-10 Promoter, 7-8 Passive, else Detractor."""
    if nps_score >= 9:
        return 'Promoter'
    if nps_score >= 7:
        return 'Passive'
    return 'Detractor'


def process_csv(csv_file, analyzer=None):
    """Read a CSV of reviews and append sentiment scores and NPS labels.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file passed to ``pandas.read_csv``; it must contain a ``review``
        column.
    analyzer : object, optional
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        with ``'pos'``, ``'neg'``, ``'neu'`` and ``'compound'`` keys. When
        omitted, a new ``SentimentIntensityAnalyzer`` is created. Passing one
        in lets callers reuse an analyzer across files or use a stub in tests.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus the columns ``positive_scores``,
        ``negative_scores``, ``neutral_scores``, ``compound_scores``,
        ``nps_indiv`` (0-10 rating) and ``nps_category``
        (``'Promoter'``/``'Passive'``/``'Detractor'``). Rows whose review is
        missing get missing values in all six columns.
    """
    topic_df = pd.read_csv(csv_file)

    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()

    missing_row = (None,) * len(_SCORE_COLUMNS)
    records = []
    for review in topic_df['review']:
        # An empty CSV cell is read as NaN; it is not text, so leave the row
        # unscored rather than handing a float to the analyzer.
        if pd.isna(review):
            records.append(missing_row)
            continue

        vs = analyzer.polarity_scores(str(review))
        rating = _compound_to_nps(vs['compound'])
        records.append((
            vs['pos'],
            vs['neg'],
            vs['neu'],
            vs['compound'],
            rating,
            _nps_category(rating),
        ))

    # Add one column at a time so the column order matches _SCORE_COLUMNS.
    for position, column in enumerate(_SCORE_COLUMNS):
        topic_df[column] = [record[position] for record in records]

    return topic_df


In [41]:
# Testing example: run process_csv on App_Responsiveness.csv and preview the
# first rows of the scored DataFrame.
process_csv('App_Responsiveness.csv').head()

Unnamed: 0.1,Unnamed: 0,key,review,positive_scores,negative_scores,neutral_scores,compound_scores,nps_indiv,nps_category
0,0,Difficulties in account registration,Is GXS looking into this issue? Going to be a ...,0.094,0.076,0.83,0.1926,6,Detractor
1,1,Difficulties in account registration,"Cannot register, keep nothing pop up On sign up",0.0,0.0,1.0,0.0,5,Detractor
2,2,Difficulties in account registration,"Stuck on Oops, something went wrong page when ...",0.096,0.213,0.691,-0.8316,0,Detractor
3,3,Difficulties in account registration,Cannot seem to open the app suddenly. Did not ...,0.0,0.089,0.911,-0.6652,1,Detractor
4,4,Difficulties in account registration,I seem to be having issues with the opening of...,0.0,0.0,1.0,0.0,5,Detractor


In [42]:
import pandas as pd
import numpy as np

def topic_nps(topic_df):
    """Compute the Net Promoter Score for all rows of a scored DataFrame.

    Parameters
    ----------
    topic_df : pandas.DataFrame
        Must contain an ``nps_category`` column; only the labels
        ``'Promoter'``, ``'Passive'`` and ``'Detractor'`` are counted.

    Returns
    -------
    float or None
        ``(promoters - detractors) / counted rows * 100`` rounded to two
        decimals, or ``None`` when none of the three labels occurs.
    """
    tallies = topic_df['nps_category'].value_counts()

    # Labels absent from the column count as zero.
    promoters, detractors, passives = (
        tallies.get(label, 0) for label in ('Promoter', 'Detractor', 'Passive')
    )
    respondents = promoters + detractors + passives

    # Nothing to score: signal "no NPS" instead of dividing by zero.
    if respondents == 0:
        return None

    score = ((promoters - detractors) / respondents) * 100
    return round(score, 2)

        

In [43]:
# Testing example: score App_Responsiveness.csv, then compute the NPS over all
# of its reviews. topic_df is assigned at notebook level here.
topic_df=process_csv('App_Responsiveness.csv')
topic_nps(topic_df)

-11.43

In [44]:
def _nps_from_categories(categories):
    """Return the NPS (rounded to 2 decimals) for a Series of NPS labels, or None if none are counted."""
    label_counts = categories.value_counts()
    promoter_count = label_counts.get('Promoter', 0)
    detractor_count = label_counts.get('Detractor', 0)
    passive_count = label_counts.get('Passive', 0)
    total_count = promoter_count + detractor_count + passive_count

    if total_count == 0:
        return None
    return round(((promoter_count - detractor_count) / total_count) * 100, 2)


def subtopic_nps(topic_df):
    """Compute the Net Promoter Score separately for each subtopic.

    Parameters
    ----------
    topic_df : pandas.DataFrame
        Must contain a ``key`` column (the subtopic of each row) and an
        ``nps_category`` column with ``'Promoter'``/``'Passive'``/``'Detractor'``
        labels.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``key`` value, in order of first appearance, with
        columns ``subtopic`` and ``NPS``. ``NPS`` is ``None`` for a subtopic
        with no counted labels. An input without any rows yields an empty
        DataFrame that still has both columns.
    """
    subtopics_nps_scores = {}

    for key in topic_df['key'].unique():
        key_df = topic_df[topic_df['key'] == key]
        subtopics_nps_scores[key] = _nps_from_categories(key_df['nps_category'])

    # Build the result once, after the loop, so it is defined even when the
    # input has no keys at all.
    return pd.DataFrame(list(subtopics_nps_scores.items()), columns=['subtopic', 'NPS'])

In [45]:
# Testing example: per-subtopic NPS for the scored reviews.
# Relies on topic_df having been assigned by an earlier cell.
subtopic_nps(topic_df)

Unnamed: 0,subtopic,NPS
0,Difficulties in account registration,-85.71
1,Issues with loan application and approval process,-25.0
2,Slow transfer speed and connectivity problems,0.0
3,Lack of certain features such as bank transfer,0.0
4,Poor customer service,-100.0
5,User interface and design issues,-100.0
6,Difficulties in navigating the app,100.0
7,Bugs and glitches in the app,-100.0
8,Lack of clear instructions or error messages,0.0
9,Others,35.71
