In [50]:
import pandas as pd

In [51]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Upper edges of NPS buckets 0..9 on the compound-score scale: -9/11, -7/11,
# ..., 9/11. Together they split [-1, 1] into eleven equal-width buckets, each
# closed on its upper edge; anything above the last edge belongs to bucket 10.
_NPS_BUCKET_EDGES = tuple((2 * k - 9) / 11 for k in range(10))


def _compound_to_nps(compound):
    """Map a compound sentiment score to an individual 0-10 NPS rating.

    The rating is the number of bucket edges strictly below ``compound``, so a
    value sitting exactly on an edge stays in the lower bucket. Values outside
    [-1, 1] are clamped to the nearest end of the scale (0 or 10) instead of
    falling through to the top rating.
    """
    return sum(compound > edge for edge in _NPS_BUCKET_EDGES)


def _nps_to_category(nps_score):
    """Label a 0-10 rating: 9-10 'Promoter', 7-8 'Passive', otherwise 'Detractor'."""
    if nps_score >= 9:
        return 'Promoter'
    if nps_score >= 7:
        return 'Passive'
    return 'Detractor'


def process_csv(csv_file):
    """Score every review in a topic CSV and derive per-review NPS labels.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file to load with ``pd.read_csv``; it must contain a ``review``
        column.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``review`` cast to ``str`` and six columns
        appended, in this order: ``positive_scores``, ``negative_scores``,
        ``neutral_scores``, ``compound_scores`` (the ``pos``/``neg``/``neu``/
        ``compound`` entries returned by ``polarity_scores``), ``nps_indiv``
        (0-10 rating derived from the compound score) and ``nps_category``
        ('Promoter', 'Passive' or 'Detractor').

    Raises
    ------
    KeyError
        If the file has no ``review`` column.
    """
    topic_df = pd.read_csv(csv_file)
    # astype(str) also stringifies missing values (NaN becomes the text
    # 'nan'), so every row is handed to the analyzer as a string.
    topic_df['review'] = topic_df['review'].astype(str)

    analyzer = SentimentIntensityAnalyzer()

    # One polarity dict per review, in row order.
    polarity = [analyzer.polarity_scores(review) for review in topic_df['review']]
    nps_indiv = [_compound_to_nps(scores['compound']) for scores in polarity]

    topic_df['positive_scores'] = [scores['pos'] for scores in polarity]
    topic_df['negative_scores'] = [scores['neg'] for scores in polarity]
    topic_df['neutral_scores'] = [scores['neu'] for scores in polarity]
    topic_df['compound_scores'] = [scores['compound'] for scores in polarity]
    topic_df['nps_indiv'] = nps_indiv
    topic_df['nps_category'] = [_nps_to_category(score) for score in nps_indiv]

    return topic_df


In [52]:
# Smoke test: score one topic file and preview the first rows of the result.
process_csv("../Data/Topics/App Responsiveness.csv").head()

Unnamed: 0.1,Unnamed: 0,key,review,positive_scores,negative_scores,neutral_scores,compound_scores,nps_indiv,nps_category
0,0,User interface issues,I have to say that the UIUX is one of the best...,0.134,0.07,0.796,0.6908,9,Promoter
1,1,User interface issues,User friendly,0.762,0.0,0.238,0.4939,8,Passive
2,2,User interface issues,Great UI and userfriendly,0.577,0.0,0.423,0.6249,8,Passive
3,3,User interface issues,Friendly user,0.762,0.0,0.238,0.4939,8,Passive
4,4,Account sign up issues,Have been waiting for a slot for the account s...,0.225,0.032,0.744,0.9671,10,Promoter


In [53]:
import pandas as pd
import numpy as np

def topic_nps(topic_df):
    """Return the Net Promoter Score of a whole scored topic frame.

    Reads the ``nps_category`` column and computes
    ``(promoters - detractors) / (promoters + detractors + passives) * 100``,
    rounded to two decimals. Only the labels 'Promoter', 'Detractor' and
    'Passive' are counted. Returns ``None`` when none of them occur.
    """
    tally = topic_df['nps_category'].value_counts()
    promoters, detractors, passives = (
        tally.get(label, 0) for label in ('Promoter', 'Detractor', 'Passive')
    )
    respondents = promoters + detractors + passives

    # Guard clause: with nothing to divide by there is no score to report.
    if respondents == 0:
        return None

    share = (promoters - detractors) / respondents
    return round(share * 100, 2)

        

In [54]:
# Smoke test: overall NPS for one topic file.
# topic_df is left at notebook scope; a later cell passes it to subtopic_nps.
topic_df=process_csv("../Data/Topics/App Responsiveness.csv")
topic_nps(topic_df)

-17.5

In [55]:
def subtopic_nps(topic_df):
    """Compute the Net Promoter Score of each subtopic in a scored topic frame.

    Parameters
    ----------
    topic_df : pandas.DataFrame
        Frame with a ``key`` column (the subtopic of each row) and an
        ``nps_category`` column holding 'Promoter', 'Passive' or 'Detractor'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``key`` value, in order of first appearance, with
        columns ``subtopic`` and ``NPS``. ``NPS`` is
        ``(promoters - detractors) / total * 100`` rounded to two decimals, or
        missing when the subtopic has none of the three labels. An input
        without rows yields an empty frame with the same two columns.
    """
    subtopics_nps_scores = {}

    for key in topic_df['key'].unique():
        key_df = topic_df[topic_df['key'] == key]
        # Count the occurrences of each label within this subtopic
        label_counts = key_df['nps_category'].value_counts()

        promoter_count = label_counts.get('Promoter', 0)
        detractor_count = label_counts.get('Detractor', 0)
        passive_count = label_counts.get('Passive', 0)
        total_count = promoter_count + detractor_count + passive_count

        if total_count == 0:
            subtopics_nps_scores[key] = None
        else:
            nps = ((promoter_count - detractor_count) / total_count) * 100
            subtopics_nps_scores[key] = round(nps, 2)

    # Build the result once, after the loop: doing it inside the loop rebuilt
    # the frame on every pass and left the name unbound when there were no keys.
    return pd.DataFrame(list(subtopics_nps_scores.items()), columns=['subtopic', 'NPS'])

In [56]:
# Smoke test: per-subtopic NPS; relies on topic_df created by an earlier cell.
subtopic_nps(topic_df)

Unnamed: 0,subtopic,NPS
0,User interface issues,25.0
1,Account sign up issues,-57.14
2,Login issues,-100.0
3,Fingerprint login issues,-100.0
4,Application processing issues,-100.0
5,Interest rate issues,100.0
6,Loan approval issues,100.0
7,Transfer issues,40.0
8,App performance issues,-80.0
9,Customer support issues,0.0
