In [1]:
# Import the libraries

import pandas as pd

In [2]:
# Locations of the Q4 2020 NPS exports. Every file lives in the same folder,
# so the folder is spelled out once and each path is built from it.

datasets_dir = 'C:/Users/shree/Datacamp_Projects/Writing Functions for Product Analysis/datasets'

path_email_file = datasets_dir + '/2020Q4_nps_email.csv'
path_mobile_file = datasets_dir + '/2020Q4_nps_mobile.csv'
path_web_file = datasets_dir + '/2020Q4_nps_web.csv'
path_corrupted_file = datasets_dir + '/corrupted.csv'

### 1) Function to convert CSV file into a dataframe. Add a column for the source category name.

In [3]:
def convert_csv_to_df(csv_name, source_type):
    """Load an NPS CSV and tag every row with the channel it came from.

    Args:
        csv_name (str): Path of the NPS CSV file to read.
        source_type (str): Label written to the ``source`` column of every row.

    Returns:
        DataFrame: The CSV contents plus a ``source`` column.
    """
    responses = pd.read_csv(csv_name)
    # assign() attaches the same label to every row and hands back the frame.
    return responses.assign(source=source_type)

# Try the loader on the email responses; the displayed frame shows the added `source` column.
convert_csv_to_df(path_email_file, "email")

Unnamed: 0,response_date,user_id,nps_rating,source
0,2020-11-06,11037,7,email
1,2020-12-24,34434,9,email
2,2020-12-03,49547,8,email
3,2020-10-04,13821,7,email
4,2020-10-23,29407,9,email
...,...,...,...,...
1947,2020-12-11,27706,9,email
1948,2020-10-20,35876,7,email
1949,2020-12-04,39929,9,email
1950,2020-11-23,46900,6,email


### 2) Function that verifies that the files inputted are valid (i.e., in the required format).

In [4]:
def check_csv(csv_name):
    """Check that a CSV's header row is exactly response_date,user_id,nps_rating.

    Only the first line of the file is read; the data rows are not inspected.

    Args:
        csv_name (str): Path of the CSV file to validate.

    Returns:
        bool: True if the first line is the expected header, False otherwise.
    """
    expected_header = "response_date,user_id,nps_rating"

    with open(csv_name) as file:
        first_line = file.readline()

    # Strip only the line terminator before comparing, so a header-only file
    # saved without a trailing newline is still recognised as valid.
    return first_line.rstrip("\r\n") == expected_header

# The corrupted file's first line is not the expected header, so this returns False.
check_csv(path_corrupted_file)

False

### 3) Function to concatenate the dataframes into one.

In [12]:
def combine_nps_csvs(csvs_dict):
    """Load every valid NPS CSV and stack the results into one DataFrame.

    Each key of ``csvs_dict`` is validated with check_csv. Valid files are
    loaded with convert_csv_to_df using the matching source type; invalid
    files are reported with a printed message and skipped.

    Args:
        csvs_dict (dict): Maps each CSV file path (str) to its source type (str).

    Returns:
        DataFrame: The rows of all valid files, in dict order, each keeping the
        row index it had in its own file. An empty DataFrame if no file is valid.
    """
    frames = []

    for csv_path, source_type in csvs_dict.items():
        if check_csv(csv_path):
            frames.append(convert_csv_to_df(csv_path, source_type))
        else:
            file_name = csv_path.rsplit('/', 1)[-1]
            print(file_name + " is not a valid file and will not be added.")

    # pd.concat raises on an empty list, so keep returning an empty frame
    # when nothing passed validation.
    if not frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of re-copying the growing frame
    # (seeded with an empty DataFrame) on every loop iteration.
    return pd.concat(frames)

# Map each CSV path to the label that will be stored in its `source` column.
# corrupted.csv is included to show that a file failing check_csv is reported and skipped.
my_files = {
          path_email_file: "email",
          path_mobile_file: "mobile",
          path_web_file: "web",
          path_corrupted_file: "social_media"
}


# NOTE(review): the output recorded for this cell includes an nps_group column, which only the
# later redefinition of convert_csv_to_df (section 5) adds. Presumably this cell was re-run
# out of order; confirm with Restart Kernel -> Run All.
combine_nps_csvs(my_files)

corrupted.csv is not a valid file and will not be added.


Unnamed: 0,response_date,user_id,nps_rating,source,nps_group
0,2020-11-06,11037,7,email,passive
1,2020-12-24,34434,9,email,promoter
2,2020-12-03,49547,8,email,passive
3,2020-10-04,13821,7,email,passive
4,2020-10-23,29407,9,email,promoter
...,...,...,...,...,...
2285,2020-12-25,10656,8,web,passive
2286,2020-11-07,32918,10,web,promoter
2287,2020-10-16,15667,10,web,promoter
2288,2020-11-20,47153,7,web,passive


### 4) Function to categorize the NPS rating into appropriate group.

In [13]:
def categorize_nps(rating):
    """Map an NPS rating to "promoter", "passive", "detractor" or "invalid".

    Bands: 0-6 is a detractor, above 6 and below 9 is passive, 9-10 is a
    promoter. Anything outside the closed range 0-10 (including NaN) is
    "invalid".

    Args:
        rating: The numeric NPS rating.

    Returns:
        str: The NPS category, or "invalid" for an out-of-range rating.
    """
    # Written as a positive range test so that NaN, for which every comparison
    # is False, is rejected here. The upper bound is inclusive 10, so values
    # such as 10.5 are no longer treated as promoters.
    if not (0 <= rating <= 10):
        return 'invalid'

    if rating <= 6:
        return 'detractor'
    if rating < 9:
        return 'passive'
    return 'promoter'


# Spot check: a rating of 8 is categorized as passive.
categorize_nps(8)

'passive'

### 5) Function that adds a column for the NPS group and populate it depending on the rating.

In [14]:
def convert_csv_to_df(csv_name, source_type):
    """Load an NPS CSV, tag its source and classify every rating.

    Replaces the earlier convert_csv_to_df in this notebook: in addition to
    the ``source`` column it adds ``nps_group``, obtained by passing each
    ``nps_rating`` value through categorize_nps.

    Args:
        csv_name (str): Path of the NPS CSV file to read.
        source_type (str): Label written to the ``source`` column of every row.

    Returns:
        DataFrame: The CSV contents plus ``source`` and ``nps_group`` columns.
    """
    responses = pd.read_csv(csv_name)
    rating_groups = responses['nps_rating'].apply(categorize_nps)

    return responses.assign(source=source_type, nps_group=rating_groups)

# Load the mobile responses with the redefined loader; the output now carries an nps_group column.
convert_csv_to_df(path_mobile_file, "mobile")

Unnamed: 0,response_date,user_id,nps_rating,source,nps_group
0,2020-12-29,14178,3,mobile,detractor
1,2020-10-29,33221,1,mobile,detractor
2,2020-11-01,21127,10,mobile,promoter
3,2020-12-07,42894,3,mobile,detractor
4,2020-11-26,30501,5,mobile,detractor
...,...,...,...,...,...
1796,2020-12-29,49529,3,mobile,detractor
1797,2020-12-24,23671,7,mobile,passive
1798,2020-11-28,39954,7,mobile,passive
1799,2020-12-19,21098,7,mobile,passive


### 6) Function to calculate the NPS across all sources.

In [15]:
def calculate_nps(dataframe):
    """Compute the Net Promoter Score over every row of the given frame.

    NPS = (promoters - detractors) / total responses * 100. All three counts
    ignore rows whose ``nps_rating`` is missing.

    Args:
        dataframe (DataFrame): Responses with ``nps_rating`` and ``nps_group``
            columns.

    Returns:
        float: The Net Promoter Score of the supplied responses.
    """
    ratings = dataframe['nps_rating']
    groups = dataframe['nps_group']

    def responses_in(group_name):
        # Number of non-null ratings whose group label matches.
        return ratings[groups == group_name].count()

    net_promoters = responses_in('promoter') - responses_in('detractor')
    return net_promoters / ratings.count() * 100

# Only the three valid Q4 files this time; corrupted.csv is left out.
my_files = {
          path_email_file: "email",
          path_mobile_file: "mobile",
          path_web_file: "web"
}

# Combine all sources into one frame, then compute the overall NPS.
q4_nps = combine_nps_csvs(my_files)
calculate_nps(q4_nps)

9.995035578355122

### 7) Function to output the NPS for every source 

In [16]:
def calculate_nps_by_source(dataframe):
    """Compute the Net Promoter Score separately for each source.

    Rows are grouped by the ``source`` column and calculate_nps is applied to
    each group.

    Args:
        dataframe (DataFrame): The combined responses, with ``source``,
            ``nps_rating`` and ``nps_group`` columns.

    Returns:
        Series: One NPS value per source, indexed by source name.
    """
    # Hand calculate_nps only the two columns it reads. Applying it to the
    # whole grouped frame also operates on the grouping column, which newer
    # pandas versions deprecate.
    nps_inputs = dataframe.groupby('source')[['nps_rating', 'nps_group']]
    return nps_inputs.apply(calculate_nps)

# q4_nps (the combined email, mobile and web responses) was already built in the
# previous section from the same three files, so reuse it instead of re-reading the CSVs.
calculate_nps_by_source(q4_nps)

source
email     18.596311
mobile   -14.714048
web       22.096070
dtype: float64

## Conclusion:<br>
    
Among the three source categories (email, mobile, and web), mobile responses have an NPS of about -15, 
which is noticeably lower than email (about 19) and web (about 22). <br>
The data therefore show that respondents on mobile rate the product lower than respondents from the other sources. 
This is some evidence that the mobile app UI experience needs improvement. 