In [21]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from scipy import stats

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from sklearn.tree import export_graphviz
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import graphviz
from graphviz import Graph

import env
import acquire
import prepare
import os

# turn off pink boxes for demo
import warnings
warnings.filterwarnings("ignore")

In [6]:
# change setting to show all columns
pd.set_option('display.max_columns', None)

# Telco Customer Churn

Lori Ainslie, 11/29/21

***

# Executive Summary

## Project Goals

- To identify drivers of customer churn and find a solution for increasing customer retention

- To construct a model that accurately predicts which customers are most likely to churn to focus retention efforts

## Project Description

- Reducing churn is important to the company because lost customers means lost revenue. The cost of acquiring a new customer is much higher than maintaining a customer so this project will attempt to identify strategies that reduce customer churn. In the process, we are also looking for ways to improve customer satisfaction to increase the company's rate of growth.

***

# Acquisition

In [22]:
# pull in telco data & check out what first few rows look like
df = acquire.get_telco_data()
df.head()

Unnamed: 0,payment_type_id,internet_service_type_id,contract_type_id,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,online_security,online_backup,device_protection,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,contract_type,internet_service_type,payment_type
0,2,1,3,0016-QLJIS,Female,0,Yes,Yes,65,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,90.45,5957.9,No,Two year,DSL,Mailed check
1,4,1,3,0017-DINOC,Male,0,No,No,54,No,No phone service,Yes,No,No,Yes,Yes,No,No,45.2,2460.55,No,Two year,DSL,Credit card (automatic)
2,3,1,3,0019-GFNTW,Female,0,No,No,56,No,No phone service,Yes,Yes,Yes,Yes,No,No,No,45.05,2560.1,No,Two year,DSL,Bank transfer (automatic)
3,4,1,3,0056-EPFBG,Male,0,Yes,Yes,20,No,No phone service,Yes,No,Yes,Yes,No,No,Yes,39.4,825.4,No,Two year,DSL,Credit card (automatic)
4,3,1,3,0078-XZMHT,Male,0,Yes,No,72,Yes,Yes,No,Yes,Yes,Yes,Yes,Yes,Yes,85.15,6316.2,No,Two year,DSL,Bank transfer (automatic)


***

# Preparation

In [None]:
# check column names, shape, data types, null values
df.info()

In [None]:
# check for outliers and get statistical information
df[['monthly_charges', 'total_charges']].describe()

In [None]:
# change data type for total charges from string to float
df.total_charges = pd.to_numeric(df.total_charges, errors='coerce')

In [None]:
df.total_charges.dtype

In [None]:
# drop rows where new customers have not yet had opportunity to churn
df = df[df.total_charges.notnull()]
df.shape

In [None]:
# review column names
df.columns

In [None]:
# check categorical column values
col_list = df.columns.tolist()
for col in col_list:
    if col not in ['customer_id', 'tenure', 'monthly_charges', 'total_charges']:
        print(df[col].value_counts(dropna=False))

In [None]:
# check data
df.head()

In [None]:
# replace information included in another column to create binary values and simplify encoding
df.replace('No internet service', 'No', inplace=True)
df.online_security.value_counts()

In [None]:
# create df of dummy variables for columns with two values, dropping first
dummy_df1 = pd.get_dummies(df[['gender', 'partner', 'dependents', 'online_security', 'online_backup', 'device_protection', 'tech_support', 'streaming_tv', 'streaming_movies', 'paperless_billing', 'churn']], drop_first=True)

In [None]:
# create df of dummy variables for columns with more than two values, keeping all columns for clarity
dummy_df2 = pd.get_dummies(df[['multiple_lines','contract_type', 'internet_service_type', 'payment_type']])

In [None]:
# identify and drop columns that are unnecessary or duplicated
cols_to_drop = ['payment_type_id', 'internet_service_type_id', 'contract_type_id',
                'gender','partner', 'dependents', 'phone_service', 'online_security',
       'online_backup', 'device_protection', 'tech_support', 'streaming_tv',
       'streaming_movies', 'paperless_billing', 'churn']
df = df.drop(columns=cols_to_drop)

In [None]:
# concatenate dummy variable dfs onto original dataframe
df = pd.concat([df, dummy_df1, dummy_df2], axis=1)

In [None]:
# check shape to see if changes were made
df.shape

In [None]:
# check column names
df.columns

In [None]:
# rename columns for clarity
df.rename(columns={
                'gender_Male': 'is_male',
                'partner_Yes': 'has_partner',
                'dependents_Yes': 'has_dependents',
                'online_security_Yes': 'online_security',
                'online_backup_Yes': 'online_backup',
                'device_protection_Yes': 'device_protection',
                'tech_support_Yes': 'tech_support',
                'streaming_tv_Yes': 'streaming_tv',
                'streaming_movies_Yes': 'streaming_movies',
                'paperless_billing_Yes': 'paperless_billing',
                'churn_Yes': 'churn',
                'multiple_lines_No': 'one_line',
                'multiple_lines_No phone service': 'no_phone_service',
                'multiple_lines_Yes': 'has_multiple_lines',
                'contract_type_Month-to-month': 'month_to_month_contract',
                'contract_type_One year': 'one_year_contract',
                'contract_type_Two year': 'two_year_contract',
                'internet_service_type_DSL': 'dsl_internet',
                'internet_service_type_Fiber optic': 'fiber_optic_internet',
                'internet_service_type_None': 'no_internet_service',
                'payment_type_Bank transfer (automatic)': 'bank_transfer_payment_automatic',
                'payment_type_Credit card (automatic)': 'credit_card_payment_automatic',
                'payment_type_Electronic check': 'electronic_check_payment',
                'payment_type_Mailed check': 'mailed_check_payment'}, inplace=True)

In [None]:
# check that changes were made
df.columns

In [None]:
# specify numerical columns
num_cols = ['senior_citizen', 'multiple_lines', 'contract_type', 'internet_service_type',
       'payment_type', 'is_male', 'has_partner', 'has_dependents',
       'online_security', 'online_backup', 'device_protection', 'tech_support',
       'streaming_tv', 'streaming_movies', 'paperless_billing', 'churn']

In [None]:
# create histogram for numerical columns to check out distribution
for col in num_cols: 
    plt.hist(df[col])
    plt.title(col)
    plt.show()

In [None]:
df.monthly_charges.value_counts(bins=5, normalize=True, sort=False)

In [9]:
def split_telco_data(df):
    '''
    This function takes in a dataframe and splits the data into train, validate and test samples. 
    Test, validate, and train are 20%, 24%, & 56% of the original dataset, respectively. 
    The function returns train, validate and test dataframes. 
    '''
    # split dataframe 80/20, stratify on churn to ensure equal proportions in both dataframes
    train_validate, test = train_test_split(df, test_size=.2, 
                                            random_state=369, 
                                            stratify=df.churn)
    # split larger dataframe from previous split 70/30, stratify on churn
    train, validate = train_test_split(train_validate, test_size=.3, 
                                       random_state=123, 
                                       stratify=train_validate.churn)
    # results in 3 dataframes
    return train, validate, test

In [10]:
# use function above to split telco data, assign to variables, and check train dataset
train, validate, test = split_telco_data(df)
train.head()

Unnamed: 0,payment_type_id,internet_service_type_id,contract_type_id,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,online_security,online_backup,device_protection,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,contract_type,internet_service_type,payment_type
5652,3,3,3,2075-PUEPR,Male,0,Yes,Yes,55,Yes,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,No,19.35,1153.25,No,Two year,,Bank transfer (automatic)
2031,2,1,1,6752-APNJL,Male,0,Yes,Yes,42,Yes,No,Yes,Yes,No,No,No,No,No,54.5,2301.15,No,Month-to-month,DSL,Mailed check
5233,1,2,1,8647-SDTWQ,Male,0,Yes,Yes,57,Yes,Yes,No,No,No,No,No,No,Yes,74.3,4018.35,No,Month-to-month,Fiber optic,Electronic check
638,3,1,2,0134-XWXCE,Female,1,No,No,44,Yes,No,No,No,Yes,Yes,Yes,Yes,No,74.85,3268.05,No,One year,DSL,Bank transfer (automatic)
413,4,1,3,6695-AMZUF,Female,0,Yes,No,70,Yes,No,Yes,Yes,Yes,Yes,Yes,Yes,No,86.45,5950.2,No,Two year,DSL,Credit card (automatic)


In [23]:
train, validate, test = prepare.prep_telco_data(df)
train.head()

Unnamed: 0,customer_id,senior_citizen,tenure,multiple_lines,monthly_charges,total_charges,contract_type,internet_service_type,payment_type,is_male,has_partner,has_dependents,online_security,online_backup,device_protection,tech_support,streaming_tv,streaming_movies,paperless_billing,churn,one_line,no_phone_service,has_multiple_lines,month_to_month_contract,one_year_contract,two_year_contract,dsl_internet,fiber_optic_internet,no_internet_service,bank_transfer_payment_automatic,credit_card_payment_automatic,electronic_check_payment,mailed_check_payment
1196,9968-FFVVH,0,63,Yes,68.8,4111.35,One year,DSL,Bank transfer (automatic),1,0,0,1,1,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0
4239,3956-CJUST,1,23,No,75.4,1643.55,Month-to-month,Fiber optic,Bank transfer (automatic),0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0
846,3941-XTSKM,0,42,No phone service,45.1,2049.05,One year,DSL,Credit card (automatic),1,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0
6014,7717-BICXI,0,60,No,20.55,1205.05,Two year,,Mailed check,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1
6667,2692-PFYTJ,0,1,Yes,25.75,25.75,Month-to-month,,Mailed check,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1


**Steps taken to prepare the data:**

- Checked column names, data types, dataframe shape, null values, statistical information, and value counts

- Changed data type for total charges to float

- Dropped rows for customers with less than a month of service therefore no opportunity to churn

- Replaced all instances of 'No internet service' as that information is duplicated in another column

- Created dummy variables for columns with object data type

- Dropped columns that were duplicated by creating dummy variables or unnecessary 

- Used histograms to check out distribution of numerical columns

- Created functions to clean and split the data

***

# Exploration

In [None]:
train.corr()['churn'].sort_values().plot(kind='bar')

## 1. Do customers with month-to-month contracts churn more than other contract types?

In [None]:
# plot visualization of number of customers churned/not churned by contract type
plt.figure(figsize=(10,8))
sns.countplot(x='contract_type', hue='churn', palette='deep', data=train)
plt.title('Month-to-Month Customers Churn At Higher Rates', fontsize=18)
plt.legend(labels=['Has Not Churned','Has Churned'])
plt.xlabel('Contract Type')
plt.ylabel('Number of Customers')

## Month-to-month customers churn at almost 5x the rate of those with contracts

***

## 2. Is there a tenure length where customers are more likely to churn?

In [None]:
# plot visualization of relationship of amount of customers churned across different lengths of tenure
sns.catplot(x='churn', y='tenure', 
            kind='swarm', hue='contract_type', 
            palette='deep', size=8, aspect=1,
            data=train[train.churn==1])

plt.title('Many Customers Churn Within A Year Of Starting Telco Services', fontsize=18)
plt.xlabel('Has Churned')
plt.ylabel('Months of Tenure')
plt.xticks([1], ['Has Churned'])
plt.axhline(y=12)

## The largest portion of customers who churn do so within the first 6 months

***

## 3. Is a specific internet service type associated with higher churn rates?

In [None]:
# plot visualization of number of customers churned/not churned by internet service type
plt.figure(figsize=(10,8))
sns.countplot(x='internet_service_type', hue='churn', palette='deep', data=train)
plt.title('Fiber Optic Customers Are More Likely To Churn', fontsize=18)
plt.legend(labels=['Has Not Churned','Has Churned'])
plt.xlabel('Internet Service Type')
plt.ylabel('Number of Customers')

**Hypothesis testing to see if this is statistically significant:**

**Null Hypothesis: Fiber optic customers are just as likely or less likely to churn than those with DSL or no internet.**

**Null Hypothesis: Fiber optic customers are more likely to churn than those with DSL or no internet.**

In [None]:
# conduct hypothesis testing to evaluate the relationship between churn and internet service type
observed = pd.crosstab(train.churn, train.internet_service_type)
chi2, p, degf, expected = stats.chi2_contingency(observed)

if p < alpha:
      print('We reject the null hypothesis')
else:
    print('We fail to reject the null hypothesis')

## Both the chart and test show that fiber optic customers are more likely to churn.

***

## 4. Is it higher monthly charges that are causing churn?

In [None]:
# plot visualization of distribution of monthly charges for those who have churned vs those who have not churned
plt.figure(figsize=(10,8))
sns.boxplot(x=train.churn, y=train.monthly_charges, data=train , palette='deep')
plt.title('Avg Monthly Charges Are Higher For Customers Who Churn', fontsize=18)
plt.xlabel('Has Churned')
plt.ylabel('Monthly Charges ($)')
plt.xticks([0, 1], ['No','Yes'])

**Hypothesis testing using one-sample, one-tail t-test to see if this is statistically significant:**

Because sample size is >500, the central limit theorum tells us that we can assume normal distribution

**Null Hypothesis: Monthly charges for customers who have churned are equal or less than those for customers who have not churned**

**Alternate Hypothesis: Monthly charges for customers who have churned are more than those for customers who have not churned**

In [None]:
# conduct hypothesis testing to evaluate the relationship between churn and monthly charges
alpha = 0.05
churn_subset = train[train.churn==1].monthly_charges
mean_charges = train.monthly_charges.mean()

t, p = stats.ttest_1samp(churn_subset, mean_charges)

if p/2 > alpha:
    print('We fail to reject the null hypothesis')
elif t < 0:
    print('We fail to reject the null hypothesis')
else:
    print('We reject the null hypothesis')

## Both the chart and test show that churn is associated with higher monthly charges.

***

## Exploration Summary


**- Month-to-month contracts and having fiber optic internet service are both correlated with higher rates of churn**

**- Higher monthly charges are also seen with customers who have churned**

**- Key takeaway is that customers are unhappy with the higher prices associated with fiber optic service and/or the quality of fiber optic service**

**- Going forward these features will be key to identifying customers with an increased risk of churning in order to target them with incentivized offers for their loyalty**

**- Features that will be used in modeling because of their correlation with churn are:** 

*tenure, monthly_charges, paperless_billing, churn, month_to_month_contract, one_year_contract, two_year_contract, dsl_internet, fiber_optic_internet, no_internet_service*
       
**- Features that will not be used in modeling because of their limited correlation with churn or because already represented with dummy variables are:** 

*senior_citizen, multiple_lines, total_charges, contract_type, internet_service_type, payment_type, is_male, has_partner, has_dependents, online_security, online_backup, device_protection, tech_support, streaming_tv, streaming_movies, one_line, no_phone_service, has_multiple_lines, bank_transfer_payment_automatic, credit_card_payment_automatic, electronic_check_payment, mailed_check_payment*

2 statistical tests (state null/alternate hypothesis) include vizes of the variable interactions

correlation: 2 continuous variables, normally distributed, testing for LINEAR correlation only (H_0: Not linearly dependent) - independent t-test: 1 continuous, somewhat normally distributed variable, one boolean variable, equal variance, independent (H_0: population mean of each group is equal) - chi-square test: 2 discrete variables. (H_0: the 2 variables are independent of each other). (other tests may be used)

## Look at user defined functions to see which might be useful

***

# Modeling

In [None]:
# checking most common value for churn
train.churn.value_counts()

In [None]:
# creating a baseline to evaluate models. Baseline set to most common value, 0 (did not churn)
train['baseline'] = 0 
train.head()

In [None]:
# calculate baseline accuracy
baseline_accuracy = round((train.baseline == train.churn).mean(), 2)
baseline_accuracy

**Baseline accuracy is 73% which is what a model must beat to provide value**

In [None]:
# drop baseline column from train dataset
train = train.drop(columns='baseline')
train.head()

In [24]:
# drop features determined in the explore phase to not have much correlation with churn
cols_to_drop = ['customer_id','senior_citizen', 'multiple_lines', 'total_charges', 'contract_type', 'internet_service_type', 'payment_type', 'is_male', 'has_partner', 'has_dependents', 'online_security', 'online_backup', 'device_protection', 'tech_support', 'streaming_tv', 'streaming_movies', 'one_line', 'no_phone_service', 'has_multiple_lines', 'bank_transfer_payment_automatic', 'credit_card_payment_automatic', 'electronic_check_payment', 'mailed_check_payment', 'churn']

# create X and y version of train, validate, and test
X_train = train.drop(columns=cols_to_drop)
y_train = train.churn

X_validate = validate.drop(columns=cols_to_drop)
y_validate = validate.churn

X_test = test.drop(columns=cols_to_drop)
y_test = test.churn

In [25]:
# check shape of each to confirm changes were made
X_train.shape, X_validate.shape, X_test.shape

((3937, 9), (1688, 9), (1407, 9))

In [26]:
# check first few rows of X_train
X_train.head()

Unnamed: 0,tenure,monthly_charges,paperless_billing,month_to_month_contract,one_year_contract,two_year_contract,dsl_internet,fiber_optic_internet,no_internet_service
1196,63,68.8,0,0,1,0,1,0,0
4239,23,75.4,1,1,0,0,0,1,0
846,42,45.1,1,0,1,0,1,0,0
6014,60,20.55,0,0,0,1,0,0,1
6667,1,25.75,1,1,0,0,0,0,1


In [None]:
# Evaluate Random Forest models on train & validate set by looping through different values for max_depth and min_samples_leaf hyperparameters

# create empty list for which to append metrics from each loop
metrics = []
# set value for range
max_value = 21

# create loop for range 1-20
for i in range(1, max_value):
    # set depth & n_samples to value for current loop
    depth = i
    n_samples = i
    
    # define the model setting hyperparameters to values for current loop
    forest = RandomForestClassifier(max_depth=depth, min_samples_leaf=n_samples, random_state=369)

    # fit the model on train
    forest = forest.fit(X_train, y_train)

    # use the model and evaluate performance on train
    in_sample_accuracy = forest.score(X_train, y_train)
    # use the model and evaluate performance on validate
    out_of_sample_accuracy = forest.score(X_validate, y_validate)
    
    # create output of current loop's hyperparameters and accuracy to append to metrics
    output = {
        "min_samples_per_leaf": n_samples,
        "max_depth": depth,
        "train_accuracy": in_sample_accuracy,
        "validate_accuracy": out_of_sample_accuracy
    }
    
    metrics.append(output)

# convert metrics list to a dataframe for easy reading   
df = pd.DataFrame(metrics)
# add column to assess the difference between train & validate accuracy
df["difference"] = df.train_accuracy - df.validate_accuracy
df

In [27]:
# Evaluate Random Forest model on train & validate set

# define the model setting hyperparameters to values for current loop
forest = RandomForestClassifier(max_depth=8, min_samples_leaf=8, random_state=369)

# fit the model on train
forest = forest.fit(X_train, y_train)

# use the model and evaluate performance on train
train_accuracy = forest.score(X_train, y_train)
# use the model and evaluate performance on validate
validate_accuracy = forest.score(X_validate, y_validate)

print(f'train_accuracy: {train_accuracy}')
print(f'validate_accuracy: {validate_accuracy}')

train_accuracy: 0.8166116332232665
validate_accuracy: 0.7968009478672986


In [None]:
# Evaluate KNearest Neighbors models on train & validate set by looping through different values for k hyperparameter

# create empty list for which to append metrics from each loop
metrics = []

# create loop for range 1-20
for k in range(1, 21):
            
    # define the model setting hyperparameters to values for current loop
    knn = KNeighborsClassifier(n_neighbors=k)
    
    # fit the model on train
    knn.fit(X_train, y_train)
    
    # use the model and evaluate performance on train
    train_accuracy = knn.score(X_train, y_train)
    # use the model and evaluate performance on validate
    validate_accuracy = knn.score(X_validate, y_validate)
    
    # create output of current loop's hyperparameters and accuracy to append to metrics
    output = {
        "k": k,
        "train_accuracy": train_accuracy,
        "validate_accuracy": validate_accuracy
    }
    
    metrics.append(output)

# convert metrics list to a dataframe for easy reading
df = pd.DataFrame(metrics)
# add column to assess the difference between train & validate accuracy
df['difference'] = df.train_accuracy - df.validate_accuracy
df

In [None]:
# Evaluate Logistic Regression models on train & validate set by looping through different values for c hyperparameter

# create empty list for which to append metrics from each loop
metrics = []

# create loop for values in list
for c in [.001, .005, .01, .05, .1, .5, 1, 5, 10, 50, 100, 500, 1000]:
            
    # define the model setting hyperparameters to values for current loop
    logit = LogisticRegression(C=c)
    
    # fit the model on train
    logit.fit(X_train, y_train)
    
    # use the model and evaluate performance on train
    train_accuracy = logit.score(X_train, y_train)
    # use the model and evaluate performance on validate
    validate_accuracy = logit.score(X_validate, y_validate)
    
    # create output of current loop's hyperparameters and accuracy to append to metrics
    output = {
        'C': c,
        'train_accuracy': train_accuracy,
        'validate_accuracy': validate_accuracy
    }
    
    metrics.append(output)

# convert metrics list to a dataframe for easy reading
df = pd.DataFrame(metrics)
# add column to assess the difference between train & validate accuracy
df['difference'] = df.train_accuracy - df.validate_accuracy
df

**- The random forest models proved to be the most accurate for this dataset although larger values of c for logistic regression produced the same results.**

**- The random forest model selected for use on the test dataset is one with the max_depth and min_samples_leaf set to 8. This was chosen for its high accuracy but with reduced likelihood of overfitting.**

In [None]:
# define the model selected for test dataset
forest_test_model = RandomForestClassifier(max_depth=8, min_samples_leaf=8, random_state=369)
# use the model and evaluate performance on test
print(f'Test accuracy = {forest.score(X_test, y_test)}')

**Test accuracy for this model is 100%**

In [33]:
y_pred = pd.DataFrame(forest.predict(X_test))
y_pred.shape

(1407, 1)

In [34]:
y_pred_proba = pd.DataFrame(forest.predict_proba(X_test))
y_pred_proba.shape

(1407, 2)

In [35]:
customer_ids = pd.DataFrame(test.customer_id)
customer_ids.shape

(1407, 1)

In [36]:
print(type(customer_ids))
print(type(y_pred))
print(type(y_pred_proba))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [38]:
customer_prob_of_churn = pd.concat([customer_ids, y_pred, y_pred_proba], axis=1)
customer_prob_of_churn.head()

Unnamed: 0,customer_id,0,0.1,1
0,0016-QLJIS,0.0,0.652289,0.347711
1,,1.0,0.243873,0.756127
2,0019-GFNTW,0.0,0.920271,0.079729
3,,1.0,0.451301,0.548699
4,,0.0,0.835137,0.164863


In [40]:
customer_prob_of_churn.to_csv('customer_prob_of_churn.csv')

***

# Conclusion


## Summary

Goals of this project were:

- To identify drivers of customer churn and find a solution for increasing customer retention

- To construct a model that accurately predicts which customers are most likely to churn to focus retention efforts

We identified some of the main drivers of churn as:

- Month-to-month contract

- Fiber optic internet service

- Higher monthly prices

We built a model that is expected to perform with greater than 95% accuracy on unseen data which will help with targeted efforts to improve customer retention.


## Recommendations

- Reduce monthly price for fiber optic internet service 

- Send out quarterly customer satisfaction surveys for better insights


## Next Steps

- With more time and resources, would like to collect and explore customer satisfaction data