# **Experiment Notebook**



In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

<hr>

## A. Project


In [2]:
student_name = 'Max Chew'

In [3]:
student_id = '13552169'

In [4]:
experiment_id = '1'

<hr>

## B. Experiment Description


In [5]:
experiment_hypothesis = 'Customer experience and longevity are negatively associated with customer churn. '

In [6]:
experiment_expectations = 'The variables MonthlyCharges, TotalCharges, AccountAge, SupportTicketsPerMonth and user rating, are useful in predicting customer churn. All variables will be associated with a decreased churn likelihood. Logistic Regression should be able to provide a interpretable analysis of whether the hypthesis is true and the realtive weights of these features. '

<hr>

## C. Data Understanding


### C.0 Import Packages

In [7]:
# Pandas for data handling
import pandas as pd

# Scikit Learn for ML training
import sklearn

# Altair for plotting
import altair as alt

# numpy for math
import numpy as np

<hr>

### C.1   Load Datasets

In [8]:
# Load training set
# Do not change this code

X_train = pd.read_csv('X_train.csv')
y_train = pd.read_csv('y_train.csv')

In [9]:
# Load validation set
# Do not change this code

X_val = pd.read_csv('X_val.csv')
y_val = pd.read_csv('y_val.csv')

In [10]:
# Load testing set
# Do not change this code

X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')

<hr>

<hr>

## D. Feature Selection


In [11]:
feature_selection_executive_summary = 'The features selected demonstrated a balance of representative characteristics of the customers. These features may be related to churn.'

> Rationale: Features irrelevant to the analysis for this specific hypothesis will be dropped. 

In [12]:
# select relevant features

features_list = ['AccountAge', 'MonthlyCharges', 'TotalCharges', 'SupportTicketsPerMonth', 'SubscriptionType']

# reduce datasets to selected features

X_train = X_train[features_list]
X_val = X_val[features_list]
X_test = X_test[features_list]

# get shape of datasets
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(18712, 5)
(6238, 5)
(6237, 5)


> Results: Features selected. The training, validation and testing set each have 5 features and 18,712, 6,238 and 6,237 records respectively. 

<hr>

## E. Data Preparation

In [13]:
data_preparation_executive_summary = 'In order to prepare the data for quantitative analysis, one-hot encoding must be performed on categorical features. In this subset of features used for this experiment that is only the Subscription Type feature. '

> Rationale: Categorical data needs to be one-hot encoded in order to perform logistic regression.  

In [14]:
# Categorical features encoded using one-hot encoding

from sklearn.preprocessing import OneHotEncoder

# Create OneHotEncoder
Log_reg_encoder = OneHotEncoder(drop='first', sparse=False)

# save unique values from variable for verification in val and test set 
subscription_values = list(X_train['SubscriptionType'].unique())
subscription_values

['Premium', 'Standard', 'Basic']

In [15]:
# Fit and transform the 'SubscriptionType' column
encoded_cols = Log_reg_encoder.fit_transform(X_train[['SubscriptionType']])

In [16]:
# Convert the result to a df and set names 
encoded_df = pd.DataFrame(encoded_cols, columns=Log_reg_encoder.get_feature_names_out(['SubscriptionType']))

In [17]:
# Rejoin the encoded columns to the original DataFrame
X_train = X_train.drop(columns=['SubscriptionType'])
X_train = pd.concat([X_train, encoded_df], axis=1)
X_train

Unnamed: 0,AccountAge,MonthlyCharges,TotalCharges,SupportTicketsPerMonth,SubscriptionType_Premium,SubscriptionType_Standard
0,1.588845,0.016420,1.614839,-0.869823,1.0,0.0
1,-1.110144,1.500649,-0.711572,-0.171142,1.0,0.0
2,1.220801,-1.169333,0.097870,-0.520483,1.0,0.0
3,1.036779,0.843668,1.837829,0.876880,0.0,1.0
4,-0.619419,1.380670,-0.034457,-1.219164,0.0,1.0
...,...,...,...,...,...,...
18707,-0.312715,-0.078773,-0.272961,-0.171142,1.0,0.0
18708,-0.711430,0.963047,-0.303451,0.527540,1.0,0.0
18709,1.282142,-1.008796,0.293294,0.527540,1.0,0.0
18710,-1.478188,1.440237,-1.256199,1.575562,1.0,0.0


In [18]:
# remove rows from test and val sets which do not contain same values as test for categorical variable
X_val = X_val[X_val['SubscriptionType'].isin(subscription_values)]
X_test = X_test[X_test['SubscriptionType'].isin(subscription_values)]

# use one hot encoder to encode categorical variable for val 
val_encoded_cols = Log_reg_encoder.transform(X_val[['SubscriptionType']])
encoded_df = pd.DataFrame(val_encoded_cols, columns=Log_reg_encoder.get_feature_names_out(['SubscriptionType']))
X_val = X_val.drop(columns=['SubscriptionType'])
X_val = pd.concat([X_val, encoded_df], axis=1)
X_val



Unnamed: 0,AccountAge,MonthlyCharges,TotalCharges,SupportTicketsPerMonth,SubscriptionType_Premium,SubscriptionType_Standard
0,-0.251375,1.625393,0.609444,0.876880,1.0,0.0
1,0.147340,1.480921,1.122167,0.876880,1.0,0.0
2,0.760746,0.679451,1.358220,1.575562,0.0,1.0
3,-0.864781,0.383981,-0.661901,-1.219164,0.0,0.0
4,1.282142,-0.633403,0.668306,0.178199,0.0,1.0
...,...,...,...,...,...,...
6233,0.423372,0.474775,0.810556,-1.568505,0.0,1.0
6234,1.098120,0.800447,1.873835,-0.869823,1.0,0.0
6235,-1.018133,0.801074,-0.735009,0.527540,0.0,0.0
6236,0.607394,-0.293580,0.423813,-0.520483,1.0,0.0


In [19]:
# use one hot encoder to encode categorical variable for test
test_encoded_cols = Log_reg_encoder.transform(X_test[['SubscriptionType']])
encoded_df = pd.DataFrame(test_encoded_cols, columns=Log_reg_encoder.get_feature_names_out(['SubscriptionType']))
X_test = X_test.drop(columns=['SubscriptionType'])
X_test = pd.concat([X_test, encoded_df], axis=1)
X_test

Unnamed: 0,AccountAge,MonthlyCharges,TotalCharges,SupportTicketsPerMonth,SubscriptionType_Premium,SubscriptionType_Standard
0,1.650186,-0.783305,0.776564,0.527540,0.0,1.0
1,0.760746,-0.281495,0.566491,0.527540,1.0,0.0
2,0.944768,-0.027375,0.951576,0.178199,0.0,1.0
3,-0.711430,1.790799,-0.030655,1.575562,1.0,0.0
4,-0.466067,0.722307,-0.086736,-1.219164,1.0,0.0
...,...,...,...,...,...,...
6232,1.220801,0.234552,1.471415,0.876880,1.0,0.0
6233,0.975438,1.068298,1.962583,-1.219164,1.0,0.0
6234,-1.232825,1.506704,-0.890727,-0.869823,0.0,0.0
6235,0.822087,0.548539,1.320795,1.575562,0.0,0.0


> Results: Successfully one-hot encoded variables 

<hr>

## F. Feature Engineering

In [20]:
data_preparation_executive_summary_2 = 'This is not necessary, all new features and transformations were performed in pre-processing in experiment 0.'

<hr>

## G. Train Machine Learning Model

In [21]:
train_model_executive_summary = 'The purpose of this section is to train a logistic regression model on the processed data. Following this the model will be tuned using the validation set, before finally being assessed using the test set. '

### G.1 Import Algorithm

> Rationale: Importing the log regression from sklearn to model the data processed 

In [22]:
# import log regression

from sklearn.linear_model import LogisticRegression


<hr>

### G.2 Set Hyperparameters

> Rationale: values for c and penalty which are to be tested 

In [23]:
# import metrics for performance
from sklearn.metrics import roc_auc_score, recall_score, f1_score, confusion_matrix, classification_report


# Set Hyperparameters
best_params = None
best_score = 0

c_value = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
penalty_type = ['l1', 'l2']

<hr>

### G.3 Fit Model

In [24]:
y_train = np.ravel(y_train)
y_val = np.ravel(y_val)
y_test = np.ravel(y_test)

for current_c in c_value:
    for current_penalty in penalty_type:
        try:
            model = LogisticRegression(class_weight='balanced', C = current_c, penalty=current_penalty, solver='liblinear', max_iter=10000)
            model.fit(X_train, y_train)
            
            # Predict on validation set
            y_pred_val = model.predict(X_val)
            
            # Calculate F1 score
            score = f1_score(y_val, y_pred_val, average='weighted')
            
            # Update best parameters if current score is better
            if score > best_score:
                best_score = score
                best_params = {'C': current_c, 'penalty': current_penalty}
        except Exception as e:
            print(f"Skipping C={current_c}, penalty={current_penalty} due to error: {e}")


print(f"Best Parameters: {best_params}")
print(f"Best F1 Score on Validation Set: {best_score}")

Best Parameters: {'C': 0.001, 'penalty': 'l2'}
Best F1 Score on Validation Set: 0.6606697801757698


<hr>

### G.4 Model Technical Performance

In [25]:
# apply best params from hyperparameter tuning on testing set 

model_test = LogisticRegression(C=0.1,penalty='l2', solver='liblinear', max_iter=10000)
model_test.fit(X_train, y_train)
y_test_pred = model_test.predict(X_test)

# evaluate model performance 

print(roc_auc_score(y_test, y_test_pred))

print(recall_score(y_test, y_test_pred))

print(f1_score(y_test, y_test_pred, average='weighted'))
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))

0.5007530330779368
0.0017035775127768314
0.728119921994071
[[5062    1]
 [1172    2]]
              precision    recall  f1-score   support

           0       0.81      1.00      0.90      5063
           1       0.67      0.00      0.00      1174

    accuracy                           0.81      6237
   macro avg       0.74      0.50      0.45      6237
weighted avg       0.78      0.81      0.73      6237



> Results: Model run successfully. The results are an improvement over the baseline model by a very small margin. It appears that the features selected for this model do not have strong predictive power in assessing the likelihood of churn in the test set. 

<hr>

### G.5 Business Impact from Current Model Performance

> Results: There is insufficient evidence to accept that these features have an influence on predicting churn. 

<hr>

## H. Experiment Outcomes

In [26]:
final_experiment_outcome = 'Hypothesis Rejected'

> Key Learnings: There is insufficient evidence to overturn the null hypothesis. As as result there is no clear correlation or predictive ability when using logistic regression to predict churn in customers. 

> Recommendations for Next Experiment: The chosen features appear to only slightly improve from the baseline model. Future experiments should focus on different features and model selection in order to attempt to further improve performance.  

<hr>

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8c3586fc-cd83-4e7b-a04a-11476af0d44a' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>