In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score, precision_score
import functools
import time

In [2]:
# Decorator: report how long the wrapped function takes to run
def runtime_monitor(input_function):
    """Decorate ``input_function`` so each call prints its elapsed time.

    The wrapper forwards all positional and keyword arguments unchanged,
    measures the call with ``time.perf_counter`` and prints one line of the
    form ``Finished executing <name> in <seconds> seconds`` once the call has
    returned. The wrapped function's return value is passed through.
    """

    @functools.wraps(input_function)
    def timed_call(*args, **kwargs):
        started_at = time.perf_counter()
        result = input_function(*args, **kwargs)
        # Take the second reading before formatting the message so the
        # cost of printing is not included in the measurement.
        elapsed = time.perf_counter() - started_at
        print(f"Finished executing {input_function.__name__} in {elapsed} seconds")
        return result

    return timed_call

In [3]:
# Decorator: print the wrapped function's arguments and return value
def debugging_method(input_function):
    """Decorate ``input_function`` so each call logs its arguments and result.

    Before the call, one line is printed listing every positional argument
    and every ``key=value`` keyword argument, separated by ``"; "``. After the
    call, a second line is printed with the return value, which is then
    passed back to the caller unchanged.

    Both positional and keyword values are rendered with ``repr`` so that,
    for example, the string ``"1"`` can be told apart from the integer ``1``.
    """

    @functools.wraps(input_function)
    def debugging_wrapper(*args, **kwargs):
        arguments = [repr(a) for a in args]
        # Use !r here as well: formatting keyword values with str() while
        # positional values use repr() made the log inconsistent/ambiguous.
        keyword_arguments = [f"{key}={value!r}" for key, value in kwargs.items()]
        function_signature = "; ".join(arguments + keyword_arguments)
        print(f"{input_function.__name__} has the following signature: {function_signature}")
        return_value = input_function(*args, **kwargs)
        print(f"{input_function.__name__} has the following return: {return_value}")
        return return_value

    return debugging_wrapper

In [4]:
@debugging_method
@runtime_monitor
def data_preparation(columns, test_size, datatype_dict, target="Churn", csv_path="telco_churn.csv"):
    """Load the CSV, cast and encode the selected columns, and split them.

    Parameters
    ----------
    columns : list of str
        Columns to keep from the CSV. Must include ``target``; every other
        entry is used as a feature, in the order given.
    test_size : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    datatype_dict : dict
        Maps each name in ``columns`` to the dtype it is cast to. Columns
        mapped to ``"category"`` are then replaced by their integer category
        codes.
    target : str, default "Churn"
        Name of the label column.
    csv_path : str, default "telco_churn.csv"
        Location of the CSV file to read.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split`` with ``random_state=42``.

    Raises
    ------
    KeyError
        If a requested column is missing from the CSV or from
        ``datatype_dict``, or if ``target`` is not one of ``columns``.
    """
    df = pd.read_csv(csv_path)
    df_subset = df[columns].copy()

    for col in columns:
        df_subset[col] = df_subset[col].astype(datatype_dict[col])
        if datatype_dict[col] == "category":
            # Replace the categorical values with their integer codes so the
            # column is numeric for the downstream model.
            df_subset[col] = df_subset[col].cat.codes

    # Derive the features from `columns` instead of a hard-coded list so the
    # function honours whatever column selection the caller passed in.
    feature_columns = [col for col in columns if col != target]
    X = df_subset[feature_columns]
    y = df_subset[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    return X_train, X_test, y_train, y_test

# Target dtype for every column used in the experiment. The insertion order
# of this mapping defines the order of `columns` below.
datatype_dict = {
    "gender": "category",
    "tenure": "float",
    "PhoneService": "category",
    "MultipleLines": "category",
    "MonthlyCharges": "float",
    "Churn": "category",
}
columns = list(datatype_dict)
X_train, X_test, y_train, y_test = data_preparation(columns, 0.33, datatype_dict)

data_preparation has the following signature: ['gender', 'tenure', 'PhoneService', 'MultipleLines', 'MonthlyCharges', 'Churn']; 0.33; {'gender': 'category', 'tenure': 'float', 'PhoneService': 'category', 'MultipleLines': 'category', 'MonthlyCharges': 'float', 'Churn': 'category'}
Finished executing data_preparation in 0.04060269000183325 seconds
data_preparation has the following return: (      gender  tenure  PhoneService  MultipleLines  MonthlyCharges
298        1    40.0             1              2           74.55
3318       1    10.0             0              1           29.50
5586       0    27.0             1              0           19.15
6654       0     7.0             1              2           86.50
5362       1    65.0             1              2           24.75
...      ...     ...           ...            ...             ...
3772       1     1.0             1              0           95.00
5191       0    23.0             1              2           91.10
5226       1  

In [5]:
@debugging_method
@runtime_monitor
def fit_model(X_train, y_train):
    """Fit a ``RandomForestClassifier(random_state=42)`` and return it.

    ``X_train`` and ``y_train`` are passed straight to the estimator's
    ``fit`` method; the fitted estimator is the return value.
    """
    classifier = RandomForestClassifier(random_state=42)
    # fit() returns the estimator itself, so its result can be returned directly.
    return classifier.fit(X_train, y_train)

# Train the classifier on the training split. Because fit_model is decorated,
# this call also prints its arguments, its runtime and the returned model.
model = fit_model(X_train,y_train)

fit_model has the following signature:       gender  tenure  PhoneService  MultipleLines  MonthlyCharges
298        1    40.0             1              2           74.55
3318       1    10.0             0              1           29.50
5586       0    27.0             1              0           19.15
6654       0     7.0             1              2           86.50
5362       1    65.0             1              2           24.75
...      ...     ...           ...            ...             ...
3772       1     1.0             1              0           95.00
5191       0    23.0             1              2           91.10
5226       1    12.0             1              0           21.15
5390       1    12.0             1              2           99.45
860        1    26.0             1              0           19.80

[4718 rows x 5 columns]; 298     0
3318    1
5586    0
6654    1
5362    0
       ..
3772    1
5191    0
5226    0
5390    1
860     0
Name: Churn, Length: 4718, dtype:

In [6]:
@debugging_method
@runtime_monitor
def predict(X_test, model):
    """Return ``model.predict(X_test)``, the model's predictions for ``X_test``."""
    return model.predict(X_test)

# Predict labels for the held-out split with the model fitted above. The
# decorators on predict also print the arguments, runtime and predictions.
y_pred = predict(X_test, model)

predict has the following signature:       gender  tenure  PhoneService  MultipleLines  MonthlyCharges
185        0     1.0             0              1           24.80
2715       1    41.0             1              2           25.25
3825       0    52.0             1              0           19.35
1807       0     1.0             1              0           76.35
132        1    67.0             1              0           50.55
...      ...     ...           ...            ...             ...
4147       1    71.0             1              2           24.85
3542       1    29.0             0              1           55.35
3759       1     7.0             1              2           89.35
1114       1    32.0             1              2           98.85
4958       0    59.0             1              2           94.75

[2325 rows x 5 columns]; RandomForestClassifier(random_state=42)
Finished executing predict in 0.05748910900001647 seconds
predict has the following return: [1 0 0 ... 1 

In [7]:
@debugging_method
@runtime_monitor
def model_performance(y_pred, y_test):
    """Print F1, accuracy and precision of ``y_pred`` against ``y_test``.

    Each metric is printed on its own line as ``<name> <value>``. Nothing is
    returned.
    """
    reported_metrics = (
        ("f1_score", f1_score),
        ("accuracy_score", accuracy_score),
        ("precision_score", precision_score),
    )
    for label, metric in reported_metrics:
        # The scikit-learn metrics take the true labels first, then the predictions.
        print(label, metric(y_test, y_pred))
    
# Print F1, accuracy and precision for the test-set predictions. The function
# returns nothing, so the debugging decorator reports a return of None.
model_performance(y_pred, y_test)

model_performance has the following signature: array([1, 0, 0, ..., 1, 1, 0], dtype=int8); 185     1
2715    0
3825    0
1807    1
132     0
       ..
4147    0
3542    0
3759    1
1114    0
4958    0
Name: Churn, Length: 2325, dtype: int8
f1_score 0.5083848190644307
accuracy_score 0.7604301075268817
precision_score 0.5702970297029702
Finished executing model_performance in 0.0064978350019373465 seconds
model_performance has the following return: None


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7a27001d-66b4-4830-aed7-1edc28bc9d88' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>