In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [4]:
# Read the raw churn table from the Kaggle input directory; the bare name on the
# last line lets the notebook render a preview of the frame.
data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/telecom-churn/telecom_churn.csv',
)
data

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.70,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.70,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.00,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.00,2,299.4,71,57.0,3.10,6.6
4,0,75,0,0,0.00,3,166.7,113,41.0,7.42,10.1
...,...,...,...,...,...,...,...,...,...,...,...
3328,0,192,1,1,2.67,2,156.2,77,71.7,10.78,9.9
3329,0,68,1,0,0.34,3,231.1,57,56.4,7.67,9.6
3330,0,28,1,0,0.00,2,180.8,109,56.0,14.44,14.1
3331,0,184,0,0,0.00,2,213.8,105,50.0,7.98,5.0


In [6]:
# Work on an independent (deep) copy so the raw frame loaded above stays untouched.
df = data.copy(deep=True)

In [10]:
# Separate the target column from the predictors.
y = df['Churn']
X = df.drop('Churn', axis='columns')

In [11]:
# Hold out a quarter of the rows for evaluation (scikit-learn's default size);
# the fixed seed keeps the shuffled split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=0,
)

___
# Build a model selection module

In [13]:
# Classification models

from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

```python
>> selection_pipe = GMSModule(mode='binary_classification',
metrics=['f1-score', 'accuracy', 'precision'],
include=[LogisticRegression(),
RidgeClassifier()...],
data=[X_train, X_test, y_train, y_test],
verbose=True)

>> selection_pipe.run()
>> selection_pipe.name() # LGBMClassifier
>> selection_pipe.describe() # Logistic Regression - 0.94...
>> selection_pipe.evaluation() # F1-score: 0.94
                               # Accuracy: 0.96...
```

In [26]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [126]:
## Import necessary libraries

import math
# Scorings for 'classification'
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Scorings for 'regression'
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score
                            


## Class
class GMSModule:
    """Fit several candidate estimators on one train/test split and rank them.

    Parameters
    ----------
    mode : str
        Either 'classification' or 'regression'; selects which family of
        metrics is available.
    include : list
        Estimator instances exposing ``fit`` and ``predict``. ``fit`` is called
        on the very objects passed in (no cloning is done here).
    data : list
        ``[X_train, X_test, y_train, y_test]``, in that order.
    metrics : list, optional
        Names of the metrics to compute. Classification: 'accuracy',
        'precision', 'recall', 'f1-score', 'roc-auc'. Regression: 'mae',
        'mape', 'mse', 'rmse', 'r2-score'. Names that do not belong to the
        chosen mode are ignored. Defaults to ``['accuracy']``.

    Notes
    -----
    Every call to ``describe`` or ``best_model`` re-runs ``evaluate_models``
    and therefore refits all candidates.
    """

    ## Score labels (keys of the per-model score dicts) where smaller is better
    _LOWER_IS_BETTER = frozenset({
        'MAE (Mean Abs. Error)',
        'MAPE (Mean Abs. Percent. Error)',
        'MSE (Mean Squared Error)',
        'RMSE (Rooted Mean Squared Error)',
    })

    ## Initiate variables for work
    def __init__(self, mode: str, include: list, data: list, metrics: list = None):
        self.mode = mode
        # A fresh list per instance: a literal list default would be shared by
        # every instance created without an explicit `metrics` argument.
        self.metrics = ['accuracy'] if metrics is None else metrics
        self.include = include
        self.X_train, self.X_test, self.y_train, self.y_test = data

    ## Get all models included into evaluation
    def select_models(self):
        """Return a new list holding the candidate estimators."""
        return list(self.include)

    ## Metric table for the current mode
    def _scorers(self):
        """Return ``(metric name, score label, scoring function, wants_scores)`` rows.

        ``wants_scores`` marks metrics that should receive continuous scores
        rather than hard predictions.

        Raises
        ------
        ValueError
            If ``self.mode`` is neither 'classification' nor 'regression'.
        """
        if self.mode == 'classification':
            return [
                ('accuracy', 'accuracy', accuracy_score, False),
                ('precision', 'precision', precision_score, False),
                ('recall', 'recall', recall_score, False),
                ('f1-score', 'f1-score', f1_score, False),
                ('roc-auc', 'roc-auc', roc_auc_score, True),
            ]
        if self.mode == 'regression':
            return [
                ('mae', 'MAE (Mean Abs. Error)', mean_absolute_error, False),
                ('mape', 'MAPE (Mean Abs. Percent. Error)', mean_absolute_percentage_error, False),
                ('mse', 'MSE (Mean Squared Error)', mean_squared_error, False),
                ('rmse', 'RMSE (Rooted Mean Squared Error)',
                 lambda y_true, y_hat: math.sqrt(mean_squared_error(y_true, y_hat)), False),
                ('r2-score', 'r2-score', r2_score, False),
            ]
        raise ValueError(
            f"Unsupported mode {self.mode!r}; expected 'classification' or 'regression'."
        )

    ## Continuous scores for ranking-based metrics
    def _positive_class_scores(self, model, y_pred):
        """Return continuous test-set scores for ``model`` when it offers them.

        Tries the second column of a two-column ``predict_proba`` first, then a
        one-dimensional ``decision_function``; falls back to the hard
        predictions ``y_pred`` when neither is usable.
        """
        if hasattr(model, 'predict_proba'):
            proba = model.predict_proba(self.X_test)
            if getattr(proba, 'ndim', None) == 2 and proba.shape[1] == 2:
                return proba[:, 1]
        if hasattr(model, 'decision_function'):
            margin = model.decision_function(self.X_test)
            if getattr(margin, 'ndim', None) == 1:
                return margin
        return y_pred

    ## Evaluate models
    def evaluate_models(self):
        """Fit every candidate on the training split and score it on the test split.

        Returns
        -------
        list
            One ``(fitted model, scores)`` tuple per candidate, where
            ``scores`` maps a score label to its value.

        Raises
        ------
        ValueError
            If ``self.mode`` is not supported (raised before any model is fitted).
        """
        # Resolve the metric table first so a bad mode fails before any fitting.
        scorers = [row for row in self._scorers() if row[0] in self.metrics]
        results = []

        for model in self.select_models():
            model = model.fit(self.X_train, self.y_train)
            y_pred = model.predict(self.X_test)
            scores = {}

            for _, label, score_fn, wants_scores in scorers:
                # ROC AUC is a ranking metric: feed it probabilities/margins
                # when available instead of thresholded labels.
                y_hat = self._positive_class_scores(model, y_pred) if wants_scores else y_pred
                scores[label] = score_fn(self.y_test, y_hat)

            results.append((model, scores))

        return results

    ## Verbose description of each model
    def describe(self):
        """Print one ``model: scores`` line per evaluated candidate."""
        for model, scores in self.evaluate_models():
            print(f"{model}: {scores}")

    ## Get info about the best model
    def best_model(self, print_info: bool = False):
        """Return the class name of the best-scoring candidate.

        Candidates are ranked by the sum of their scores, with error metrics
        (MAE, MAPE, MSE, RMSE) counted negatively so that a smaller error ranks
        higher. Ties go to the candidate listed first. The sum mixes metrics
        of different scales, so it is only a coarse ranking.

        Parameters
        ----------
        print_info : bool
            When True the name is printed and ``None`` is returned; otherwise
            the name is returned.

        Raises
        ------
        ValueError
            If no candidate was evaluated or the mode is unsupported.
        """
        result = self.evaluate_models()
        if not result:
            raise ValueError("No models were evaluated; 'include' must contain at least one estimator.")

        def ranking_score(entry):
            _, scores = entry
            return sum(
                -value if label in self._LOWER_IS_BETTER else value
                for label, value in scores.items()
            )

        # Rank the (model, scores) pairs directly: keying by class name would let
        # two candidates of the same class overwrite each other.
        winner, _ = max(result, key=ranking_score)
        best_model = winner.__class__.__name__

        # Print the name of the model with the highest score
        if print_info:
            print("Model with the highest score:", best_model)
        else:
            return best_model


In [127]:
# Compare a linear baseline against a tree ensemble on the held-out split.
# - max_iter is raised above the default because the default run stopped with
#   "TOTAL NO. of ITERATIONS REACHED LIMIT" on these unscaled features; if the
#   warning persists, raise it further or standardize the features.
# - The forest is seeded so the comparison is reproducible across runs.
GMSPipe = GMSModule(
    mode="classification",
    metrics=['accuracy', 'f1-score'],
    include=[
        LogisticRegression(max_iter=5000),
        RandomForestClassifier(random_state=0),
    ],
    data=[X_train, X_test, y_train, y_test],
)
GMSPipe.best_model()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


'RandomForestClassifier'