In [None]:
import os
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

from sklearn_pandas import DataFrameMapper

from risk_learning.config import filenames

In [None]:
# Read the CSV referenced by the project's `filenames.telcom_churn` setting.
df = pd.read_csv(filepath_or_buffer=filenames.telcom_churn)
# Print column dtypes and non-null counts as a quick sanity check of the load.
df.info()

## Separate features from the target and hold out a test set

In [None]:
# Features are every column except the 'Churn' label.
data = df.loc[:, df.columns != 'Churn']

# Encode the 'Churn' label column and flatten the encoder output to 1-D.
lb = LabelBinarizer()
target = np.ravel(lb.fit_transform(df['Churn']))

# Hold out 25% of the rows as a test set; stratifying on the target keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    stratify=target,
    test_size=0.25,
    random_state=42,
)

## Transform features

In [None]:
# Each listed column gets its own, independent LabelBinarizer instance.
mapper = DataFrameMapper([
    (column, LabelBinarizer())
    for column in ('gender', 'SeniorCitizen')
])

## Combine in a pipeline

In [None]:
# Chain the column encoders and the classifier into a single estimator, so the
# encoders are fitted together with the model on each cross-validation fold.
pipe = Pipeline([
    ('featurize', mapper),
    ('lr', LogisticRegression(solver='lbfgs')),
])

# LogisticRegression is a classifier, so score it with a classification metric.
# The previous 'r2' is a regression metric and is not meaningful for predicted
# class labels. 'roc_auc' is a reasonable alternative if the classes are
# imbalanced.
cross_val_score(pipe, X=X_train, y=y_train, scoring='accuracy', cv=5)