In [20]:
####CH 3 Machine Learning Classification
#Using machine learning to predict churn 

[Dataset: Telco Churn Dataset](https://www.kaggle.com/datasets/blastchar/telco-customer-churn?resource=download)

Dataset provides:
* Services that the customer is signed up for (i.e. phone, multiple lines, tech support, and extra services like online security, back up etc
* Account Info: Membership tenure, contract type, payment type
* Charges: Membership charges - past month & total
* demographic info
* churned variable (y/n)

In [21]:
# Import dataset from Kaggle CLI(come back and set up on this 2nd computer)
import kaggle
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()

api.dataset_download_file('blastchar/telco-customer-churn', file_name = 'WA_Fn-UseC_-Telco-Customer-Churn.csv')


False

In [22]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt

# import os
# # os.listdir()
# #pandas support zip file reads
# df = pd.read_csv("Datasets/telco_customer_churn.csv")

df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
# len(df) # number of rows in data
df.head(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


<span style="color:green; font-size:1.5em">**Data Cleaning**</span>


* Correct for datatypes where needed
  * TotalCharges to numeric
  * Churn to int
* clean up column names -- lower case and replace spaces w/ '_'


In [23]:
df.dtypes

#Note: TotalCharges is identified as an object -- contians a space(" ") to represent missing value
#We can confirm this with the folowing

TotalCharges = pd.to_numeric(df.TotalCharges, errors = 'coerce')

df[TotalCharges.isnull()][['customerID','TotalCharges']]


Unnamed: 0,customerID,TotalCharges
488,4472-LVYGI,
753,3115-CZMZD,
936,5709-LVOEQ,
1082,4367-NUYAO,
1340,1371-DWPAZ,
3331,7644-OMVMY,
3826,3213-VVOLG,
4380,2520-SGTTA,
5218,2923-ARZLG,
6670,4075-WKNIU,


In [24]:
# df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors = 'coerce')
df.TotalCharges = pd.to_numeric(df.TotalCharges, errors = 'coerce')
df.TotalCharges = df.TotalCharges.fillna(0)

df[TotalCharges.isnull()][['customerID','TotalCharges']]


Unnamed: 0,customerID,TotalCharges
488,4472-LVYGI,0.0
753,3115-CZMZD,0.0
936,5709-LVOEQ,0.0
1082,4367-NUYAO,0.0
1340,1371-DWPAZ,0.0
3331,7644-OMVMY,0.0
3826,3213-VVOLG,0.0
4380,2520-SGTTA,0.0
5218,2923-ARZLG,0.0
6670,4075-WKNIU,0.0


In [25]:
#cleaning up column names
df.columns = df.columns.str.lower().str.replace(' ','_')

string_columns = list(df.dtypes[df.dtypes == 'object'].index)

for col in string_columns:
    df[col] = df[col].str.lower().str.replace(' ','_')
    
    
#convert churn to int

# df.churn = (df.churn == 'yes').astype(int) #creating a pandas series to turn type to true/false
# (df.churn == 'yes').astype(int).head()

df['churn'] = (df.churn == 'yes').astype('int')


In [26]:
# df = df.assign(churn_int = (df.churn == 'yes').astype(int))
# df = df.assign(churn_copy = df.churn)
#tested tomake sure the astype.int work by creating out a copy of the column


In [27]:
df.head()
df.churn.value_counts()


0    5174
1    1869
Name: churn, dtype: int64

In [28]:
#Using Scikit-learn model_selection module for data splitting
from sklearn.model_selection import train_test_split

df_train_full, df_test = train_test_split(df, test_size = 0.2, random_state = 1)

print(len(df_train_full), len(df_test))

df_train_full.head()

#In the previous chapter we split the data set into 3 parts: train, validation, and test.
# Here we split it twice, but can split 1 more time --> taking df_train_ful and split 1 more time into train and validation

df_train, df_val = train_test_split(df_train_full, test_size = 0.33, random_state = 11)
#random_state controls the shuffling applied to the data before applying the split. Pass an int for reproducible output across multiple function calls. See Glossary.

#takes the target variable (churn) and saves it outside the dataframe
y_train = df_train.churn.values 
y_val = df_val.churn.values 

del df_train['churn']
del df_val['churn']


5634 1409


<span style="color:green; font-size:1.5em">**EDA**</span>

* Identify missing values
* Check distribution of target variables



In [29]:
df_train_full.isnull().sum()


customerid          0
gender              0
seniorcitizen       0
partner             0
dependents          0
tenure              0
phoneservice        0
multiplelines       0
internetservice     0
onlinesecurity      0
onlinebackup        0
deviceprotection    0
techsupport         0
streamingtv         0
streamingmovies     0
contract            0
paperlessbilling    0
paymentmethod       0
monthlycharges      0
totalcharges        0
churn               0
dtype: int64

In [30]:
df.churn.value_counts()

0    5174
1    1869
Name: churn, dtype: int64

In [31]:
print('% of churned users: ' , round((df['churn'].sum() *100.00 / df['churn'].count()),2), '%')

#other way to capture this is:
global_mean = df_train_full.churn.mean() ## since the mean is the sum of all the values / total rows

print(global_mean)

% of churned users:  26.54 %
0.26996805111821087


In [32]:
categorical = ['gender', 'seniorcitizen', 'partner', 'dependents',
               'phoneservice', 'multiplelines', 'internetservice',
               'onlinesecurity', 'onlinebackup', 'deviceprotection', 
               'techsupport','streamingtv', 'streamingmovies', 
               'contract', 'paperlessbilling','paymentmethod']

numerical = ['tenure','monthlycharges', 'totalcharges']

#Identify number of unique values for the categorical variables -- all variables have few unique values won't need to clean/group
df_train_full[categorical].nunique()



gender              2
seniorcitizen       2
partner             2
dependents          2
phoneservice        2
multiplelines       3
internetservice     3
onlinesecurity      3
onlinebackup        3
deviceprotection    3
techsupport         3
streamingtv         3
streamingmovies     3
contract            3
paperlessbilling    2
paymentmethod       4
dtype: int64

In [33]:
#Feature Importance - understanding how important a feature is for the model's prediction
#Start by simple EDA on variables 

female_mean = df_train_full[df_train_full['gender'] == 'female'].churn.mean()
male_mean = df_train_full[df_train_full['gender'] == 'male'].churn.mean()

print(female_mean, male_mean)
#difference is quite small --> gender isn't helpful in identifying who will churn

#in addition to looking at the difference between the groups, we can look at the risk ratio or the ratio between probabilities in different groups
#risk = group_rate / global_rate or negative outcome for group 1 / negative outcome global

print('Risk Ratio: ', female_mean / global_mean)

0.27682403433476394 0.2632135306553911
Risk Ratio:  1.0253955354648652


In [34]:
partner_yes = df_train_full[df_train_full['partner'] == 'yes'].churn.mean()
partner_no = df_train_full[df_train_full['partner'] == 'no'].churn.mean()

partner_yes, partner_no
#Shows users with a partner have a lower rate of churn than the average and also those without a partner
#Users without a partner have a higher rate of churn the average


(0.20503330866025166, 0.3298090040927694)

In [35]:
#Easier way to calculate across all:

global_mean = df_train_full.churn.mean()

df_group = df_train_full.groupby(by = 'gender').churn.agg(['mean'])
df_group['diff']  = df_group['mean'] - global_mean
df_group['risk'] = df_group['mean'] / global_mean

df_group

Unnamed: 0_level_0,mean,diff,risk
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.276824,0.006856,1.025396
male,0.263214,-0.006755,0.97498


In [36]:
##to do this for all categorical variables create function to iterate through them

for col in categorical:
    df_group = df_train_full.groupby(by = col).churn.agg(['mean','count'])
    df_group['diff'] = df_group['mean'] - global_mean
    df_group['risk'] = df_group['mean'] / global_mean
    display(df_group)
    
#Variables that have significant differences: Clients w/no tech support and users on month-to-month contracts, 
# and those w/fiber optic internet


Unnamed: 0_level_0,mean,count,diff,risk
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,0.276824,2796,0.006856,1.025396
male,0.263214,2838,-0.006755,0.97498


Unnamed: 0_level_0,mean,count,diff,risk
seniorcitizen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.24227,4722,-0.027698,0.897403
1,0.413377,912,0.143409,1.531208


Unnamed: 0_level_0,mean,count,diff,risk
partner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.329809,2932,0.059841,1.221659
yes,0.205033,2702,-0.064935,0.759472


Unnamed: 0_level_0,mean,count,diff,risk
dependents,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.31376,3968,0.043792,1.162212
yes,0.165666,1666,-0.104302,0.613651


Unnamed: 0_level_0,mean,count,diff,risk
phoneservice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.241316,547,-0.028652,0.89387
yes,0.273049,5087,0.003081,1.011412


Unnamed: 0_level_0,mean,count,diff,risk
multiplelines,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.257407,2700,-0.012561,0.953474
no_phone_service,0.241316,547,-0.028652,0.89387
yes,0.290742,2387,0.020773,1.076948


Unnamed: 0_level_0,mean,count,diff,risk
internetservice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dsl,0.192347,1934,-0.077621,0.712482
fiber_optic,0.425171,2479,0.155203,1.574895
no,0.077805,1221,-0.192163,0.288201


Unnamed: 0_level_0,mean,count,diff,risk
onlinesecurity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.420921,2801,0.150953,1.559152
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.153226,1612,-0.116742,0.56757


Unnamed: 0_level_0,mean,count,diff,risk
onlinebackup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.404323,2498,0.134355,1.497672
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.217232,1915,-0.052736,0.80466


Unnamed: 0_level_0,mean,count,diff,risk
deviceprotection,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.395875,2473,0.125907,1.466379
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.230412,1940,-0.039556,0.85348


Unnamed: 0_level_0,mean,count,diff,risk
techsupport,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.418914,2781,0.148946,1.551717
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.159926,1632,-0.110042,0.59239


Unnamed: 0_level_0,mean,count,diff,risk
streamingtv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.342832,2246,0.072864,1.269897
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.302723,2167,0.032755,1.121328


Unnamed: 0_level_0,mean,count,diff,risk
streamingmovies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.338906,2213,0.068938,1.255358
no_internet_service,0.077805,1221,-0.192163,0.288201
yes,0.307273,2200,0.037305,1.138182


Unnamed: 0_level_0,mean,count,diff,risk
contract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
month-to-month,0.431701,3104,0.161733,1.599082
one_year,0.120573,1186,-0.149395,0.446621
two_year,0.028274,1344,-0.241694,0.10473


Unnamed: 0_level_0,mean,count,diff,risk
paperlessbilling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,0.172071,2313,-0.097897,0.637375
yes,0.338151,3321,0.068183,1.25256


Unnamed: 0_level_0,mean,count,diff,risk
paymentmethod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bank_transfer_(automatic),0.168171,1219,-0.101797,0.622928
credit_card_(automatic),0.164339,1217,-0.10563,0.608733
electronic_check,0.45589,1893,0.185922,1.688682
mailed_check,0.19387,1305,-0.076098,0.718121


<h1> Mutual Information</h1>

Accounting for Mutual Information of variables - tells us how much we learn about 1 variable if we learn the value of another 
--> used to measure mutual dependency b/w 2 variables; higher values of mutual information --> higher degree of dependence b/w 2 variables


In [39]:
from sklearn.metrics import mutual_info_score
# sklearn.metrics.mutual_info_score(labels_true, labels_pred, *, contingency=None)

#creating a function to calcualte mutual info score
def calculate_mi(series):
    return mutual_info_score(series, df_train_full.churn)

#compute the mutual_info_score b/w the series and the churn variable
df_mi = df_train_full[categorical].apply(calculate_mi)
df_mi = df_mi.sort_values(ascending=False).to_frame(name='MI')
df_mi

#contract, onlinesecurity, techsupport among the most important features

Unnamed: 0,MI
contract,0.09832
onlinesecurity,0.063085
techsupport,0.061032
internetservice,0.055868
onlinebackup,0.046923
deviceprotection,0.043453
paymentmethod,0.04321
streamingtv,0.031853
streamingmovies,0.031581
paperlessbilling,0.017589


<h> Correlation Coefficient <h>
Used to dependency b/w binary target variable & numerical variable (in this case) and is a value b/w -1 to 1

* Positive correlation: value of the variable goes up the target variable will see more 1s more often than zeros
* No Correlation = 0; variables are independent of each other
* Negative Correlation: target variable will do the opposite of the variable


In [41]:
df_train_full[numerical].corrwith(df_train_full.churn) 
#https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corrwith.html

* Tenure & Churn have an inverse relationship --> the longer a customer stays the less likely they will churn
* Monthly Charges & Churn --> the higher the charges the more likely a customer will churn
* Total Charges --> the total charges over the tenure of the customer --> the longer someone stays the higher their total charge 
     --> the less likely they will churn and the inverse for low total charges

tenure           -0.351885
monthlycharges    0.196805
totalcharges     -0.196353
dtype: float64

<b>3.2 Feature Engineering<b>
    
* Transform categorical variables to numeric features
* Then train the logisitic regression
    
3.2.1. One-hot encoding for categorical variables
    
To simplify the encoding of categorical variables, we cna use Scikit_learn's function for DictVectorizer
    --> takes a dictionary and vectorizes it 
    --> vectors are put together as rows of a matrix --> used in model
        

In [46]:
train_dict = df_train[categorical + numerical].to_dict(orient='records')

# train_dict[1]

from sklearn.feature_extraction import DictVectorizer

dv = DictVectorizer(sparse = False) #sparse= False will create a simple NumPy array
dv.fit(train_dict)

DictVectorizer(sparse=False)

In [47]:
X_train = dv.transform(train_dict)

In [50]:
X_train[0]

array([0.0000e+00, 0.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00,
       0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
       1.0000e+00, 0.0000e+00, 0.0000e+00, 8.6100e+01, 1.0000e+00,
       0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
       0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
       0.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
       0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
       0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
       0.0000e+00, 0.0000e+00, 1.0000e+00, 7.1000e+01, 6.0459e+03])

In [51]:
dv.get_feature_names()

['contract=month-to-month',
 'contract=one_year',
 'contract=two_year',
 'dependents=no',
 'dependents=yes',
 'deviceprotection=no',
 'deviceprotection=no_internet_service',
 'deviceprotection=yes',
 'gender=female',
 'gender=male',
 'internetservice=dsl',
 'internetservice=fiber_optic',
 'internetservice=no',
 'monthlycharges',
 'multiplelines=no',
 'multiplelines=no_phone_service',
 'multiplelines=yes',
 'onlinebackup=no',
 'onlinebackup=no_internet_service',
 'onlinebackup=yes',
 'onlinesecurity=no',
 'onlinesecurity=no_internet_service',
 'onlinesecurity=yes',
 'paperlessbilling=no',
 'paperlessbilling=yes',
 'partner=no',
 'partner=yes',
 'paymentmethod=bank_transfer_(automatic)',
 'paymentmethod=credit_card_(automatic)',
 'paymentmethod=electronic_check',
 'paymentmethod=mailed_check',
 'phoneservice=no',
 'phoneservice=yes',
 'seniorcitizen',
 'streamingmovies=no',
 'streamingmovies=no_internet_service',
 'streamingmovies=yes',
 'streamingtv=no',
 'streamingtv=no_internet_servic

Logistic Regression:
    The output is a probability of an event happening
    
g(xi) = sigmoid(w0 + x^Ti*w) (note a linear regression model doesn't utilize the sigmoid function). Also the sigmoid function will make sure the output is between 0 and 1 (or intrepreted as a probability)

Sigmoid = 1 / 1 + exp(-x)

In [54]:
# def linear regression(xi):
#     result = bias
#     for j in range(n):
#         result = result + xi[j] * w[j]
#     return result

def logistic_regression(xi):
    score = bias
    for j in range(n):
        score = score + xi[j] * w[j]
    prob = sigmoid(score)        
    return prob

import math

def sigmoid(score):
    return 1 / (1 + math.exp(-score))


In [55]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver = 'liblinear', random_state = 1)
model.fit(X_train, y_train)

LogisticRegression(random_state=1, solver='liblinear')

In [60]:
val_dict = df_val[categorical + numerical].to_dict(orient = 'records')
X_val = dv.transform(val_dict)

y_pred = model.predict_proba(X_val)

y_pred # produces 2 dimensional array. 1st column contains probability the target is 0 (client won't churn)
#2nd column: probablity target is 1 (client will churn)

array([[0.76509452, 0.23490548],
       [0.73114964, 0.26885036],
       [0.68055068, 0.31944932],
       ...,
       [0.94275132, 0.05724868],
       [0.3847724 , 0.6152276 ],
       [0.93872722, 0.06127278]])

In [61]:
y_pred = model.predict_proba(X_val)[:,1] #: = select all rows and 1 means select only the 2nd column

y_pred #provides the probability the client will churn only


array([0.23490548, 0.26885036, 0.31944932, ..., 0.05724868, 0.6152276 ,
       0.06127278])

In [63]:
#Because we want to understand which customers to reach out to we will focus on customers who have a 0.5+ chance of churning

y_pred >= 0.5

churn = y_pred >=0.5

In [64]:
#Measuring quality of the model:
# * accuracy -- determining the number of times the prediction matches the actual value

(y_val == churn).mean()
#note the y_val is an int variabl and the churn is booelan but is recast to int

#model matched 80% of its output or is accurate 80% of the time

0.8016129032258065

In [69]:
#Model Intrepretation

model.intercept_[0] #w0
model.coef_[0] #weighted vector

array([ 5.63364001e-01, -8.58801191e-02, -5.99471996e-01, -3.02653696e-02,
       -9.17227451e-02,  9.99351394e-02, -1.15874933e-01, -1.06048321e-01,
       -2.73672166e-02, -9.46208981e-02, -3.23337975e-01,  3.17224794e-01,
       -1.15874933e-01,  7.83699338e-04, -1.68108194e-01,  1.27129027e-01,
       -8.10089474e-02,  1.35691087e-01, -1.15874933e-01, -1.41804269e-01,
        2.57837939e-01, -1.15874933e-01, -2.63951121e-01, -2.12622849e-01,
        9.06347345e-02, -4.79899461e-02, -7.39981686e-02, -2.66727814e-02,
       -1.36238321e-01,  1.74728722e-01, -1.33805734e-01,  1.27129027e-01,
       -2.49117141e-01,  2.97084094e-01, -8.48297478e-02, -1.15874933e-01,
        7.87165660e-02, -9.90472114e-02, -1.15874933e-01,  9.29340297e-02,
        1.78129406e-01, -1.15874933e-01, -1.84242588e-01, -6.94882832e-02,
        4.47713249e-04])

In [73]:
#To see each feature associated w/its weight use the get_feature_names method

dict(zip(dv.get_feature_names(), model.coef_[0].round(3)))

{'contract=month-to-month': 0.563,
 'contract=one_year': -0.086,
 'contract=two_year': -0.599,
 'dependents=no': -0.03,
 'dependents=yes': -0.092,
 'deviceprotection=no': 0.1,
 'deviceprotection=no_internet_service': -0.116,
 'deviceprotection=yes': -0.106,
 'gender=female': -0.027,
 'gender=male': -0.095,
 'internetservice=dsl': -0.323,
 'internetservice=fiber_optic': 0.317,
 'internetservice=no': -0.116,
 'monthlycharges': 0.001,
 'multiplelines=no': -0.168,
 'multiplelines=no_phone_service': 0.127,
 'multiplelines=yes': -0.081,
 'onlinebackup=no': 0.136,
 'onlinebackup=no_internet_service': -0.116,
 'onlinebackup=yes': -0.142,
 'onlinesecurity=no': 0.258,
 'onlinesecurity=no_internet_service': -0.116,
 'onlinesecurity=yes': -0.264,
 'paperlessbilling=no': -0.213,
 'paperlessbilling=yes': 0.091,
 'partner=no': -0.048,
 'partner=yes': -0.074,
 'paymentmethod=bank_transfer_(automatic)': -0.027,
 'paymentmethod=credit_card_(automatic)': -0.136,
 'paymentmethod=electronic_check': 0.175,
