# 프로젝트 : Telco Customer Churn 고객 유지를 위한 행동 예측(이탈 방지)

In [91]:
from google.colab import drive
drive.mount('/content/Mydrive')

Drive already mounted at /content/Mydrive; to attempt to forcibly remount, call drive.mount("/content/Mydrive", force_remount=True).


In [92]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [93]:
data = pd.read_csv('/content/Mydrive/MyDrive/datascience/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [94]:
# 해당 함수는 Yes, No, No phone service 세가지 답변의 컬럼을 자동으로 숫자로 변화해주는 함수이다.
def Change(x):
  if x == 'No':
    return 0
  elif x == 'Yes':
    return 1
  elif x == 'No phone service':
    return 2

# Column 해석

customerID : 고객 id

gender : 성별

SeniorCitizen : 고령자

Partner : 파트너

Dependents : 부양 가족

tenure : 보유(고객이 머물렀던 개월 수)

PhoneService : 휴대폰 서비스

MultipleLines : 여러회선 여부( 예, 아니오, 전화 서비스 없음)

InternetService : 인터넷 서비스(인터넷 서비스 제공없체: DSL, 광섬유, 아니요)

OnlineSecurity : 온라인 보안 여부(예, 아니오, 전화서비스 없음)

OnlineBackup : 온라인 백업 여부(예, 아니오, 전화서비스 없음)

DeviceProtection : 장치보호 여부(예, 아니요, 인터넷 서비스 없음)

TechSupport : 기술지원 여부(예, 아니요, 인터넷 서비스 없음)

StreamingTV : StreamingTV보유 여부(예, 아니요, 인터넷 서비스 없음)

StreamingMovies : 스트리밍 영화 보유 여부(예, 아니요, 인터넷 서비스 없음)

Contract : 계약(계약기간 월간, 1년, 2년)

PaperlessBilling : 종이없는 결제(예, 아니요)

PaymentMethod : 고객의 결제 수단 (전자 수표, 우편 수표, 은행 송금 (자동), 신용 카드 (자동))

MonthlyCharges : 월간 요금

TotalCharges : 총 요금

Churn : 지난달 퇴사한 고객객

# 데이터 확인하기

In [95]:
data.head(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [96]:
data.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [97]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [98]:
data.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


In [99]:
NewPartner = []
NewDependents = []
NewPhoneService = []
NewMultipleLines = []
NewOnlineSecurity = []
NewOnlineBackup = []
NewDeviceProtection = []
NewTechSupport = []
NewStreamingTV = []
NewStreamingMovies = []
NewPaperlessBilling = []


Partner = data['Partner'] 
Dependents = data['Dependents']
PhoneService = data['PhoneService']
MultipleLines = data['MultipleLines']
OnlineSecurity = data['OnlineSecurity']
OnlineBackup = data['OnlineBackup']
DeviceProtection = data['DeviceProtection']
TechSupport = data['TechSupport']
StreamingTV = data['StreamingTV']
StreamingMovies = data['StreamingMovies']
PaperlessBilling = data['PaperlessBilling']


for i in Partner:
  NewPartner.append(Change(i))
  NewDependents.append(Change(i))
  NewPhoneService.append(Change(i))
  NewMultipleLines.append(Change(i))
  NewOnlineSecurity.append(Change(i))
  NewOnlineBackup.append(Change(i))
  NewDeviceProtection.append(Change(i))
  NewTechSupport.append(Change(i))
  NewStreamingTV.append(Change(i))
  NewStreamingMovies.append(Change(i))
  NewPaperlessBilling.append(Change(i))




data['Partner'] = NewPartner
data['Dependents'] = NewDependents
data['PhoneService'] = NewPhoneService
data['MultipleLines'] = NewMultipleLines
data['OnlineSecurity'] = NewOnlineSecurity
data['OnlineBackup'] =  NewOnlineBackup
data['DeviceProtection'] = NewDeviceProtection
data['TechSupport'] = NewTechSupport
data['StreamingTV'] = NewStreamingTV
data['StreamingMovies'] = NewStreamingMovies
data['PaperlessBilling'] = NewPaperlessBilling

In [100]:
data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,1,1,1,1,1,DSL,1,1,1,1,1,1,Month-to-month,1,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,0,0,34,0,0,DSL,0,0,0,0,0,0,One year,0,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,0,0,2,0,0,DSL,0,0,0,0,0,0,Month-to-month,0,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,0,0,45,0,0,DSL,0,0,0,0,0,0,One year,0,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,0,0,2,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,1,1,24,1,1,DSL,1,1,1,1,1,1,One year,1,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,1,1,72,1,1,Fiber optic,1,1,1,1,1,1,One year,1,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,1,1,11,1,1,DSL,1,1,1,1,1,1,Month-to-month,1,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,1,1,4,1,1,Fiber optic,1,1,1,1,1,1,Month-to-month,1,Mailed check,74.40,306.6,Yes


먼저 이탈한 고객의 옵션들을 살펴보자

In [101]:
Chdata = data.loc[data['Churn']=='Yes']
Chdata

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
2,3668-QPYBK,Male,0,0,0,2,0,0,DSL,0,0,0,0,0,0,Month-to-month,0,Mailed check,53.85,108.15,Yes
4,9237-HQITU,Female,0,0,0,2,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Electronic check,70.70,151.65,Yes
5,9305-CDSKC,Female,0,0,0,8,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Electronic check,99.65,820.5,Yes
8,7892-POOKP,Female,0,1,1,28,1,1,Fiber optic,1,1,1,1,1,1,Month-to-month,1,Electronic check,104.80,3046.05,Yes
13,0280-XJGEX,Male,0,0,0,49,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Bank transfer (automatic),103.70,5036.3,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7021,1699-HPSBG,Male,0,0,0,12,0,0,DSL,0,0,0,0,0,0,One year,0,Electronic check,59.80,727.8,Yes
7026,8775-CEBBJ,Female,0,0,0,9,0,0,DSL,0,0,0,0,0,0,Month-to-month,0,Bank transfer (automatic),44.20,403.35,Yes
7032,6894-LFHLY,Male,1,0,0,1,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Electronic check,75.75,75.75,Yes
7034,0639-TSIQW,Female,0,0,0,67,0,0,Fiber optic,0,0,0,0,0,0,Month-to-month,0,Credit card (automatic),102.95,6886.25,Yes


**가설1)** 단기계약일수록 이탈할 가능성이 높지 않을까?

In [102]:
pd.crosstab(data.Contract, data.Churn,normalize=True)

Churn,No,Yes
Contract,Unnamed: 1_level_1,Unnamed: 2_level_1
Month-to-month,0.315207,0.234985
One year,0.185574,0.02357
Two year,0.233849,0.006815


월별 계약일수록 이탈가능성이 높으며 장기계약인 2년계약시 가장 이탈율이 낮다

-> 장기 계약 고객으로 만들수록 이탈율을 줄일 수 있다.