In [1]:
# Import library yang dibutuhkan untuk eksplorasi dataset
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

import warnings
warnings.filterwarnings('ignore')

### Informasi Atribut

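| Atribut | Tipe Data | Deskripsi |
| --- | --- | --- |
| Dependents | object | Status apakah konsumen memiliki tanggungan (dependents) |
| Tenure | int64 | Lama konsumen berlangganan pada perusahaan (dalam bulan) |
| OnlineSecurity | object | Status kepemilikan fitur 'online security' konsumen |
| OnlineBackup | object | Status kepemilikan fitur 'online backup' konsumen |
| InternetService | object | Status subskripsi konsumen terhadap 'internet service' |
| DeviceProtection | object | Status kepemilikan 'device protection' konsumen |
| TechSupport | object | Status kepemilikan fitur 'tech support' konsumen |
| Contract | object | Jenis kontrak berdasarkan durasinya |
| PaperlessBilling | object | Status penggunaan tagihan tanpa kertas (paperless) |
| MonthlyCharges | float64 | Jumlah tagihan layanan per bulan |
| Churn | object | Status apakah konsumen berhenti berlangganan (churn) |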

[---]

##### ***Telco Customer Churn***

The dataset contains profiles of a telco company's customers, including whether each customer has left the company (churned). In telco and other subscription-based services, churn means the customer leaves the service provider.

| Nama Atribut | Deskripsi |
| --- | --- |
| Dependents | Whether the customer has dependents or not. |
| Tenure | Number of months the customer has stayed with the company. |
| OnlineSecurity | Whether the customer has online security or not. |
| OnlineBackup | Whether the customer has online backup or not. |
| InternetService | Whether the client is subscribed to Internet service. |
| DeviceProtection | Whether the client has device protection or not. |
| TechSupport | Whether the client has tech support or not. |
| Contract | Type of contract according to duration. |
| PaperlessBilling | Bills issued in paperless form. |
| MonthlyCharges | Amount charged for the service on a monthly basis. |
| Churn | Whether the customer churns or not. |


In [2]:
# Load the Telco customer churn dataset from the notebook's working directory
df = pd.read_csv('data_telco_customer_churn.csv')
# Preview 10 random rows; the fixed seed keeps the preview identical on every
# Restart & Run All instead of showing different rows each time.
df.sample(10, random_state=42)

Unnamed: 0,Dependents,tenure,OnlineSecurity,OnlineBackup,InternetService,DeviceProtection,TechSupport,Contract,PaperlessBilling,MonthlyCharges,Churn
1633,No,72,Yes,Yes,DSL,Yes,Yes,Two year,No,78.85,No
3294,No,71,No internet service,No internet service,No,No internet service,No internet service,Two year,No,23.85,No
3366,No,47,Yes,No,DSL,Yes,Yes,Two year,Yes,65.0,No
918,No,12,No,No,DSL,Yes,No,Month-to-month,Yes,49.85,No
3219,No,47,No,No,DSL,Yes,Yes,Two year,No,74.45,No
2233,No,1,No,No,Fiber optic,No,No,Month-to-month,Yes,89.3,Yes
2856,No,67,No internet service,No internet service,No,No internet service,No internet service,Two year,No,24.85,No
4553,No,13,No,Yes,Fiber optic,Yes,No,Month-to-month,No,94.1,Yes
1184,No,30,Yes,Yes,DSL,Yes,Yes,One year,No,85.35,Yes
3953,Yes,7,Yes,No,DSL,No,No,Month-to-month,No,50.7,No


In [3]:
# Print the column names, non-null counts, dtypes and memory usage of df
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4930 entries, 0 to 4929
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Dependents        4930 non-null   object 
 1   tenure            4930 non-null   int64  
 2   OnlineSecurity    4930 non-null   object 
 3   OnlineBackup      4930 non-null   object 
 4   InternetService   4930 non-null   object 
 5   DeviceProtection  4930 non-null   object 
 6   TechSupport       4930 non-null   object 
 7   Contract          4930 non-null   object 
 8   PaperlessBilling  4930 non-null   object 
 9   MonthlyCharges    4930 non-null   float64
 10  Churn             4930 non-null   object 
dtypes: float64(1), int64(1), object(9)
memory usage: 423.8+ KB


In [4]:
# Count the distinct values in each column
df.nunique()

Dependents             2
tenure                73
OnlineSecurity         3
OnlineBackup           3
InternetService        3
DeviceProtection       3
TechSupport            3
Contract               3
PaperlessBilling       2
MonthlyCharges      1422
Churn                  2
dtype: int64

In [5]:
# Count rows that are exact duplicates of an earlier row
df.duplicated().sum()

77

In [6]:
# Helper functions for reporting missing values
def persentase_null(data=None):
    """Return the percentage of missing values in each column.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame to inspect. When omitted, the notebook-level ``df`` is used,
        which keeps existing zero-argument calls working.

    Returns
    -------
    pandas.Series
        One entry per column: missing values as a percentage of the number
        of rows, rounded to 2 decimals.
    """
    if data is None:
        # Fall back to the global frame, as the original zero-argument version did.
        data = df
    return round(data.isnull().sum() * 100 / len(data), 2)

def jumlah_null(data=None):
    """Return the number of missing values in each column.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame to inspect. When omitted, the notebook-level ``df`` is used,
        which keeps existing zero-argument calls working.

    Returns
    -------
    pandas.Series
        One entry per column holding the count of missing values.
    """
    if data is None:
        # Fall back to the global frame, as the original zero-argument version did.
        data = df
    return data.isna().sum()

# Missing-value summary table: one row per column of df, with the count and
# the percentage side by side. Built once, when this cell runs.
df_null = pd.concat(
    [jumlah_null(), persentase_null()],
    axis=1,
    keys=['Jumlah Data', 'Persentase'],
)

# Report the missing values found in each column
def cek_missing_value(data=None):
    """Print a per-column missing-value table followed by the overall total.

    The table is rebuilt from the frame on every call, so the printed counts
    and the printed total always describe the same data. Printing a summary
    computed earlier would go stale once the frame is cleaned or modified.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame to inspect. When omitted, the notebook-level ``df`` is used,
        which keeps existing zero-argument calls working.

    Returns
    -------
    None
        The report is written to standard output.
    """
    if data is None:
        data = df
    jumlah = data.isna().sum()
    persentase = round(jumlah * 100 / len(data), 2)
    ringkasan = pd.DataFrame({
        'Jumlah Data': jumlah,
        'Persentase': persentase
    })
    print('Jumlah missing value di setiap kolom adalah sebagai berikut:\n')
    print(ringkasan)
    print('\nJumlah missing value dari semua kolom sebanyak', jumlah.sum(), 'data.')

In [7]:
# Print the per-column missing-value table and the overall total
cek_missing_value()

Jumlah missing value di setiap kolom adalah sebagai berikut:

                  Jumlah Data  Persentase
Dependents                  0         0.0
tenure                      0         0.0
OnlineSecurity              0         0.0
OnlineBackup                0         0.0
InternetService             0         0.0
DeviceProtection            0         0.0
TechSupport                 0         0.0
Contract                    0         0.0
PaperlessBilling            0         0.0
MonthlyCharges              0         0.0
Churn                       0         0.0

Jumlah missing value dari semua kolom sebanyak 0 data.


In [8]:
# Display the percentage of missing values for each column
persentase_null()

Dependents          0.0
tenure              0.0
OnlineSecurity      0.0
OnlineBackup        0.0
InternetService     0.0
DeviceProtection    0.0
TechSupport         0.0
Contract            0.0
PaperlessBilling    0.0
MonthlyCharges      0.0
Churn               0.0
dtype: float64

In [9]:
# Build a one-row-per-column overview of df: dtype, missing-value count and
# percentage, number of distinct values, and the distinct values themselves.
listItem = []
for col in df.columns:
    col_values = df[col]
    n_null = col_values.isna().sum()
    listItem.append([
        col,
        col_values.dtype,
        n_null,
        round((n_null / len(col_values)) * 100, 2),
        col_values.nunique(),
        list(col_values.drop_duplicates().values),
    ])

dfDesc = pd.DataFrame(
    data=listItem,
    columns=['dataFeatures', 'dataType', 'null', 'nullPct', 'unique', 'uniqueSample'],
)
dfDesc

Unnamed: 0,dataFeatures,dataType,null,nullPct,unique,uniqueSample
0,Dependents,object,0,0.0,2,"[Yes, No]"
1,tenure,int64,0,0.0,73,"[9, 14, 64, 72, 3, 40, 17, 11, 8, 47, 18, 5, 1..."
2,OnlineSecurity,object,0,0.0,3,"[No, Yes, No internet service]"
3,OnlineBackup,object,0,0.0,3,"[No, Yes, No internet service]"
4,InternetService,object,0,0.0,3,"[DSL, Fiber optic, No]"
5,DeviceProtection,object,0,0.0,3,"[Yes, No internet service, No]"
6,TechSupport,object,0,0.0,3,"[Yes, No, No internet service]"
7,Contract,object,0,0.0,3,"[Month-to-month, Two year, One year]"
8,PaperlessBilling,object,0,0.0,2,"[Yes, No]"
9,MonthlyCharges,float64,0,0.0,1422,"[72.9, 82.65, 47.85, 69.65, 23.6, 74.55, 19.7,..."
