## Perform Exploratory Data Analysis - Bivariate
- Examine each independent (predictor) feature in relation to the dependent (response) feature
- Form initial hypotheses regarding their level of correlation to the outcome

In [None]:
import os
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the cleaned workbook from the "Data" folder located under the
# current working directory.
data_dir = os.path.join(os.getcwd(), 'Data')
source_file = os.path.join(data_dir, 'WA-Telco-Customer-Churn-EDA.xlsx')
# header=0: the first row of the sheet supplies the column names.
df = pd.read_excel(io=source_file, header=0)

In [None]:
# Make appropriate data type assignments.
# Columns that are converted to the pandas 'category' dtype.
categorical_columns = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
    'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
    'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
    'Contract', 'PaperlessBilling', 'PaymentMethod', 'Churn',
]

# Bracket indexing is used for the assignment: pandas documents
# attribute-style assignment (df.name = ...) as a pitfall because it only
# works for existing columns whose names are valid identifiers and do not
# clash with DataFrame attributes or methods.
for column in categorical_columns:
    df[column] = df[column].astype('category')

# Parse TotalCharges as numeric; errors='coerce' turns any value that
# cannot be parsed into NaN instead of raising.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Validate new data type assignments.
df.dtypes

In [None]:
# Summary statistics for the numeric features only
# (include=None is the pandas default, spelled out here for clarity).
df.describe(include=None)

### Create Box-and-Whisker Plots

In [None]:
# Box-and-whisker plot of tenure, one box per Churn value.
df.boxplot(
    by="Churn",
    column="tenure",
    figsize=(9, 6),
)

In [None]:
# Box-and-whisker plot of MonthlyCharges, one box per Churn value.
df.boxplot(
    by="Churn",
    column="MonthlyCharges",
    figsize=(9, 6),
)

In [None]:
# Box-and-whisker plot of TotalCharges, one box per Churn value.
df.boxplot(
    by="Churn",
    column="TotalCharges",
    figsize=(9, 6),
)

### Create Frequency Tables and Stacked Bar Charts

In [None]:
# Frequency table: rows are Churn values, columns are gender values.
churn_gender_df = pd.crosstab(df["Churn"], df["gender"])
churn_gender_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by gender.
churn_gender_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are SeniorCitizen values.
churn_senior_df = pd.crosstab(df["Churn"], df["SeniorCitizen"])
churn_senior_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by SeniorCitizen.
churn_senior_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are Partner values.
churn_partner_df = pd.crosstab(df["Churn"], df["Partner"])
churn_partner_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by Partner.
churn_partner_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are Dependents values.
churn_dependents_df = pd.crosstab(df["Churn"], df["Dependents"])
churn_dependents_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by Dependents.
churn_dependents_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are PhoneService values.
churn_phonesvc_df = pd.crosstab(df["Churn"], df["PhoneService"])
churn_phonesvc_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by PhoneService.
churn_phonesvc_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are MultipleLines values.
churn_multilines_df = pd.crosstab(df["Churn"], df["MultipleLines"])
churn_multilines_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by MultipleLines.
churn_multilines_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are InternetService values.
churn_intservice_df = pd.crosstab(df["Churn"], df["InternetService"])
churn_intservice_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by InternetService.
churn_intservice_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are OnlineSecurity values.
churn_onlinesec_df = pd.crosstab(df["Churn"], df["OnlineSecurity"])
churn_onlinesec_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by OnlineSecurity.
churn_onlinesec_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are OnlineBackup values.
churn_olbackup_df = pd.crosstab(df["Churn"], df["OnlineBackup"])
churn_olbackup_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by OnlineBackup.
churn_olbackup_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are DeviceProtection values.
churn_devprotect_df = pd.crosstab(df["Churn"], df["DeviceProtection"])
churn_devprotect_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by DeviceProtection.
churn_devprotect_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are TechSupport values.
churn_tekspt_df = pd.crosstab(df["Churn"], df["TechSupport"])
churn_tekspt_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by TechSupport.
churn_tekspt_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are StreamingTV values.
churn_tv_df = pd.crosstab(df["Churn"], df["StreamingTV"])
churn_tv_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by StreamingTV.
churn_tv_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are StreamingMovies values.
churn_movies_df = pd.crosstab(df["Churn"], df["StreamingMovies"])
churn_movies_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by StreamingMovies.
churn_movies_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are Contract values.
churn_contract_df = pd.crosstab(df["Churn"], df["Contract"])
churn_contract_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by Contract.
churn_contract_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are PaperlessBilling values.
churn_billing_df = pd.crosstab(df["Churn"], df["PaperlessBilling"])
churn_billing_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by PaperlessBilling.
churn_billing_df.plot.barh(stacked=True, figsize=(6, 4))

In [None]:
# Frequency table: rows are Churn values, columns are PaymentMethod values.
churn_paymeth_df = pd.crosstab(df["Churn"], df["PaymentMethod"])
# Displayed as raw counts; dividing by churn_paymeth_df.sum() would show
# per-column proportions instead.
churn_paymeth_df

In [None]:
# Horizontal stacked bars: one bar per Churn value, segmented by PaymentMethod.
churn_paymeth_df.plot.barh(stacked=True, figsize=(6, 4))