# Importing libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ipywidgets as widgets

# Loading the dataset

In [None]:
# Location of the raw Telco churn CSV, relative to the notebook.
DATA_PATH = "../data/WA_Fn-UseC_-Telco-Customer-Churn.csv"

# Read the whole file into the working DataFrame used by every later cell.
df = pd.read_csv(DATA_PATH)

# Overview of the dataset

In [None]:
# Show the first rows of the frame as the cell output for a quick look at
# the columns and sample values.
df.head()

# Summary of the data

In [None]:
# Print the column names, non-null counts and dtypes of the frame.
df.info()

# Remove missing values

In [None]:
# Blank entries in TotalCharges are turned into NaN before the float cast.
# Matching any empty or whitespace-only string (not just a single space)
# keeps the cast from failing on a differently padded blank, while genuinely
# malformed values still raise instead of being silently discarded.
df["TotalCharges"] = (
    df["TotalCharges"].replace(r"^\s*$", np.nan, regex=True).astype(float)
)

# Drop every row that has a missing value in any column.
df = df.dropna()

# Remove customerID feature

In [None]:
# customerID is dropped by name via the `columns` keyword.
df = df.drop(columns="customerID")

# Remove duplicate rows

In [None]:
# Keep the first occurrence of each fully identical row and discard the rest.
df = df.drop_duplicates(keep="first")

# Convert SeniorCitizen to a categorical (Yes/No) variable, just for visualization

In [None]:
# Recode the 0/1 flag as "No"/"Yes" labels: 1 becomes "Yes", anything else "No".
is_senior = df["SeniorCitizen"] == 1
df["SeniorCitizen"] = np.where(is_senior, "Yes", "No")

# Summary Statistics

In [None]:
# Display descriptive statistics of the frame as the cell output.
df.describe()

# Check number of unique values in each categorical feature

In [None]:
# Names of all object-dtype columns, taken as the categorical features.
object_columns = df.select_dtypes(include=["object"]).columns
categorical_features = object_columns.tolist()

# "Churn" is the target, not a predictor, so it is taken out of the list.
categorical_features.remove("Churn")

# Number of distinct values per categorical feature (shown as cell output).
df[categorical_features].nunique()

# Data Visualization

## Check the relation between the categorical features and the target

In [None]:
@widgets.interact(feature=categorical_features)
def plot_dist_catfeat(feature):
    """Draw a count plot of one categorical feature, split by churn class.

    Args:
        feature: Name of the column in ``df`` to put on the x-axis; the
            widget offers the entries of ``categorical_features``.
    """
    _, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x=feature, hue="Churn", ax=ax)
    ax.set_title(f"Distribution of the feature {feature} by class")
    ax.set_xlabel(feature)
    ax.set_ylabel("Count")
    plt.show()

In [None]:
# Count plot of the target column to show how the two churn classes are balanced.
_, target_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x="Churn", ax=target_ax)
target_ax.set_title("Distribution of the target variable")
target_ax.set_xlabel("Churn")
target_ax.set_ylabel("Count")
plt.show()

## Check the relation between the numerical features and the target

In [None]:
# "number" selects every numeric column regardless of bit width, rather than
# only the float/int dtypes matched by the "float" and "int" aliases.
numerical_features = list(df.select_dtypes(include="number").columns)


@widgets.interact(feature=numerical_features)
def plot_dist_numfeat(feature):
    """Draw a box plot of one numerical feature, split by churn class.

    Args:
        feature: Name of the column in ``df`` to put on the y-axis; the
            widget offers the entries of ``numerical_features``.
    """
    plt.figure(figsize=(6, 4))
    sns.boxplot(x="Churn", y=feature, data=df)
    # Name the plotted feature in the title, as plot_dist_catfeat does, so the
    # figure stays identifiable when the widget selection changes.
    plt.title(f"Distribution of the feature {feature} by class")
    plt.xlabel("Churn")
    plt.ylabel(feature)
    plt.show()

In [None]:
# pairplot is a figure-level function: it builds its own figure, so a
# preceding plt.figure() call would only leave an extra empty figure behind.
# The grid size is controlled through pairplot's `height`/`aspect` arguments.
sns.pairplot(df, hue="Churn")
plt.show()