In [2]:
import itertools
import os
import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from tqdm import tqdm

import plotly
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
warnings.simplefilter('ignore')

%matplotlib inline
plt.rcParams['figure.figsize'] = (6,4)
plt.rcParams['figure.dpi'] = 150

init_notebook_mode(connected=True)

In [2]:
# Location of the churn dataset. Set the TELECOM_CHURN_CSV environment variable
# to point at a local copy; the original absolute path is kept as the fallback
# so the notebook still runs unchanged on the machine it was written on.
csv_path = os.environ.get('TELECOM_CHURN_CSV', '/home/roman/data/telecom_churn.csv')
df = pd.read_csv(csv_path)

# Preview the first rows (last expression, so it is displayed by the cell).
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3333 entries, 0 to 3332
Data columns (total 21 columns):
state                     3333 non-null object
account length            3333 non-null int64
area code                 3333 non-null int64
phone number              3333 non-null object
international plan        3333 non-null object
voice mail plan           3333 non-null object
number vmail messages     3333 non-null int64
total day minutes         3333 non-null float64
total day calls           3333 non-null int64
total day charge          3333 non-null float64
total eve minutes         3333 non-null float64
total eve calls           3333 non-null int64
total eve charge          3333 non-null float64
total night minutes       3333 non-null float64
total night calls         3333 non-null int64
total night charge        3333 non-null float64
total intl minutes        3333 non-null float64
total intl calls          3333 non-null int64
total intl charge         3333 non-null float64

In [3]:
# Print the frame summary: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3333 entries, 0 to 3332
Data columns (total 21 columns):
state                     3333 non-null object
account length            3333 non-null int64
area code                 3333 non-null int64
phone number              3333 non-null object
international plan        3333 non-null object
voice mail plan           3333 non-null object
number vmail messages     3333 non-null int64
total day minutes         3333 non-null float64
total day calls           3333 non-null int64
total day charge          3333 non-null float64
total eve minutes         3333 non-null float64
total eve calls           3333 non-null int64
total eve charge          3333 non-null float64
total night minutes       3333 non-null float64
total night calls         3333 non-null int64
total night charge        3333 non-null float64
total intl minutes        3333 non-null float64
total intl calls          3333 non-null int64
total intl charge         3333 non-null float64

# ff.create_2d_density

In [4]:
def simple_2d_density(x, y, title, to_save=False):
    """Build one 2D density figure with ``ff.create_2d_density`` and emit it.

    Parameters
    ----------
    x, y
        Data for the two axes, passed straight to ``ff.create_2d_density``.
    title : str
        Figure title. When ``to_save`` is true it is also used as the
        ``filename`` handed to plotly's offline ``plot``.
    to_save : bool, default False
        If true the figure goes through ``plot`` (the notebook uses this mode
        for saving to html); otherwise it is shown with ``iplot``.
    """
    figure = ff.create_2d_density(x=x, y=y, title=title)

    if to_save:
        plot(figure, filename=title)
    else:
        # The original call passes filename=None in the inline case; keep it.
        iplot(figure, filename=None)

In [5]:
def multy_2d_density(columns, split_by=None, to_save=False, data=None):
    """Draw a 2D density plot for every unordered pair of ``columns``.

    Pairs are taken in the given order: for each pair the earlier column goes
    on X and the later one on Y.

    Parameters
    ----------
    columns : iterable of str
        Column names to combine pairwise.
    split_by : str, optional
        Column to split on. When given (truthy), every pair produces one plot
        per distinct value of this column, using only the rows with that value,
        and the value is appended to the plot title.
    to_save : bool, default False
        Forwarded unchanged to ``simple_2d_density``.
    data : pandas.DataFrame, optional
        Frame to plot from. Defaults to the notebook-level ``df``, which is
        what the function always used before this parameter existed.
    """
    frame = df if data is None else data

    pairs = list(itertools.combinations(columns, 2))
    if not pairs:
        # Nothing to plot; return before touching ``split_by`` at all.
        return

    # (title suffix, rows) for each plot drawn per pair. Built once up front:
    # neither the distinct values nor the filtered rows depend on the pair.
    if not split_by:
        subsets = [('', frame)]
    else:
        subsets = [
            (f'; {split_by}={v}', frame[frame[split_by] == v])
            for v in frame[split_by].unique()
        ]

    for x_col, y_col in pairs:
        for suffix, rows in subsets:
            simple_2d_density(
                x=rows[x_col],
                y=rows[y_col],
                title=f'(X) {x_col} VS {y_col} (Y){suffix}',
                to_save=to_save
            )

### 2 columns, not split

In [30]:
# Day vs night minutes over the whole dataset, without any split.
x_name, y_name = 'total day minutes', 'total night minutes'

simple_2d_density(df[x_name], df[y_name], title=f'(X) {x_name} VS {y_name} (Y)')

### 2 columns, split by target

In [18]:
# Same pair of columns, one plot per distinct value of the 'churn' column.
multy_2d_density(
    columns=['total day minutes', 'total night minutes'],
    split_by='churn',
)

### 3 columns, split by target

In [19]:
# Three columns give three pairs; each pair is plotted once per 'churn' value.
multy_2d_density(
    columns=['total day minutes', 'total eve minutes', 'total night minutes'],
    split_by='churn',
)

### 2 columns, split by target, saving to HTML

In [33]:
# Same plots as the split two-column example, but with to_save=True so the
# figures go through the saving path of simple_2d_density.
multy_2d_density(
    columns=['total day minutes', 'total night minutes'],
    split_by='churn',
    to_save=True,
)

# Generate all possible pairs and plot them separately

In [34]:
def gen_pairs_from(columns_list):
    """Return every unordered pair of items from ``columns_list``.

    Pairs keep the input order: ``(a, b)`` appears only if ``a`` comes before
    ``b`` in ``columns_list``, and pairs are listed by first element, then by
    second.

    Parameters
    ----------
    columns_list : iterable
        Items to combine. Any iterable is accepted, not only indexable
        sequences.

    Returns
    -------
    list of tuple
        All 2-item combinations; empty when fewer than two items are given.
    """
    return list(itertools.combinations(columns_list, 2))

In [42]:
# Queue of column pairs to inspect, built from the minutes and calls columns.
c_pairs = gen_pairs_from(columns_list=[
    'total day minutes',
    'total night minutes',
    'total day calls',
    'total night calls',
])

In [45]:
# NOTE: pop(0) removes the pair from c_pairs, so this cell is not idempotent:
# every run plots the next remaining pair, and nothing once the list is empty.
if c_pairs:
    multy_2d_density(columns=c_pairs.pop(0), split_by='churn')

In [18]:
# Synthetic check: 5000 draws per axis from a normal distribution (mean 0, std 1).
x = np.random.normal(loc=0, scale=1, size=5000)
y = np.random.normal(loc=0, scale=1, size=5000)

simple_2d_density(x=x, y=y, title='title')

In [20]:
# 500 x 500 grid over [-5, 5] x [-5, 5]; not used within this cell.
xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
# Generate train data: the same 100 random 2-D points shifted by +2 and by -2
X = 0.3 * np.random.randn(100, 2)
X_train = np.r_[X + 2, X - 2]
# Generate some regular novel observations
X = 0.3 * np.random.randn(20, 2)
X_test = np.r_[X + 2, X - 2]

# X_train has shape (200, 2): one row per sample, one column per coordinate.
# Plot column 0 against column 1. Indexing with X_train[0] / X_train[1] would
# instead pass the first two samples (two values each) as the x and y data.
simple_2d_density(x=X_train[:, 0], y=X_train[:, 1], title='title')