In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px 
import warnings 

# NOTE(review): no `category` is given, so this filter ignores every warning
# raised after this point, including deprecation notices from the plotting
# libraries used below. Consider narrowing it to specific categories.
warnings.simplefilter(action='ignore')

In [None]:
# Load the three CSV files of the Starbucks customer dataset into DataFrames.
# Paths are relative to the notebook's working directory.
portfolio = pd.read_csv(r'../input/starbucks-customer-data/portfolio.csv') 
profile = pd.read_csv(r'../input/starbucks-customer-data/profile.csv')
transcript = pd.read_csv(r'../input/starbucks-customer-data/transcript.csv')

In [None]:
# Collect the three frames so later cells can apply the same step to each,
# then print the (rows, columns) shape of every frame.
dfs = [transcript, portfolio, profile]
for frame in dfs:
    frame_shape = frame.shape
    print(f'{frame_shape}')

In [None]:
# Remove the 'Unnamed: 0' column from every frame in `dfs`.
# The drop is done in place so the names `transcript`, `portfolio` and
# `profile` (the same objects held in `dfs`) see the change.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column has already been removed.
for frame in dfs:
    frame.drop(columns='Unnamed: 0', inplace=True, errors='ignore')

In [None]:
# Print the per-column count of missing values for each frame in `dfs`.
for frame in dfs:
    null_counts = frame.isna().sum()
    print(f'Null values : {null_counts}')

In [None]:
# Count portfolio rows for every combination of offer type, difficulty,
# reward and duration, then turn the group keys back into ordinary columns.
offer_keys = ['offer_type', 'difficulty', 'reward', 'duration']
grouped_offers = portfolio.groupby(by=offer_keys)
df1 = grouped_offers.count().reset_index()
df1

In [None]:
# Sunburst over the grouped offers: one ring per level, in the order
# offer type -> difficulty -> reward -> duration. The same four columns are
# attached as custom data so the hover text can show them.
offer_levels = ['offer_type', 'difficulty', 'reward', 'duration']
fig = px.sunburst(df1, path=offer_levels, custom_data=offer_levels)

hover_style = {'font_family': 'Arial', 'font_size': 18}
fig.update_layout(
    title='Offer type, Difficulty, Reward, Duration',
    template='plotly_dark',
    hoverlabel=hover_style,
)

hover_text = 'Offertype: %{customdata[0]} <br> Difficulty: %{customdata[1]} <br> Reward : %{customdata[2]} <br> Duration: %{customdata[3]}'
fig.update_traces(hovertemplate=hover_text)
fig.show()

In [None]:
import re

In [None]:
# Whole-word patterns, one per channel name looked for in portfolio['channels'].
email_srch = re.compile(r'\bemail\b')
web_srch = re.compile(r'\bweb\b')
mobile_srch = re.compile(r'\bmobile\b')
social_srch = re.compile(r'\bsocial\b')


def _channel_or_nan(pattern, text):
    """Return the first match of `pattern` in `text`, or the string 'NaN'.

    The placeholder is the literal string 'NaN' (not a float NaN), matching
    what the per-channel columns have always contained.
    """
    hits = pattern.findall(text)  # scan once instead of once to test and once to read
    return hits[0] if hits else 'NaN'


# One entry per portfolio row: the channel name if present, otherwise 'NaN'.
email_list = [_channel_or_nan(email_srch, entry) for entry in portfolio['channels']]
web_list = [_channel_or_nan(web_srch, entry) for entry in portfolio['channels']]
mobile_list = [_channel_or_nan(mobile_srch, entry) for entry in portfolio['channels']]
social_list = [_channel_or_nan(social_srch, entry) for entry in portfolio['channels']]

# Column insertion order is kept as email, mobile, social, web.
portfolio['email_channel'] = email_list
portfolio['mobile_channel'] = mobile_list
portfolio['social_channel'] = social_list
portfolio['web_channel'] = web_list

In [None]:
# The raw 'channels' column has been split into per-channel columns, so drop it.
# errors='ignore' prevents a KeyError if this cell is executed a second time.
portfolio.drop(columns='channels', inplace=True, errors='ignore')

In [None]:
# Show the dtype of every portfolio column.
portfolio.dtypes

In [None]:
# Sunburst of offers by type and channel: rings go offer type -> web ->
# mobile -> social -> email. Each channel column holds either the channel
# name or the string 'NaN' when the offer does not use that channel.
fig = px.sunburst(
    portfolio,
    path=['offer_type', 'web_channel', 'mobile_channel', 'social_channel', 'email_channel'],
    custom_data=['offer_type', 'email_channel', 'mobile_channel', 'social_channel', 'web_channel'],
)
# The title previously repeated the first sunburst's
# "Offer type, Difficulty, Reward, Duration"; this chart shows channels.
fig.update_layout(title='Offer type and Channels Used', template='plotly_dark', hoverlabel=dict(
    font_family='Arial',
    font_size=18,
))
fig.update_traces(hovertemplate='Offertype: %{customdata[0]} <br> Channels Used: %{customdata[1]}, %{customdata[2]}, %{customdata[3]}, %{customdata[4]}')
fig.show()

In [None]:
# Preview the first rows of the transcript frame.
transcript.head()

In [None]:
# (rows, columns) of the transcript frame.
transcript.shape

In [None]:
# How often each distinct entry of the 'value' column occurs.
transcript['value'].value_counts()

In [None]:
# Pie chart of how many transcript rows belong to each event type.
event_counts = transcript['event'].value_counts()
values = event_counts
labels = event_counts.index

fig = px.pie(transcript, names=labels, values=values)

hover_style = dict(font_size=18, font_family='Helvetica')
fig.update_layout(
    title='Distribution of Event types',
    template='plotly_dark',
    hoverlabel=hover_style,
)
fig.update_traces(hovertemplate='Event : %{label} <br> Number of Instances: %{value}')
fig.show()

In [None]:
# Remove, in place, every profile row that has at least one missing value.
# The row labels are left with gaps after this step.
profile.dropna(inplace=True)

In [None]:
# Renumber the rows 0..n-1 after the dropna above.
# drop=True discards the old labels instead of inserting them as an 'index'
# column; without it, re-running this cell adds a 'level_0' column and a
# third run raises ValueError because 'level_0' already exists.
profile = profile.reset_index(drop=True)

In [None]:
# Per-column count of missing values remaining in profile.
profile.isna().sum()

In [None]:
# Preview the first rows of the profile frame.
profile.head()

In [None]:
# Show the dtype of every profile column.
profile.dtypes

In [None]:
# Age distribution as a density-normalised histogram with a KDE overlay.
# `sns.distplot` is deprecated (the notice is hidden by the global warning
# filter) and the DataFrame passed positionally was not used for the data
# once `x=` was given. `histplot` with stat='density', kde=True and
# kde_kws cut=3 is the documented equivalent.
sns.histplot(data=profile, x='age', kde=True, stat='density', kde_kws={'cut': 3})
plt.title('Distribution of customer age')
plt.show()

In [None]:
# Number of entries in the 'became_member_on' column.
len(profile['became_member_on'])

In [None]:
# Split each 'became_member_on' entry into year, month and day by slicing its
# string form: characters 0-3, 4-5 and 6-7 respectively.
# assumes the entries look like YYYYMMDD — TODO confirm against the CSV
member_stamps = [str(stamp) for stamp in profile['became_member_on']]

years = [int(stamp[:4]) for stamp in member_stamps]
months = [int(stamp[4:6]) for stamp in member_stamps]
days = [int(stamp[6:8]) for stamp in member_stamps]

In [None]:
# Attach the parsed date parts to profile as new columns, in the order
# years, months, days.
date_parts = {'years': years, 'months': months, 'days': days}
for column_name, column_values in date_parts.items():
    profile[column_name] = column_values

In [None]:
# Preview profile with the new 'years', 'months' and 'days' columns.
profile.head()

In [None]:
import calendar

# Replace month numbers with abbreviations from calendar.month_abbr.
# Values that are not integers are passed through unchanged, so re-running
# this cell after the column already holds abbreviations is a no-op instead
# of raising TypeError on calendar.month_abbr['Jan'].
profile['months'] = profile['months'].apply(
    lambda x: calendar.month_abbr[x] if isinstance(x, (int, np.integer)) else x
)

In [None]:
# Pie chart of the number of profile rows per gender value.
gender_counts = profile['gender'].value_counts()  # computed once, reused for both arguments
values = gender_counts
labels = gender_counts.index

fig = px.pie(profile, names=labels, values=values)
# Title typo fixed: "Distibution" -> "Distribution".
fig.update_layout(title='Distribution of gender', template='plotly_dark')
fig.update_traces(hovertemplate='%{label} : %{value}')
fig.show()

In [None]:
# Pie chart of the number of profile rows per value of the 'years' column.
year_counts = profile['years'].value_counts()  # computed once, reused for both arguments
values = year_counts
labels = year_counts.index

fig = px.pie(profile, names=labels, values=values)
# Title typo fixed: "Distibution" -> "Distribution".
fig.update_layout(title='Distribution of years', template='plotly_dark')
fig.update_traces(hovertemplate='%{label} : %{value}')
fig.show()

In [None]:
def year_wise(year):
    """Show a bar chart of profile rows per month for one year.

    Reads the notebook-level `profile` frame and uses its 'years' and
    'months' columns.

    Parameters
    ----------
    year : value compared against profile['years'] to select rows.

    Returns
    -------
    Whatever `fig.show()` returns; the chart is rendered as a side effect.
    """
    import calendar  # local import keeps the function independent of cell order

    profile_df = profile[profile['years'] == year]
    counts = profile_df['months'].value_counts()

    # value_counts orders by frequency, which scrambled the months on the
    # x-axis. Sort them by calendar position instead. Labels that are not
    # month abbreviations (e.g. still-numeric months) sort by their own value.
    month_rank = {abbr: pos for pos, abbr in enumerate(calendar.month_abbr) if abbr}
    labels = sorted(counts.index, key=lambda m: month_rank.get(m, m))
    values = [int(counts.loc[m]) for m in labels]

    fig = px.bar(x=labels, y=values, color_discrete_sequence=['lightgreen'])
    fig.update_layout(title=f'Customers data in the year {year}', template='plotly_dark')
    fig.update_traces(hovertemplate='Month : %{x} <br>Customers:  %{y}')
    fig.update_xaxes(title='Month', showline=True, linewidth=1, linecolor='white')
    fig.update_yaxes(title='Count', showgrid=False,showline=True, linewidth=1, linecolor='white')
    return fig.show()

In [None]:
# Render the per-month chart for rows whose 'years' value is 2015.
year_wise(2015)

In [None]:
def month_wise(month, year):
    """Show a bar chart of profile rows per day for one month of one year.

    Reads the notebook-level `profile` frame and uses its 'years', 'months'
    and 'days' columns.

    Parameters
    ----------
    month : value compared against profile['months'] (e.g. 'Apr' once the
        column holds abbreviations).
    year : value compared against profile['years'].

    Returns
    -------
    Whatever `fig.show()` returns; the chart is rendered as a side effect.
    """
    profile_df = profile[(profile['years'] == year) & (profile['months'] == month)]
    counts = profile_df['days'].value_counts()  # computed once, reused for both axes
    labels = counts.index.tolist()
    values = counts.tolist()

    fig = px.bar(x=labels, y=values, color_discrete_sequence=['aqua'])
    # The title used to say "Customer offers", but the bars count profile
    # rows (customers), not offers; wording now matches year_wise.
    fig.update_layout(title=f'Customers data in {month} {year}', template='plotly_dark')
    fig.update_traces(hovertemplate='Day : %{x} <br>Customers:  %{y}')
    fig.update_xaxes(title='Day', showline=True, linewidth=1, linecolor='white')
    fig.update_yaxes(title='Count', showgrid=False,showline=True, linewidth=1, linecolor='white')
    return fig.show()

In [None]:
# Render the per-day chart for rows with 'months' == 'Apr' and 'years' == 2018.
month_wise('Apr', 2018)