App user segmentation is the task of grouping users based on how they engage with the app. It helps find retained users, find the user segment for a marketing campaign, and solve many other business problems where you need to search for users based on similar characteristics. 

In [2]:
import numpy as np
import pandas as pd

In [1]:
import zipfile
with zipfile.ZipFile('/content/user-behaviour.zip') as zip_file:
    file_list = zip_file.namelist()

file_list

['user behaviour/',
 'user behaviour/userbehaviour.csv',
 '__MACOSX/user behaviour/._userbehaviour.csv']

In [3]:
import pandas as pd
with zipfile.ZipFile('/content/user-behaviour.zip') as zip_file:
    df = pd.read_csv(zip_file.open('user behaviour/userbehaviour.csv'))

In [4]:
df.head()

Unnamed: 0,userid,Average Screen Time,Average Spent on App (INR),Left Review,Ratings,New Password Request,Last Visited Minutes,Status
0,1001,17.0,634.0,1,9,7,2990,Installed
1,1002,0.0,54.0,0,4,8,24008,Uninstalled
2,1003,37.0,207.0,0,8,5,971,Installed
3,1004,32.0,445.0,1,6,2,799,Installed
4,1005,45.0,427.0,1,5,6,3668,Installed


In [5]:
df.shape

(999, 8)

In [6]:
df.isna().sum()

userid                        0
Average Screen Time           0
Average Spent on App (INR)    0
Left Review                   0
Ratings                       0
New Password Request          0
Last Visited Minutes          0
Status                        0
dtype: int64

In [7]:
df.describe()

Unnamed: 0,userid,Average Screen Time,Average Spent on App (INR),Left Review,Ratings,New Password Request,Last Visited Minutes
count,999.0,999.0,999.0,999.0,999.0,999.0,999.0
mean,1500.0,24.39039,424.415415,0.497497,6.513514,4.941942,5110.898899
std,288.530761,14.235415,312.365695,0.500244,2.701511,2.784626,8592.036516
min,1001.0,0.0,0.0,0.0,0.0,1.0,201.0
25%,1250.5,12.0,96.0,0.0,5.0,3.0,1495.5
50%,1500.0,24.0,394.0,0.0,7.0,5.0,2865.0
75%,1749.5,36.0,717.5,1.0,9.0,7.0,4198.0
max,1999.0,50.0,998.0,1.0,10.0,15.0,49715.0


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 8 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   userid                      999 non-null    int64  
 1   Average Screen Time         999 non-null    float64
 2   Average Spent on App (INR)  999 non-null    float64
 3   Left Review                 999 non-null    int64  
 4   Ratings                     999 non-null    int64  
 5   New Password Request        999 non-null    int64  
 6   Last Visited Minutes        999 non-null    int64  
 7   Status                      999 non-null    object 
dtypes: float64(2), int64(5), object(1)
memory usage: 62.6+ KB


In [9]:
df['Status'].value_counts()

Installed      916
Uninstalled     83
Name: Status, dtype: int64

Let’s start by looking at the highest, lowest, and average screen time of all the users:



In [17]:
print(f'Average screen time : {df["Average Screen Time"].mean()}')
print(f'Maximum screen time : {df["Average Screen Time"].max()}')
print(f'Minimum screen time : {df["Average Screen Time"].min()}')

Average screen time : 24.39039039039039
Maximum screen time : 50.0
Minimum screen time : 0.0


Now let’s have a look at the highest, lowest, and the average amount spent by all the users:



In [18]:
print(f'Average spend of user : {df["Average Spent on App (INR)"].mean()}')
print(f'Maximum spend of user : {df["Average Spent on App (INR)"].max()}')
print(f'Minimum spend of user : {df["Average Spent on App (INR)"].min()}')

Average spend of user : 424.4154154154154
Maximum spend of user : 998.0
Minimum spend of user : 0.0


Now let’s move forward to App User segmentation to find the users that the app retained and lost forever. I will be using the K-means clustering algorithm in Machine Learning for this task:

In [23]:
df.columns

Index(['userid', 'Average Screen Time', 'Average Spent on App (INR)',
       'Left Review', 'Ratings', 'New Password Request',
       'Last Visited Minutes', 'Status'],
      dtype='object')

In [24]:
clustering_data = df[['Average Screen Time', 'Average Spent on App (INR)','Left Review', 'Ratings', 'New Password Request','Last Visited Minutes']]

In [27]:
from sklearn.preprocessing import MinMaxScaler
for i in clustering_data.columns:
    MinMaxScaler(i)

In [28]:
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=3)
clusters = kmeans.fit_predict(clustering_data)
df["Segments"] = clusters



In [30]:
df.head(10)

Unnamed: 0,userid,Average Screen Time,Average Spent on App (INR),Left Review,Ratings,New Password Request,Last Visited Minutes,Status,Segments
0,1001,17.0,634.0,1,9,7,2990,Installed,1
1,1002,0.0,54.0,0,4,8,24008,Uninstalled,2
2,1003,37.0,207.0,0,8,5,971,Installed,1
3,1004,32.0,445.0,1,6,2,799,Installed,1
4,1005,45.0,427.0,1,5,6,3668,Installed,1
5,1006,28.0,599.0,0,9,4,2878,Installed,1
6,1007,49.0,887.0,1,9,6,4481,Installed,1
7,1008,8.0,31.0,0,2,1,1715,Installed,1
8,1009,28.0,741.0,1,8,2,801,Installed,1
9,1010,28.0,524.0,1,8,4,4621,Installed,1


In [31]:
df['Segments'].value_counts()

1    910
0     45
2     44
Name: Segments, dtype: int64

In [32]:
df["Segments"].replace([0,1,2],["Retained","Churn","Needs Attention"],inplace=True)

In [33]:
df['Segments'].value_counts()

Churn              910
Retained            45
Needs Attention     44
Name: Segments, dtype: int64

In [43]:
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

In [45]:
PLOT = go.Figure()
for i in list(df["Segments"].unique()):
    

    PLOT.add_trace(go.Scatter(x = df[df["Segments"]== i]['Last Visited Minutes'],y = df[df["Segments"] == i]['Average Spent on App (INR)'],
                                mode = 'markers',marker_size = 6, marker_line_width = 1,
                                name = str(i)))
    
PLOT.update_traces(hovertemplate='Last Visited Minutes: %{x} <br>Average Spent on App (INR): %{y}')

    
PLOT.update_layout(width = 800, height = 800, autosize = True, showlegend = True,
                   yaxis_title = 'Average Spent on App (INR)',
                   xaxis_title = 'Last Visited Minutes',
                   scene = dict(xaxis=dict(title = 'Last Visited Minutes', titlefont_color = 'black'),
                                yaxis=dict(title = 'Average Spent on App (INR)', titlefont_color = 'black')))

# **Summary**
So this is how you can segment users based on how they engage with the app. App users segmentation helps businesses find retained users, find the user segment for a marketing campaign, and solve many other business problems where you need to search for users based on similar characteristics.