# Prevalence of topics & visualizations


Copyright 2023 Maria Lima  
(mr3418@ic.ac.uk)

Last updated: 10/06/2023

### Dependencies

In [13]:
import pickle
import numpy as np
import pandas as pd
import pylab as plt
import altair as alt 
from utils import alexa_usage as ale
from utils import novelty_effect as nov

### Load and process data

In [11]:
PATH = './datasets/'
df_alexa = pickle.load(open(PATH + 'df_alexa.pkl', 'rb'))
df_topics = pickle.load(open(PATH + 'df_topic_embed.pkl', 'rb'))
df_act = pickle.load(open(PATH + 'df_activity.pkl', 'rb'))
topicID = pickle.load(open(PATH + 'df_topic_pred.pkl', 'rb'))
df_alexa.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8491 entries, 0 to 831
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   patient_id  8491 non-null   object        
 1   timeframe   8491 non-null   datetime64[ns]
 2   int_type    8491 non-null   object        
 3   date        8491 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 331.7+ KB


In [6]:
df_all = ale.get_usage_all(df_alexa)
df_all =df_all.drop(df_all.index[df_all.patient_id == 'P12'][:3]) 
len(df_all[df_all.int_type == 'random'])

6965

In [7]:
df_topics_ = ale.get_usage_topics(df_topics)
df_topics_.topic.unique()

array(['Undefined', 'Weather', 'Entertainment', 'Control',
       'Reminders/Time/Date', 'Answers', 'Attempt questionnaire', 'News',
       'Greetings', 'Timers'], dtype=object)

## Topics prevalence

##### 1) Plot with normalized x-axis 

In [6]:
figs = []
ax1, ax2, ax3, ax4 = 0,1,2,3
for (i,u),ax in zip(enumerate(['P2','P6','P12','P14']),[ax1,ax2,ax3,ax4]):
    ax = ale.plot_topics_monthly(df_topics_, u)
    figs.append(ax)
alt.vconcat(figs[0], figs[1], figs[2], figs[3]).configure_axis(
    labelFontSize=12
    ).configure_title(fontSize=15)

##### 2) % time slots of certain topics per user per novelty/post-novelty phase

In [8]:
df_topics_.topic.unique()

array(['Undefined', 'Weather', 'Entertainment', 'Control',
       'Reminders/Time/Date', 'Answers', 'Attempt questionnaire', 'News',
       'Greetings', 'Timers'], dtype=object)

In [9]:
# the two last numbers represent the total triggers for novelty/post-novelty phase
nov.topics_novelty(df=df_topics_, pid='P2', topic='Weather')
nov.topics_novelty(df=df_topics_, pid='P2', topic='Timers')
nov.topics_novelty(df=df_topics_, pid='P6', topic='Entertainment')
nov.topics_novelty(df=df_topics_, pid='P6', topic='Reminders/Time/Date')
nov.topics_novelty(df=df_topics_, pid='P12', topic='Weather')
nov.topics_novelty(df=df_topics_, pid='P14', topic='News')

([(50.0, 0.0, 50.0), (66.67, 33.33, 0.0)], 4, 9)