In [1]:
import pandas as pd
import pickle as pkl
from tqdm.auto import tqdm
from collections import Counter

import warnings
warnings.filterwarnings('ignore')

In [3]:
df_survey = pd.read_csv("data/survey_answers.csv")
with open("data/survey_Q_dict.pkl", "rb") as f:
    q_dict = pkl.load(f)

In [4]:
q_dict

{'Q23': 'If you would like to participate in the survey, click on “Yes” below. With this you declare:\n\n- I am 16 years or older.\n\n- I have read and understood the information.- I understand that through the integration of the service provider Qualtrics LLC, it cannot be ruled out that my data is stored temporarily on non-EU servers.\n\n- I agree to participate in the study and to provide the data obtained with it.\n\n- I reserve the right to withdraw this consent without giving any reason. (until the full anonymization of the data)\n\n- I reserve the right to stop the study at any time I wish.',
 'Q7': 'How old are you?',
 'Q8': 'What is your level of education?',
 'Q9': 'What is your gender?',
 'Q10': 'What type of content do you receive on TikTok?',
 'Q28_1': 'Please express your degree of agreement or disagreement with the following statements: - TikTok is important for me to stay up to date with current affairs (politics, economics etc.).',
 'Q28_2': 'Please express your degree

In [5]:
df_survey.head(2)

Unnamed: 0,Q23,Q7,Q8,Q9,Q10,Q28_1,Q28_2,Q28_3,Q28_4,Q28_5,Q28_6,Q28_7,Q30,Random ID
0,"Yes, I participate",27 to 34,Master,Male,"Dance and Music,Sport,Entertaiment,Comedy and ...",Somewhat agree,Somewhat agree,Strongly disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Yes,6266
1,"Yes, I participate",16 to 26,Master,Female,"Dance and Music,Sport,Entertaiment,Comedy and ...",Somewhat disagree,Somewhat agree,Strongly disagree,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat agree,Yes,6592


- 'Q10': 'What type of content do you receive on TikTok?'
- Q28_1: TikTok is important for me to stay up to date with current affairs (politics, economics etc.).',
- Q28_2: TikTok is important for me to stay up to date with general affairs (celebrities, sports, etc.).',
- Q28_5: TikTok is important for me to learn new things (DIY, cooking etc.).',
- Q28_7: TikTok is showing me primarily informative content.',

In [6]:
df_info_qs = df_survey.loc[:,["Random ID","Q10","Q28_1","Q28_2","Q28_5","Q28_7"]]

In [7]:
df_info_qs.head(3)

Unnamed: 0,Random ID,Q10,Q28_1,Q28_2,Q28_5,Q28_7
0,6266,"Dance and Music,Sport,Entertaiment,Comedy and ...",Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree
1,6592,"Dance and Music,Sport,Entertaiment,Comedy and ...",Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree
2,6871,"Dance and Music,Entertaiment,Comedy and Drama,...",Somewhat disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree


In [8]:
df_info_qs["info"] = df_info_qs['Q10'].str.get_dummies(sep=',')["Information"]


In [9]:
df_info_qs = df_info_qs.drop(columns="Q10")

In [10]:
dict_to_num = {"Strongly disagree":1,
               "Somewhat disagree":2,
               "Neither agree nor disagree":3,
               "Somewhat agree":4,
               "Strongly agree":5

}

In [11]:
df_info_qs["Q28_1"] = df_info_qs["Q28_1"].map(dict_to_num)
df_info_qs["Q28_2"] = df_info_qs["Q28_2"].map(dict_to_num)
df_info_qs["Q28_5"] = df_info_qs["Q28_5"].map(dict_to_num)
df_info_qs["Q28_7"] = df_info_qs["Q28_7"].map(dict_to_num)

In [12]:
df_info_qs.describe().round(3)

Unnamed: 0,Random ID,Q28_1,Q28_2,Q28_5,Q28_7,info
count,18.0,18.0,18.0,18.0,18.0,18.0
mean,5708.722,2.444,3.167,3.611,3.444,0.667
std,2510.06,1.338,1.383,1.092,1.149,0.485
min,1065.0,1.0,1.0,1.0,1.0,0.0
25%,4405.75,1.25,2.25,3.0,3.0,0.0
50%,6219.5,2.0,4.0,4.0,4.0,1.0
75%,7002.25,3.0,4.0,4.0,4.0,1.0
max,9228.0,5.0,5.0,5.0,5.0,1.0


In [13]:
# load user informtive consumption
df_sessions = pd.read_csv("data/session_df.csv")
df_sessions.head()

Unnamed: 0,Date,Action,Url,OperatingSystem,likes,v_ids,activity_time,ID,session,active_engament,video_length,wt_frac,passive_endorsement,class
0,2023-03-17 11:37:38,watching,https://www.tiktokv.com/share/video/6984429060...,,,6984429060914990342,26.0,6592,1,0,24.0,1.083333,0,0
1,2023-03-17 11:38:05,watching,https://www.tiktokv.com/share/video/7211168761...,,,7211168761695604014,20.0,6592,1,0,12.0,1.666667,0,0
2,2023-03-17 11:38:25,watching,https://www.tiktokv.com/share/video/7211204623...,,,7211204623296630022,6.0,6592,1,0,10.0,0.6,0,0
3,2023-03-17 11:38:34,watching,https://www.tiktokv.com/share/video/7194066849...,,,7194066849594756354,37.0,6592,1,0,26.0,1.423077,0,1
4,2023-03-17 11:39:11,watching,https://www.tiktokv.com/share/video/7208232985...,,,7208232985055841542,9.0,6592,1,0,15.0,0.6,0,0


In [14]:
frac_info_content = []
for i in tqdm(list(df_sessions["ID"].unique())):
    user_df = df_sessions[df_sessions["ID"]==i]
    user_watch_history_length = len(user_df)
    frac_info = Counter(user_df["class"])[1]/user_watch_history_length 
    frac_info_content.append(frac_info)

  0%|          | 0/18 [00:00<?, ?it/s]

In [15]:
df_info_qs

Unnamed: 0,Random ID,Q28_1,Q28_2,Q28_5,Q28_7,info
0,6266,4,4,4,4,1
1,6592,2,4,4,4,0
2,6871,2,4,4,3,1
3,9228,2,3,4,3,1
4,2354,3,4,4,5,0
5,4256,1,1,2,4,0
6,8613,1,1,4,3,1
7,4855,1,1,2,5,0
8,6173,2,4,3,4,0
9,3000,1,1,1,4,1


In [20]:
df_info_qs["info_frag_ddp"] = frac_info_content
df_corr = df_info_qs.drop(columns=["Random ID","info"])
df_corr.corr().round(3)


Unnamed: 0,Q28_1,Q28_2,Q28_5,Q28_7,info_frag_ddp
Q28_1,1.0,0.53,0.447,-0.633,0.268
Q28_2,0.53,1.0,0.669,-0.197,0.507
Q28_5,0.447,0.669,1.0,-0.37,0.226
Q28_7,-0.633,-0.197,-0.37,1.0,-0.179
info_frag_ddp,0.268,0.507,0.226,-0.179,1.0


In [21]:
from scipy.stats import pearsonr

In [22]:
pearsonr(df_corr["info_frag_ddp"], df_corr["Q28_1"])

PearsonRResult(statistic=0.2680619609781814, pvalue=0.28215900539363936)

In [23]:
pearsonr(df_corr["info_frag_ddp"], df_corr["Q28_2"])

PearsonRResult(statistic=0.50672524398283, pvalue=0.03186632296548283)

In [24]:
pearsonr(df_corr["info_frag_ddp"], df_corr["Q28_5"])

PearsonRResult(statistic=0.22632412571086907, pvalue=0.3664859965381587)

In [25]:
pearsonr(df_corr["info_frag_ddp"], df_corr["Q28_7"])

PearsonRResult(statistic=-0.17851753385619396, pvalue=0.47848760705546783)