In [2]:
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency

In [3]:
# 1. Load the adjusted responses from CSV (no encoding happens in this cell).

raw = pd.read_csv("dataset/adjusted.csv")
# Display (rows, columns) of the loaded frame.
raw.shape

(38, 12)

In [4]:
# List the column labels of the raw frame for reference in the cells below.
raw.columns

Index(['timestamp', 'age', 'sex', 'occupation', 'must_use_phone',
       'screen_time', 'sleep_time', 'is_smoker', 'is_alcoholics',
       'is_consuming_special_medicine', 'is_screen_time_distract_sleep',
       'difference_before_pandemic'],
      dtype='object')

In [5]:
# Respondent demography: number of non-null `age` answers for each `sex` value.
raw.groupby("sex")[["age"]].count()

Unnamed: 0_level_0,age
sex,Unnamed: 1_level_1
Pria,27
Wanita,11


In [6]:
# Summary (count / unique / top / freq) of the screen-time and sleep-time answers.
raw.loc[:, ["screen_time", "sleep_time"]].describe()

Unnamed: 0,screen_time,sleep_time
count,38,38
unique,5,2
top,6 - 8 jam sehari,6 - 8 jam sehari
freq,14,25


In [7]:
# Describe the screen-time answers separately for each sleep-time bracket.
raw.groupby("sleep_time")[["screen_time"]].describe()

Unnamed: 0_level_0,screen_time,screen_time,screen_time,screen_time
Unnamed: 0_level_1,count,unique,top,freq
sleep_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
3 - 5 jam sehari,13,5,6 - 8 jam sehari,6
6 - 8 jam sehari,25,4,3 - 5 jam sehari,10


In [8]:
# Describe the sleep-time answers separately for each occupation.
raw.groupby("occupation")[["sleep_time"]].describe()

Unnamed: 0_level_0,sleep_time,sleep_time,sleep_time,sleep_time
Unnamed: 0_level_1,count,unique,top,freq
occupation,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mahasiswa,5,2,6 - 8 jam sehari,3
Media Massa,2,2,6 - 8 jam sehari,1
Otomasi industri,1,1,6 - 8 jam sehari,1
Pendidikan,3,2,6 - 8 jam sehari,2
Perbankan,3,1,6 - 8 jam sehari,3
Seni Kreatif dan Desain,4,2,3 - 5 jam sehari,3
Teknologi Informasi,19,2,6 - 8 jam sehari,13
Telekomunikasi,1,1,6 - 8 jam sehari,1


In [9]:
# Summary of the smoking answers (count, distinct answers, most frequent answer).
raw["is_smoker"].describe()

count                                          38
unique                                          2
top       Tidak, saya bukan seorang perokok aktif
freq                                           36
Name: is_smoker, dtype: object

In [10]:
# Summary of the alcohol answers (count, distinct answers, most frequent answer).
raw["is_alcoholics"].describe()

count        38
unique        2
top       Tidak
freq         36
Name: is_alcoholics, dtype: object

In [16]:
# Load the encoded variant of the dataset and preview its first rows.
# Per the preview, most answers appear as numeric codes and `occupation` is
# additionally expanded into 0/1 `occupation_*` indicator columns.
# NOTE(review): `encoded` is not referenced by the cells visible below -- confirm it is still needed.
encoded = pd.read_csv("dataset/encoded.csv")
encoded.head()

Unnamed: 0,timestamp,age,sex,occupation,must_use_phone,screen_time,sleep_time,is_smoker,is_alcoholics,is_consuming_special_medicine,is_screen_time_distract_sleep,difference_before_pandemic,occupation_Mahasiswa,occupation_Media Massa,occupation_Otomasi industri,occupation_Pendidikan,occupation_Perbankan,occupation_Seni Kreatif dan Desain,occupation_Teknologi Informasi,occupation_Telekomunikasi
0,10/31/2020 9:29:02,1,1,Seni Kreatif dan Desain,0,2,1,0,0,1,2,2.0,0,0,0,0,0,1,0,0
1,10/31/2020 9:30:06,1,1,Seni Kreatif dan Desain,1,2,1,0,0,0,2,2.0,0,0,0,0,0,1,0,0
2,10/31/2020 9:31:32,1,1,Teknologi Informasi,0,2,2,0,0,0,1,2.0,0,0,0,0,0,0,1,0
3,10/31/2020 9:37:07,2,1,Teknologi Informasi,1,4,2,0,0,0,1,2.0,0,0,0,0,0,0,1,0
4,10/31/2020 9:38:10,1,1,Teknologi Informasi,0,3,2,0,0,0,1,2.0,0,0,0,0,0,0,1,0


In [55]:
# Contingency table: age bracket (rows) against sleep-time bracket (columns).
sleep_time_with_age = pd.crosstab(index=raw["age"], columns=raw["sleep_time"])
sleep_time_with_age

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari
age,Unnamed: 1_level_1,Unnamed: 2_level_1
15-20,7,9
20-25,5,14
25-30,0,2
>35,1,0


In [56]:
# Chi-square test of independence between age bracket and sleep-time bracket.
# Requires `from scipy.stats import chi2_contingency` in the notebook's import cell.
# NOTE(review): most expected counts printed below are well under 5, so the
# chi-square approximation is unreliable for this table; consider merging sparse
# age brackets or using an exact test before interpreting p.
c, p, dof, expected = chi2_contingency(sleep_time_with_age)
# One value per line, in order: statistic, p-value, degrees of freedom, expected counts.
print(c, p, dof, expected, sep="\n")

4.136153846153847
0.24713233982354563
3
[[ 5.47368421 10.52631579]
 [ 6.5        12.5       ]
 [ 0.68421053  1.31578947]
 [ 0.34210526  0.65789474]]


In [60]:
# Contingency table: screen-time bracket (rows) against sleep-time bracket (columns).
sleep_time_with_screen = pd.crosstab(index=raw["screen_time"], columns=raw["sleep_time"])
sleep_time_with_screen

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari
screen_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1 - 2 jam sehari,1,2
3 - 5 jam sehari,4,10
6 - 8 jam sehari,6,8
Kurang dari 1 jam sehari,1,0
Lebih dari 9 jam sehari,1,5


In [62]:
# Chi-square test of independence between screen-time bracket and sleep-time bracket.
# Requires `from scipy.stats import chi2_contingency` in the notebook's import cell.
# NOTE(review): several expected counts printed below are under 5 (some under 1),
# so the chi-square approximation is unreliable here; consider merging sparse
# screen-time brackets or using an exact test before interpreting p.
c, p, dof, expected = chi2_contingency(sleep_time_with_screen)
# One value per line, in order: statistic, p-value, degrees of freedom, expected counts.
print(c, p, dof, expected, sep="\n")

3.4074725274725277
0.49208606648283415
4
[[1.02631579 1.97368421]
 [4.78947368 9.21052632]
 [4.78947368 9.21052632]
 [0.34210526 0.65789474]
 [2.05263158 3.94736842]]


In [65]:
# sleep_time_with_forced_to_use_phone
# Contingency table: `must_use_phone` answer (rows) against sleep-time bracket (columns).
sleep_time_with_forced_to_use_phone = pd.crosstab(
    index=raw["must_use_phone"], columns=raw["sleep_time"]
)
sleep_time_with_forced_to_use_phone

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari
must_use_phone,Unnamed: 1_level_1,Unnamed: 2_level_1
Tidak,4,13
Ya,9,12


In [67]:
# Chi-square test of independence between the `must_use_phone` answer and sleep-time bracket.
# Requires `from scipy.stats import chi2_contingency` in the notebook's import cell.
# This is a 2x2 table (dof == 1), so scipy applies Yates' continuity correction
# by default (`correction=True`); the statistic below is the corrected one.
c, p, dof, expected = chi2_contingency(sleep_time_with_forced_to_use_phone)
# One value per line, in order: statistic, p-value, degrees of freedom, expected counts.
print(c, p, dof, expected, sep="\n")

0.8187890540831718
0.3655344559795839
1
[[ 5.81578947 11.18421053]
 [ 7.18421053 13.81578947]]


In [69]:
# Contingency table: sex (rows) against sleep-time bracket (columns).
sleep_time_with_sex = pd.crosstab(index=raw["sex"], columns=raw["sleep_time"])
sleep_time_with_sex

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Pria,9,18
Wanita,4,7


In [70]:
# Chi-square test of independence between sex and sleep-time bracket.
# Requires `from scipy.stats import chi2_contingency` in the notebook's import cell.
# This is a 2x2 table (dof == 1), so scipy applies Yates' continuity correction
# by default (`correction=True`); the statistic below is the corrected one.
# NOTE(review): one expected count printed below is under 5 -- an exact test may
# be more appropriate for this table.
c, p, dof, expected = chi2_contingency(sleep_time_with_sex)
# One value per line, in order: statistic, p-value, degrees of freedom, expected counts.
print(c, p, dof, expected, sep="\n")

0.03936803936803942
0.8427213146597374
1
[[ 9.23684211 17.76315789]
 [ 3.76315789  7.23684211]]
