In [33]:
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency

In [34]:
# 1. Load datasets: read the survey responses from CSV into `raw`
# and display its (rows, columns) shape.

raw = pd.read_csv("dataset/adjusted.csv")
raw.shape

(47, 12)

In [35]:
# List the column names of the loaded survey table.
raw.columns

Index(['timestamp', 'age', 'sex', 'occupation', 'must_use_phone',
       'screen_time', 'sleep_time', 'is_smoker', 'is_alcoholics',
       'is_consuming_special_medicine', 'is_screen_time_distract_sleep',
       'difference_before_pandemic'],
      dtype='object')

In [36]:
# Respondent demographics: summary (count / unique / top / freq) of the age column.

raw.loc[:, "age"].describe()

count        47
unique        4
top       15-20
freq         24
Name: age, dtype: object

In [37]:
# Summary of the daily screen-time and sleep-time answers.

raw.loc[:, ["screen_time", "sleep_time"]].describe()

Unnamed: 0,screen_time,sleep_time
count,47,47
unique,5,3
top,3 - 5 jam sehari,6 - 8 jam sehari
freq,19,29


In [38]:
# Summarise the screen-time answers within each sleep-time group.
raw.groupby("sleep_time")[["screen_time"]].describe()

Unnamed: 0_level_0,screen_time,screen_time,screen_time,screen_time
Unnamed: 0_level_1,count,unique,top,freq
sleep_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
3 - 5 jam sehari,17,4,6 - 8 jam sehari,8
6 - 8 jam sehari,29,4,3 - 5 jam sehari,12
Kurang dari 2 jam sehari,1,1,Lebih dari 9 jam sehari,1


In [39]:
# Summarise the sleep-time answers within each occupation group.
raw.groupby("occupation")[["sleep_time"]].describe()

Unnamed: 0_level_0,sleep_time,sleep_time,sleep_time,sleep_time
Unnamed: 0_level_1,count,unique,top,freq
occupation,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Media Massa,2,2,3 - 5 jam sehari,1
Otomasi industri,1,1,6 - 8 jam sehari,1
Pendidikan,8,3,3 - 5 jam sehari,4
Perbankan,3,1,6 - 8 jam sehari,3
Seni Kreatif dan Desain,6,2,3 - 5 jam sehari,3
Sosial,1,1,3 - 5 jam sehari,1
Teknologi Informasi,25,2,6 - 8 jam sehari,17
Telekomunikasi,1,1,6 - 8 jam sehari,1


In [40]:
# Summary of the smoking-status answers.
raw["is_smoker"].describe()

count                                          47
unique                                          2
top       Tidak, saya bukan seorang perokok aktif
freq                                           45
Name: is_smoker, dtype: object

In [41]:
# Summary of the alcohol-consumption answers.
raw["is_alcoholics"].describe()

count        47
unique        2
top       Tidak
freq         45
Name: is_alcoholics, dtype: object

In [42]:
# Load the encoded version of the survey and preview its first rows.
# NOTE(review): `encoded` is not referenced by any later cell visible in this
# notebook; the tests below all run on `raw` — confirm whether it is still needed.
encoded = pd.read_csv("dataset/encoded.csv")
encoded.head()

Unnamed: 0,timestamp,age,sex,occupation,must_use_phone,screen_time,sleep_time,is_smoker,is_alcoholics,is_consuming_special_medicine,...,difference_before_pandemic,occupation_Mahasiswa,occupation_Media Massa,occupation_Otomasi industri,occupation_Pendidikan,occupation_Perbankan,occupation_Seni Kreatif dan Desain,occupation_Sosial,occupation_Teknologi Informasi,occupation_Telekomunikasi
0,10/31/2020 9:29:02,1,1,Seni Kreatif dan Desain,0,2,1,0,0,1,...,2.0,0,0,0,0,0,1,0,0,0
1,10/31/2020 9:30:06,1,1,Seni Kreatif dan Desain,1,2,1,0,0,0,...,2.0,0,0,0,0,0,1,0,0,0
2,10/31/2020 9:31:32,1,1,Teknologi Informasi,0,2,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
3,10/31/2020 9:37:07,2,1,Teknologi Informasi,1,4,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
4,10/31/2020 9:38:10,1,1,Teknologi Informasi,0,3,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0


In [43]:
# Contingency table: age group (rows) x sleep time (columns).

sleep_time_with_age = pd.crosstab(index=raw["age"], columns=raw["sleep_time"])
sleep_time_with_age

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
15-20,10,13,1
20-25,6,14,0
25-30,0,2,0
>35,1,0,0


In [44]:
# Chi-square test of independence between age group and sleep time.
# The result holds, in order: chi-square statistic, p-value,
# degrees of freedom, and the table of expected frequencies.
# NOTE(review): many expected counts shown in the output are below 5, so the
# chi-square approximation may be unreliable for this table.
chi2_contingency(observed=sleep_time_with_age)

(4.755611899932386,
 0.5755179064165603,
 6,
 array([[ 8.68085106, 14.80851064,  0.5106383 ],
        [ 7.23404255, 12.34042553,  0.42553191],
        [ 0.72340426,  1.23404255,  0.04255319],
        [ 0.36170213,  0.61702128,  0.0212766 ]]))

In [45]:
# Contingency table: screen time (rows) x sleep time (columns).

sleep_time_with_screen = pd.crosstab(index=raw["screen_time"], columns=raw["sleep_time"])
sleep_time_with_screen

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
screen_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - 2 jam sehari,1,2,0
3 - 5 jam sehari,7,12,0
6 - 8 jam sehari,8,8,0
Kurang dari 1 jam sehari,1,0,0
Lebih dari 9 jam sehari,0,7,1


In [46]:
# Chi-square test of independence: screen time vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): several expected counts in the output are below 5.
chi2_contingency(observed=sleep_time_with_screen)

(11.603652894914774,
 0.16978314844220832,
 8,
 array([[ 1.08510638,  1.85106383,  0.06382979],
        [ 6.87234043, 11.72340426,  0.40425532],
        [ 5.78723404,  9.87234043,  0.34042553],
        [ 0.36170213,  0.61702128,  0.0212766 ],
        [ 2.89361702,  4.93617021,  0.17021277]]))

In [47]:
# Contingency table: occupation (rows) x sleep time (columns).

sleep_time_with_occupation = pd.crosstab(index=raw["occupation"], columns=raw["sleep_time"])
sleep_time_with_occupation

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Media Massa,1,1,0
Otomasi industri,0,1,0
Pendidikan,4,3,1
Perbankan,0,3,0
Seni Kreatif dan Desain,3,3,0
Sosial,1,0,0
Teknologi Informasi,8,17,0
Telekomunikasi,0,1,0


In [48]:
# Chi-square test of independence: occupation vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): most expected counts in the output are below 5.
chi2_contingency(observed=sleep_time_with_occupation)

(11.679452332657203,
 0.6320278624694696,
 14,
 array([[ 0.72340426,  1.23404255,  0.04255319],
        [ 0.36170213,  0.61702128,  0.0212766 ],
        [ 2.89361702,  4.93617021,  0.17021277],
        [ 1.08510638,  1.85106383,  0.06382979],
        [ 2.17021277,  3.70212766,  0.12765957],
        [ 0.36170213,  0.61702128,  0.0212766 ],
        [ 9.04255319, 15.42553191,  0.53191489],
        [ 0.36170213,  0.61702128,  0.0212766 ]]))

In [49]:
# Contingency table: must_use_phone answer (rows) x sleep time (columns).

sleep_time_with_forced_to_use_phone = pd.crosstab(
    index=raw["must_use_phone"],
    columns=raw["sleep_time"],
)
sleep_time_with_forced_to_use_phone

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
must_use_phone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,6,18,0
Ya,11,11,1


In [50]:
# Chi-square test of independence: must_use_phone vs. sleep time.
# Printed result order: statistic, p-value, degrees of freedom, expected frequencies.
print(chi2_contingency(observed=sleep_time_with_forced_to_use_phone))

(4.140841344033865, 0.1261327100414052, 2, array([[ 8.68085106, 14.80851064,  0.5106383 ],
       [ 8.31914894, 14.19148936,  0.4893617 ]]))


In [51]:
# Contingency table: sex (rows) x sleep time (columns).

sleep_time_with_sex = pd.crosstab(index=raw["sex"], columns=raw["sleep_time"])
sleep_time_with_sex

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Pria,14,20,0
Wanita,3,9,1


In [52]:
# Chi-square test of independence: sex vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
chi2_contingency(observed=sleep_time_with_sex)

(3.6322083834313874,
 0.16265820335341669,
 2,
 array([[12.29787234, 20.9787234 ,  0.72340426],
        [ 4.70212766,  8.0212766 ,  0.27659574]]))

In [53]:
# Contingency table: smoking status (rows) x sleep time (columns).
sleep_time_with_smoke_habits = pd.crosstab(index=raw["is_smoker"], columns=raw["sleep_time"])
sleep_time_with_smoke_habits

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_smoker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Tidak, saya bukan seorang perokok aktif",15,29,1
"Ya, saya seorang perokok aktif",2,0,0


In [54]:
# Chi-square test of independence: smoking status vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): the table above has only 2 respondents in the smoker row, so
# the expected counts for that row are all below 5.
chi2_contingency(observed=sleep_time_with_smoke_habits)

(3.6862745098039214,
 0.1583199561114644,
 2,
 array([[16.27659574, 27.76595745,  0.95744681],
        [ 0.72340426,  1.23404255,  0.04255319]]))

In [55]:
# Contingency table: alcohol consumption (rows) x sleep time (columns).
sleep_time_with_drink_habits = pd.crosstab(index=raw["is_alcoholics"], columns=raw["sleep_time"])
sleep_time_with_drink_habits

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_alcoholics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,15,29,1
Ya,2,0,0


In [56]:
# Chi-square test of independence: alcohol consumption vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): the table above has only 2 respondents in the "Ya" row, so
# the expected counts for that row are all below 5.
chi2_contingency(observed=sleep_time_with_drink_habits)

(3.6862745098039214,
 0.1583199561114644,
 2,
 array([[16.27659574, 27.76595745,  0.95744681],
        [ 0.72340426,  1.23404255,  0.04255319]]))

In [57]:
# Contingency table: "does screen time distract sleep" answer (rows) x sleep time (columns).
sleep_time_with_sleep_distracted = pd.crosstab(
    index=raw["is_screen_time_distract_sleep"],
    columns=raw["sleep_time"],
)
sleep_time_with_sleep_distracted

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_screen_time_distract_sleep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mungkin,8,14,0
Tidak,5,7,0
Ya,4,8,1


In [58]:
# Chi-square test of independence: perceived sleep distraction vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
chi2_contingency(observed=sleep_time_with_sleep_distracted)

(2.856151623900102,
 0.5821793511658522,
 4,
 array([[ 7.95744681, 13.57446809,  0.46808511],
        [ 4.34042553,  7.40425532,  0.25531915],
        [ 4.70212766,  8.0212766 ,  0.27659574]]))

In [59]:
# Contingency table: change in screen habits since the pandemic (rows) x sleep time (columns).
# NOTE(review): the displayed counts add up to 45, two fewer than the 47 rows
# loaded into `raw` — presumably missing answers in this column; confirm.
sleep_time_with_screen_habits_change = pd.crosstab(
    index=raw["difference_before_pandemic"],
    columns=raw["sleep_time"],
)
sleep_time_with_screen_habits_change

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
difference_before_pandemic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak ada perubahan,6,5,0
"Ya, lebih sering menatap layar ponsel saat pandemi",11,22,1


In [60]:
# Chi-square test of independence: change in screen habits vs. sleep time.
# Result order: statistic, p-value, degrees of freedom, expected frequencies.
chi2_contingency(observed=sleep_time_with_screen_habits_change)

(1.9204152249134947,
 0.3828134008937157,
 2,
 array([[ 4.15555556,  6.6       ,  0.24444444],
        [12.84444444, 20.4       ,  0.75555556]]))

In [61]:
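# None of the p-values above fall below the conventional significance level
# (alpha = 0.05), so for every independent variable tested we fail to reject the
# null hypothesis that sleep time is independent of it. This is not proof that no
# association exists: the sample is small (47 respondents) and many expected
# counts are below 5, which weakens the chi-square approximation.

# Future work could collect different data, such as total screen time across
# phone, TV, laptop and other devices with a screen.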