In [71]:
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency

In [72]:
# Step 1: load the survey responses from the "adjusted" CSV into `raw`.
# (No encoding happens in this cell; it only reads the file.)

raw = pd.read_csv("dataset/adjusted.csv")
# Show (rows, columns) as a quick sanity check of the load.
raw.shape

(54, 12)

In [73]:
# List the column names available in the raw survey frame.
raw.columns

Index(['timestamp', 'age', 'sex', 'occupation', 'must_use_phone',
       'screen_time', 'sleep_time', 'is_smoker', 'is_alcoholics',
       'is_consuming_special_medicine', 'is_screen_time_distract_sleep',
       'difference_before_pandemic'],
      dtype='object')

In [74]:
# Respondent demographics: number of non-null `age` answers for each `sex`.

raw.groupby("sex")[["age"]].count()

Unnamed: 0_level_0,age
sex,Unnamed: 1_level_1
Pria,38
Wanita,16


In [75]:
# Summary of the screen-time and sleep-time answers.
# Both columns hold text categories, so describe() reports count/unique/top/freq.

raw.loc[:, ["screen_time", "sleep_time"]].describe()

Unnamed: 0,screen_time,sleep_time
count,54,54
unique,5,3
top,3 - 5 jam sehari,6 - 8 jam sehari
freq,20,34


In [76]:
# Summarise the reported screen time within each sleep-time group.
raw.groupby("sleep_time")[["screen_time"]].describe()

Unnamed: 0_level_0,screen_time,screen_time,screen_time,screen_time
Unnamed: 0_level_1,count,unique,top,freq
sleep_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
3 - 5 jam sehari,19,5,6 - 8 jam sehari,9
6 - 8 jam sehari,34,4,3 - 5 jam sehari,13
Kurang dari 2 jam sehari,1,1,Lebih dari 9 jam sehari,1


In [77]:
# Summarise the reported sleep time within each occupation.
raw.groupby("occupation")[["sleep_time"]].describe()

Unnamed: 0_level_0,sleep_time,sleep_time,sleep_time,sleep_time
Unnamed: 0_level_1,count,unique,top,freq
occupation,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mahasiswa,7,2,6 - 8 jam sehari,5
Media Massa,2,2,3 - 5 jam sehari,1
Otomasi industri,1,1,6 - 8 jam sehari,1
Pendidikan,8,3,3 - 5 jam sehari,4
Perbankan,3,1,6 - 8 jam sehari,3
Seni Kreatif dan Desain,6,2,3 - 5 jam sehari,3
Sosial,1,1,3 - 5 jam sehari,1
Teknologi Informasi,25,2,6 - 8 jam sehari,17
Telekomunikasi,1,1,6 - 8 jam sehari,1


In [78]:
# Distribution summary of the smoking-status answers.
raw["is_smoker"].describe()

count                                          54
unique                                          2
top       Tidak, saya bukan seorang perokok aktif
freq                                           52
Name: is_smoker, dtype: object

In [79]:
# Distribution summary of the alcohol-consumption answers.
raw["is_alcoholics"].describe()

count        54
unique        2
top       Tidak
freq         52
Name: is_alcoholics, dtype: object

In [80]:
# Load the encoded variant of the survey (numeric codes plus one-hot occupation
# columns, as seen in the preview below).
# NOTE(review): `encoded` is not referenced by any later cell visible in this
# notebook; every test below is built from `raw`.
encoded = pd.read_csv("dataset/encoded.csv")
# Preview the first five rows.
encoded.head()

Unnamed: 0,timestamp,age,sex,occupation,must_use_phone,screen_time,sleep_time,is_smoker,is_alcoholics,is_consuming_special_medicine,...,difference_before_pandemic,occupation_Mahasiswa,occupation_Media Massa,occupation_Otomasi industri,occupation_Pendidikan,occupation_Perbankan,occupation_Seni Kreatif dan Desain,occupation_Sosial,occupation_Teknologi Informasi,occupation_Telekomunikasi
0,10/31/2020 9:29:02,1,1,Seni Kreatif dan Desain,0,2,1,0,0,1,...,2.0,0,0,0,0,0,1,0,0,0
1,10/31/2020 9:30:06,1,1,Seni Kreatif dan Desain,1,2,1,0,0,0,...,2.0,0,0,0,0,0,1,0,0,0
2,10/31/2020 9:31:32,1,1,Teknologi Informasi,0,2,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
3,10/31/2020 9:37:07,2,1,Teknologi Informasi,1,4,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
4,10/31/2020 9:38:10,1,1,Teknologi Informasi,0,3,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0


In [81]:
# Contingency table: age group (rows) against sleep time (columns).

sleep_time_with_age = pd.crosstab(index=raw["age"], columns=raw["sleep_time"])
sleep_time_with_age

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
15-20,12,17,1
20-25,6,15,0
25-30,0,2,0
>35,1,0,0


In [82]:
# Chi-square test of independence on the age x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): most expected frequencies in this cell's output are below 5, so the
# chi-square approximation may be unreliable for this table.
chi2_contingency(sleep_time_with_age)

(4.649668288367979,
 0.5894641009396733,
 6,
 array([[1.05555556e+01, 1.88888889e+01, 5.55555556e-01],
        [7.38888889e+00, 1.32222222e+01, 3.88888889e-01],
        [7.03703704e-01, 1.25925926e+00, 3.70370370e-02],
        [3.51851852e-01, 6.29629630e-01, 1.85185185e-02]]))

In [83]:
# Contingency table: screen time (rows) against sleep time (columns).

sleep_time_with_screen = pd.crosstab(index=raw["screen_time"], columns=raw["sleep_time"])
sleep_time_with_screen

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
screen_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - 2 jam sehari,1,2,0
3 - 5 jam sehari,7,13,0
6 - 8 jam sehari,9,8,0
Kurang dari 1 jam sehari,1,0,0
Lebih dari 9 jam sehari,1,11,1


In [84]:
# Chi-square test of independence on the screen-time x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): most expected frequencies in this cell's output are below 5, so the
# chi-square approximation may be unreliable for this table.
chi2_contingency(sleep_time_with_screen)

(10.967176358516735,
 0.2035659776741628,
 8,
 array([[ 1.05555556,  1.88888889,  0.05555556],
        [ 7.03703704, 12.59259259,  0.37037037],
        [ 5.98148148, 10.7037037 ,  0.31481481],
        [ 0.35185185,  0.62962963,  0.01851852],
        [ 4.57407407,  8.18518519,  0.24074074]]))

In [85]:
# Contingency table: occupation (rows) against sleep time (columns).

sleep_time_with_occupation = pd.crosstab(index=raw["occupation"], columns=raw["sleep_time"])
sleep_time_with_occupation

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mahasiswa,2,5,0
Media Massa,1,1,0
Otomasi industri,0,1,0
Pendidikan,4,3,1
Perbankan,0,3,0
Seni Kreatif dan Desain,3,3,0
Sosial,1,0,0
Teknologi Informasi,8,17,0
Telekomunikasi,0,1,0


In [86]:
# Chi-square test of independence on the occupation x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): only three of the 27 expected frequencies in this cell's output reach 5,
# so the chi-square approximation may be unreliable for this table.
chi2_contingency(sleep_time_with_occupation)

(12.79705661211853,
 0.6875344203676306,
 16,
 array([[ 2.46296296,  4.40740741,  0.12962963],
        [ 0.7037037 ,  1.25925926,  0.03703704],
        [ 0.35185185,  0.62962963,  0.01851852],
        [ 2.81481481,  5.03703704,  0.14814815],
        [ 1.05555556,  1.88888889,  0.05555556],
        [ 2.11111111,  3.77777778,  0.11111111],
        [ 0.35185185,  0.62962963,  0.01851852],
        [ 8.7962963 , 15.74074074,  0.46296296],
        [ 0.35185185,  0.62962963,  0.01851852]]))

In [68]:
# Contingency table: must_use_phone answer (rows) against sleep time (columns).

sleep_time_with_forced_to_use_phone = pd.crosstab(
    index=raw["must_use_phone"],
    columns=raw["sleep_time"],
)
sleep_time_with_forced_to_use_phone

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
must_use_phone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,6,19,0
Ya,13,15,1


In [67]:
# Chi-square test of independence on the must_use_phone x sleep-time table.
# Printed tuple order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): the last sleep-time column has expected frequencies below 1 in the
# printed output, so the chi-square approximation may be unreliable here.
print(chi2_contingency(sleep_time_with_forced_to_use_phone))

(3.773946834632219, 0.1515297327798016, 2, array([[ 8.7962963 , 15.74074074,  0.46296296],
       [10.2037037 , 18.25925926,  0.53703704]]))


In [62]:
# Contingency table: sex (rows) against sleep time (columns).

sleep_time_with_sex = pd.crosstab(index=raw["sex"], columns=raw["sleep_time"])
sleep_time_with_sex

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Pria,14,24,0
Wanita,5,10,1


In [70]:
# Chi-square test of independence on the sex x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): the last sleep-time column has expected frequencies below 1 in this
# cell's output, so the chi-square approximation may be unreliable here.
chi2_contingency(sleep_time_with_sex)

(2.475843245885612,
 0.289986292911004,
 2,
 array([[13.37037037, 23.92592593,  0.7037037 ],
        [ 5.62962963, 10.07407407,  0.2962963 ]]))

In [89]:
# Contingency table: smoking status (rows) against sleep time (columns).
sleep_time_with_smoke_habits = pd.crosstab(index=raw["is_smoker"], columns=raw["sleep_time"])
sleep_time_with_smoke_habits

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_smoker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Tidak, saya bukan seorang perokok aktif",17,34,1
"Ya, saya seorang perokok aktif",2,0,0


In [91]:
# Chi-square test of independence on the smoking-status x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): four of the six expected frequencies in this cell's output are below 5
# (only two respondents are in the smoker row), so the approximation may be unreliable.
chi2_contingency(sleep_time_with_smoke_habits)

(3.8259109311740893,
 0.14764338619667502,
 2,
 array([[18.2962963 , 32.74074074,  0.96296296],
        [ 0.7037037 ,  1.25925926,  0.03703704]]))

In [94]:
# Contingency table: alcohol consumption (rows) against sleep time (columns).
sleep_time_with_drink_habits = pd.crosstab(index=raw["is_alcoholics"], columns=raw["sleep_time"])
sleep_time_with_drink_habits

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_alcoholics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,17,34,1
Ya,2,0,0


In [98]:
# Chi-square test of independence on the alcohol-consumption x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): four of the six expected frequencies in this cell's output are below 5
# (only two respondents are in the "Ya" row), so the approximation may be unreliable.
chi2_contingency(sleep_time_with_drink_habits)

(3.8259109311740893,
 0.14764338619667502,
 2,
 array([[18.2962963 , 32.74074074,  0.96296296],
        [ 0.7037037 ,  1.25925926,  0.03703704]]))

In [99]:
# Contingency table: "does screen time disturb sleep" answer (rows) against sleep time (columns).
sleep_time_with_sleep_distracted = pd.crosstab(
    index=raw["is_screen_time_distract_sleep"],
    columns=raw["sleep_time"],
)
sleep_time_with_sleep_distracted

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_screen_time_distract_sleep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mungkin,8,17,0
Tidak,5,7,0
Ya,6,10,1


In [101]:
# Chi-square test of independence on the sleep-disturbance x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): several expected frequencies in this cell's output are below 5, so the
# chi-square approximation may be unreliable for this table.
chi2_contingency(sleep_time_with_sleep_distracted)

(2.5797432161719174,
 0.6304156858135397,
 4,
 array([[ 8.7962963 , 15.74074074,  0.46296296],
        [ 4.22222222,  7.55555556,  0.22222222],
        [ 5.98148148, 10.7037037 ,  0.31481481]]))

In [103]:
# Contingency table: change in screen habits since the pandemic (rows) against sleep time (columns).
# NOTE(review): the displayed counts add up to 52 while `raw` has 54 rows, so two
# respondents presumably have no answer in difference_before_pandemic; confirm.
sleep_time_with_screen_habits_change = pd.crosstab(
    index=raw["difference_before_pandemic"],
    columns=raw["sleep_time"],
)
sleep_time_with_screen_habits_change

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
difference_before_pandemic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak ada perubahan,6,6,0
"Ya, lebih sering menatap layar ponsel saat pandemi",13,26,1


In [105]:
# Chi-square test of independence on the habit-change x sleep-time table.
# Output order: chi-square statistic, p-value, degrees of freedom, expected frequencies.
# NOTE(review): three of the six expected frequencies in this cell's output are below 5,
# so the chi-square approximation may be unreliable for this table.
chi2_contingency(sleep_time_with_screen_habits_change)

(1.4111842105263157,
 0.49381609656608805,
 2,
 array([[ 4.38461538,  7.38461538,  0.23076923],
        [14.61538462, 24.61538462,  0.76923077]]))

In [None]:
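# None of the p-values is below the conventional significance level (alpha = 0.05), so we fail
# to reject the null hypothesis of independence: this sample shows no statistically significant
# association between the dependent variable sleep_time and any of the independent variables above.
# Caveat: with only 54 respondents, many expected cell counts are below 5, so these results are
# better read as inconclusive than as proof that no relationship exists.

# Future work could collect different data, such as total screen time across phones, TVs,
# laptops and other devices with screens.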