In [1]:
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency

In [2]:
#1 Encode datasets

raw = pd.read_csv("dataset/adjusted.csv")
raw.shape

(46, 12)

In [3]:
raw.columns

Index(['timestamp', 'age', 'sex', 'occupation', 'must_use_phone',
       'screen_time', 'sleep_time', 'is_smoker', 'is_alcoholics',
       'is_consuming_special_medicine', 'is_screen_time_distract_sleep',
       'difference_before_pandemic'],
      dtype='object')

In [4]:
# Demography of the respondents

raw["age"].describe()

count        46
unique        3
top       15-20
freq         24
Name: age, dtype: object

In [5]:
# Screen time description

raw[["screen_time", "sleep_time"]].describe()

Unnamed: 0,screen_time,sleep_time
count,46,46
unique,4,3
top,3 - 5 jam sehari,6 - 8 jam sehari
freq,19,29


In [6]:
raw[["screen_time", "sleep_time"]].groupby("sleep_time").describe()

Unnamed: 0_level_0,screen_time,screen_time,screen_time,screen_time
Unnamed: 0_level_1,count,unique,top,freq
sleep_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
3 - 5 jam sehari,16,3,6 - 8 jam sehari,8
6 - 8 jam sehari,29,4,3 - 5 jam sehari,12
Kurang dari 2 jam sehari,1,1,Lebih dari 9 jam sehari,1


In [7]:
raw[["occupation", "sleep_time"]].groupby("occupation").describe()

Unnamed: 0_level_0,sleep_time,sleep_time,sleep_time,sleep_time
Unnamed: 0_level_1,count,unique,top,freq
occupation,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Media Massa,2,2,6 - 8 jam sehari,1
Otomasi industri,1,1,6 - 8 jam sehari,1
Pendidikan,8,3,3 - 5 jam sehari,4
Perbankan,3,1,6 - 8 jam sehari,3
Seni Kreatif dan Desain,6,2,6 - 8 jam sehari,3
Sosial,1,1,3 - 5 jam sehari,1
Teknologi Informasi,24,2,6 - 8 jam sehari,17
Telekomunikasi,1,1,6 - 8 jam sehari,1


In [8]:
raw.is_smoker.describe()

count                                          46
unique                                          2
top       Tidak, saya bukan seorang perokok aktif
freq                                           45
Name: is_smoker, dtype: object

In [9]:
raw.is_alcoholics.describe()

count        46
unique        2
top       Tidak
freq         45
Name: is_alcoholics, dtype: object

In [10]:
encoded = pd.read_csv("dataset/encoded.csv")
encoded.head()

Unnamed: 0,timestamp,age,sex,occupation,must_use_phone,screen_time,sleep_time,is_smoker,is_alcoholics,is_consuming_special_medicine,...,difference_before_pandemic,occupation_Mahasiswa,occupation_Media Massa,occupation_Otomasi industri,occupation_Pendidikan,occupation_Perbankan,occupation_Seni Kreatif dan Desain,occupation_Sosial,occupation_Teknologi Informasi,occupation_Telekomunikasi
0,10/31/2020 9:29:02,1,1,Seni Kreatif dan Desain,0,2,1,0,0,1,...,2.0,0,0,0,0,0,1,0,0,0
1,10/31/2020 9:30:06,1,1,Seni Kreatif dan Desain,1,2,1,0,0,0,...,2.0,0,0,0,0,0,1,0,0,0
2,10/31/2020 9:31:32,1,1,Teknologi Informasi,0,2,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
3,10/31/2020 9:37:07,2,1,Teknologi Informasi,1,4,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0
4,10/31/2020 9:38:10,1,1,Teknologi Informasi,0,3,2,0,0,0,...,2.0,0,0,0,0,0,0,0,1,0


In [11]:
# sleep_time_with_age

sleep_time_with_age = pd.crosstab([raw.age], [raw.sleep_time])
pd.concat([
    sleep_time_with_age, 
    sleep_time_with_age.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
15-20,10.0,13.0,1.0
20-25,6.0,14.0,0.0
25-30,0.0,2.0,0.0
15-20,0.416667,0.541667,0.041667
20-25,0.3,0.7,0.0
25-30,0.0,1.0,0.0


In [12]:
# Chi-square test of independence.
# Chi, P, Degree of Freedom, Expected
chi2_contingency(sleep_time_with_age)

(2.957614942528736,
 0.5649433122907475,
 4,
 array([[ 8.34782609, 15.13043478,  0.52173913],
        [ 6.95652174, 12.60869565,  0.43478261],
        [ 0.69565217,  1.26086957,  0.04347826]]))

In [13]:
# sleep_time_with_screen_time

sleep_time_with_screen = pd.crosstab([raw.screen_time], [raw.sleep_time])
pd.concat([
    sleep_time_with_screen, 
    sleep_time_with_screen.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
screen_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - 2 jam sehari,1.0,2.0,0.0
3 - 5 jam sehari,7.0,12.0,0.0
6 - 8 jam sehari,8.0,8.0,0.0
Lebih dari 9 jam sehari,0.0,7.0,1.0
1 - 2 jam sehari,0.333333,0.666667,0.0
3 - 5 jam sehari,0.368421,0.631579,0.0
6 - 8 jam sehari,0.5,0.5,0.0
Lebih dari 9 jam sehari,0.0,0.875,0.125


In [14]:
chi2_contingency(sleep_time_with_screen)

(9.819872958257712,
 0.13244539410993317,
 6,
 array([[ 1.04347826,  1.89130435,  0.06521739],
        [ 6.60869565, 11.97826087,  0.41304348],
        [ 5.56521739, 10.08695652,  0.34782609],
        [ 2.7826087 ,  5.04347826,  0.17391304]]))

In [15]:
# sleep_time_with_occupation

sleep_time_with_occupation = pd.crosstab([raw.occupation], [raw.sleep_time])
pd.concat([
    sleep_time_with_occupation, 
    sleep_time_with_occupation.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Media Massa,1.0,1.0,0.0
Otomasi industri,0.0,1.0,0.0
Pendidikan,4.0,3.0,1.0
Perbankan,0.0,3.0,0.0
Seni Kreatif dan Desain,3.0,3.0,0.0
Sosial,1.0,0.0,0.0
Teknologi Informasi,7.0,17.0,0.0
Telekomunikasi,0.0,1.0,0.0
Media Massa,0.5,0.5,0.0
Otomasi industri,0.0,1.0,0.0


In [16]:
chi2_contingency(sleep_time_with_occupation)

(11.983297413793103,
 0.6076441907421104,
 14,
 array([[ 0.69565217,  1.26086957,  0.04347826],
        [ 0.34782609,  0.63043478,  0.02173913],
        [ 2.7826087 ,  5.04347826,  0.17391304],
        [ 1.04347826,  1.89130435,  0.06521739],
        [ 2.08695652,  3.7826087 ,  0.13043478],
        [ 0.34782609,  0.63043478,  0.02173913],
        [ 8.34782609, 15.13043478,  0.52173913],
        [ 0.34782609,  0.63043478,  0.02173913]]))

In [17]:
# sleep_time_with_screen_time

sleep_time_with_forced_to_use_phone = pd.crosstab([raw.must_use_phone], [raw.sleep_time])
pd.concat([
    sleep_time_with_forced_to_use_phone, 
    sleep_time_with_forced_to_use_phone.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
must_use_phone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,5.0,18.0,0.0
Ya,11.0,11.0,1.0
Tidak,0.217391,0.782609,0.0
Ya,0.478261,0.478261,0.043478


In [18]:
print(chi2_contingency(sleep_time_with_forced_to_use_phone))

(4.939655172413794, 0.0845994438552231, 2, array([[ 8. , 14.5,  0.5],
       [ 8. , 14.5,  0.5]]))


In [19]:
# sleep_time_with_sex

sleep_time_with_sex = pd.crosstab([raw.sex], [raw.sleep_time])
pd.concat([
    sleep_time_with_sex, 
    sleep_time_with_sex.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Pria,13.0,20.0,0.0
Wanita,3.0,9.0,1.0
Pria,0.393939,0.606061,0.0
Wanita,0.230769,0.692308,0.076923


In [20]:
chi2_contingency(sleep_time_with_sex)

(3.362370388232458,
 0.1861532175462648,
 2,
 array([[11.47826087, 20.80434783,  0.7173913 ],
        [ 4.52173913,  8.19565217,  0.2826087 ]]))

In [21]:
sleep_time_with_smoke_habits = pd.crosstab([raw.is_smoker], [raw.sleep_time])
sleep_time_with_smoke_habits
pd.concat([
    sleep_time_with_smoke_habits, 
    sleep_time_with_smoke_habits.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_smoker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Tidak, saya bukan seorang perokok aktif",15.0,29.0,1.0
"Ya, saya seorang perokok aktif",1.0,0.0,0.0
"Tidak, saya bukan seorang perokok aktif",0.333333,0.644444,0.022222
"Ya, saya seorang perokok aktif",1.0,0.0,0.0


In [22]:
chi2_contingency(sleep_time_with_smoke_habits)

(1.916666666666667,
 0.3835315728763106,
 2,
 array([[1.56521739e+01, 2.83695652e+01, 9.78260870e-01],
        [3.47826087e-01, 6.30434783e-01, 2.17391304e-02]]))

In [23]:
sleep_time_with_drink_habits = pd.crosstab([raw.is_alcoholics], [raw.sleep_time])
pd.concat([
    sleep_time_with_drink_habits, 
    sleep_time_with_drink_habits.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_alcoholics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak,15.0,29.0,1.0
Ya,1.0,0.0,0.0
Tidak,0.333333,0.644444,0.022222
Ya,1.0,0.0,0.0


In [24]:
chi2_contingency(sleep_time_with_drink_habits)

(1.916666666666667,
 0.3835315728763106,
 2,
 array([[1.56521739e+01, 2.83695652e+01, 9.78260870e-01],
        [3.47826087e-01, 6.30434783e-01, 2.17391304e-02]]))

In [25]:
sleep_time_with_sleep_distracted = pd.crosstab([raw.is_screen_time_distract_sleep], [raw.sleep_time])

pd.concat([
    sleep_time_with_sleep_distracted, 
    sleep_time_with_sleep_distracted.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
is_screen_time_distract_sleep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mungkin,8.0,14.0,0.0
Tidak,4.0,7.0,0.0
Ya,4.0,8.0,1.0
Mungkin,0.363636,0.636364,0.0
Tidak,0.363636,0.636364,0.0
Ya,0.307692,0.615385,0.076923


In [26]:
chi2_contingency(sleep_time_with_sleep_distracted)

(2.6288883530262845,
 0.6217143013568283,
 4,
 array([[ 7.65217391, 13.86956522,  0.47826087],
        [ 3.82608696,  6.93478261,  0.23913043],
        [ 4.52173913,  8.19565217,  0.2826087 ]]))

In [27]:
sleep_time_with_screen_habits_change = pd.crosstab([raw.difference_before_pandemic], [raw.sleep_time])
sleep_time_with_screen_habits_change
pd.concat([
    sleep_time_with_screen_habits_change, 
    sleep_time_with_screen_habits_change.apply(lambda r: r/r.sum(), axis=1)
])

sleep_time,3 - 5 jam sehari,6 - 8 jam sehari,Kurang dari 2 jam sehari
difference_before_pandemic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tidak ada perubahan,5.0,5.0,0.0
"Ya, lebih sering menatap layar ponsel saat pandemi",11.0,22.0,1.0
"Ya, lebih sering menatap layar ponsel sebelum pandemi",0.0,1.0,0.0
Tidak ada perubahan,0.5,0.5,0.0
"Ya, lebih sering menatap layar ponsel saat pandemi",0.323529,0.647059,0.029412
"Ya, lebih sering menatap layar ponsel sebelum pandemi",0.0,1.0,0.0


In [28]:
chi2_contingency(sleep_time_with_screen_habits_change)

(1.867121848739496,
 0.7601818614754741,
 4,
 array([[ 3.55555556,  6.22222222,  0.22222222],
        [12.08888889, 21.15555556,  0.75555556],
        [ 0.35555556,  0.62222222,  0.02222222]]))

In [29]:
# P value tidak ada yang bernilai di bawah alpha normal yakni 0.05. Maka Variabel Dependen Sleep Time
# tidak berkorelasi dengan variabel bebas di atas. 

# Pengembangan selanjutnya bisa dilakukan dengan pengambilan data yang berbeda seperti screen time total 
# pada handphone, tv, laptop dan gadget ber-layar lainnya. 