# **Import Library**

In [60]:
# data wrangling
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
# data visualization
import matplotlib.pyplot as plt
import seaborn as sn
import plotly.express as px

# **Load Dataset**

In [61]:
# Load the Excel workbook into a DataFrame and preview its first five rows.
# NOTE(review): this is a hard-coded absolute Windows path, so the notebook only
# runs on a machine where the file exists at exactly this location.
DATA_PATH = 'D:\\NIDS-2023\\mawa\\data-mhs.xlsx'
raw_df = pd.read_excel(DATA_PATH)
raw_df.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,,LULUS,2022-08-19
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21 08:10:00,LULUS,2023-07-21
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10 08:00:00,LULUS,2023-08-10
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15 10:00:00,LULUS,2023-08-15
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21 10:00:00,LULUS,2023-08-16


In [62]:
# (number of rows, number of columns) of the raw dataset
raw_df.shape

(2432, 10)

In [63]:
# Column dtypes and non-null counts, to spot columns that contain missing values
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2432 entries, 0 to 2431
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   nim                        2432 non-null   int64         
 1   nama                       2432 non-null   object        
 2   nama_prodi                 2432 non-null   object        
 3   jenjang                    2432 non-null   object        
 4   angkatan                   2432 non-null   int64         
 5   nama_pembimbing            2432 non-null   object        
 6   tanggal_sk_pembimbing      2432 non-null   datetime64[ns]
 7   tanggal_pelaksanaan_ujian  1868 non-null   object        
 8   status_lulus               2432 non-null   object        
 9   tgl_lulus                  1793 non-null   datetime64[ns]
dtypes: datetime64[ns](2), int64(2), object(6)
memory usage: 190.1+ KB


# **Tingkat Kelulusan Mahasiswa FMIPA Angkatan 2017-2019**

In [64]:
# Cohort years ('angkatan') present in the data and the number of rows for each.
angkatan = (
    raw_df['angkatan']
    .value_counts()
    .rename_axis('Angkatan')
    .reset_index(name='Jumlah')
)
angkatan

Unnamed: 0,Angkatan,Jumlah
0,2019,845
1,2018,842
2,2017,745


In [65]:
# Total number of rows per graduation status ('status_lulus').
status_lulus = (
    raw_df['status_lulus']
    .value_counts()
    .rename_axis('Status')
    .reset_index(name='Jumlah')
)
status_lulus

Unnamed: 0,Status,Jumlah
0,LULUS,1793
1,BELUM LULUS,639


In [66]:
# TODO: pie chart (placeholder cell — no plotting code yet; presumably of `status_lulus`)

In [67]:
# Number of rows per (cohort year, graduation status) combination.
cohort_status_groups = raw_df.groupby(['angkatan', 'status_lulus'])
status_lulus_angkatan = cohort_status_groups.size().reset_index(name='Jumlah')
status_lulus_angkatan

Unnamed: 0,angkatan,status_lulus,Jumlah
0,2017,BELUM LULUS,119
1,2017,LULUS,626
2,2018,BELUM LULUS,193
3,2018,LULUS,649
4,2019,BELUM LULUS,327
5,2019,LULUS,518


In [68]:
# TODO: stacked bar chart (placeholder cell — no plotting code yet; presumably of `status_lulus_angkatan`)

In [69]:
# Working frame containing only the rows whose status is "LULUS" (graduated).
# Filter first and copy afterwards: the mask and the frame it selects from are the
# same object, only the kept rows are copied, and df1 is an explicitly independent
# frame, so the column assignments in the following cells never touch raw_df.
df1 = raw_df.loc[raw_df['status_lulus'] == "LULUS"].copy()
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,,LULUS,2022-08-19
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21 08:10:00,LULUS,2023-07-21
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10 08:00:00,LULUS,2023-08-10
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15 10:00:00,LULUS,2023-08-15
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21 10:00:00,LULUS,2023-08-16


In [70]:
# Parse the exam timestamp column (object dtype after the Excel load) into datetime64.
# No explicit `format` is passed: the values carry a time component
# (e.g. "2023-07-21 08:10:00"), which a date-only format such as '%Y-%m-%d' does not
# describe and which strict format matching rejects for string inputs.
# Missing values become NaT.
df1['tanggal_pelaksanaan_ujian'] = pd.to_datetime(df1['tanggal_pelaksanaan_ujian'])
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21 08:10:00,LULUS,2023-07-21
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10 08:00:00,LULUS,2023-08-10
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15 10:00:00,LULUS,2023-08-15
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21 10:00:00,LULUS,2023-08-16


In [71]:
# Show the rows whose exam date is missing (NaT) in 'tanggal_pelaksanaan_ujian'.
# The rows are not dropped here; df1.dropna(subset=['tanggal_pelaksanaan_ujian'])
# would remove them.
missing_exam_date = df1['tanggal_pelaksanaan_ujian'].isna()
df1[missing_exam_date]

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19


In [72]:
# Store the time-of-day part of the exam timestamp in its own column,
# 'waktu_pelaksanaan_ujian'.
exam_timestamps = df1['tanggal_pelaksanaan_ujian']
df1['waktu_pelaksanaan_ujian'] = exam_timestamps.dt.time
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus,waktu_pelaksanaan_ujian
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19,NaT
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21 08:10:00,LULUS,2023-07-21,08:10:00
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10 08:00:00,LULUS,2023-08-10,08:00:00
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15 10:00:00,LULUS,2023-08-15,10:00:00
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21 10:00:00,LULUS,2023-08-16,10:00:00


In [73]:
# Strip the time-of-day from the exam timestamp, keeping only the calendar date.
# .dt.normalize() sets the time to midnight and keeps the datetime64 dtype, whereas
# .dt.date would turn the column into Python date objects (object dtype), which do
# not compare reliably against the datetime64 'tgl_lulus' column.
df1['tanggal_pelaksanaan_ujian'] = df1['tanggal_pelaksanaan_ujian'].dt.normalize()
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus,waktu_pelaksanaan_ujian
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19,NaT
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21,LULUS,2023-07-21,08:10:00
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10,LULUS,2023-08-10,08:00:00
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15,LULUS,2023-08-15,10:00:00
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21,LULUS,2023-08-16,10:00:00


In [74]:
# New column 'lulus_daycount': 'sameday' when tgl_lulus and tanggal_pelaksanaan_ujian
# fall on the same calendar date, otherwise 'not sameday'. Rows with a missing exam
# date are labelled 'not sameday', because NaT never compares equal.
# Both sides are coerced to midnight-normalized datetime64 first, so the comparison
# does not depend on whether the exam column currently holds Timestamps or Python
# date objects.
exam_day = pd.to_datetime(df1['tanggal_pelaksanaan_ujian']).dt.normalize()
graduation_day = pd.to_datetime(df1['tgl_lulus']).dt.normalize()
df1['lulus_daycount'] = np.where(graduation_day == exam_day, 'sameday', 'not sameday')
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus,waktu_pelaksanaan_ujian,lulus_daycount
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19,NaT,not sameday
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21,LULUS,2023-07-21,08:10:00,sameday
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10,LULUS,2023-08-10,08:00:00,sameday
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15,LULUS,2023-08-15,10:00:00,sameday
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21,LULUS,2023-08-16,10:00:00,not sameday


In [75]:
# Number of graduates per 'lulus_daycount' label (same day vs. a different day).
grad_daycount = (
    df1['lulus_daycount']
    .value_counts()
    .rename_axis('Keterangan')
    .reset_index(name='Jumlah')
)
grad_daycount

Unnamed: 0,Keterangan,Jumlah
0,not sameday,1485
1,sameday,308


In [77]:
# Convert the exam date column back to datetime64 so it can be subtracted from
# 'tgl_lulus' in the next step.
exam_dates = df1['tanggal_pelaksanaan_ujian']
df1['tanggal_pelaksanaan_ujian'] = pd.to_datetime(exam_dates, format='%Y-%m-%d')
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus,waktu_pelaksanaan_ujian,lulus_daycount
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19,NaT,not sameday
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21,LULUS,2023-07-21,08:10:00,sameday
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10,LULUS,2023-08-10,08:00:00,sameday
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15,LULUS,2023-08-15,10:00:00,sameday
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21,LULUS,2023-08-16,10:00:00,not sameday


In [79]:
# New column 'days': 0 where lulus_daycount == 'sameday', otherwise the whole number
# of days between the exam date and the graduation date
# (tgl_lulus - tanggal_pelaksanaan_ujian). Rows with a missing exam date yield NaN.
is_sameday = df1['lulus_daycount'] == 'sameday'
days_after_exam = (df1['tgl_lulus'] - df1['tanggal_pelaksanaan_ujian']).dt.days
df1['days'] = np.where(is_sameday, 0, days_after_exam)
df1.head(5)

Unnamed: 0,nim,nama,nama_prodi,jenjang,angkatan,nama_pembimbing,tanggal_sk_pembimbing,tanggal_pelaksanaan_ujian,status_lulus,tgl_lulus,waktu_pelaksanaan_ujian,lulus_daycount,days
0,4112317035,ALFIN WIDYATMOKO,Statistika Terapan dan Komputasi,D3,2017,Dr. Iqbal Kharisudin M.Sc.,2022-07-11,NaT,LULUS,2022-08-19,NaT,not sameday,
1,4112318001,DHAMA SEKAR OKTAFANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2022-07-08,2023-07-21,LULUS,2023-07-21,08:10:00,sameday,0.0
2,4112318007,RANI WIDYANINGRUM,Statistika Terapan dan Komputasi,D3,2018,Prof. Dr. rer.nat. YL Sukestiyarno M.S.,2022-08-15,2023-08-10,LULUS,2023-08-10,08:00:00,sameday,0.0
3,4112318009,ENDAH SRI WAHYUNI,Statistika Terapan dan Komputasi,D3,2018,Dr. Iqbal Kharisudin M.Sc.,2023-02-07,2023-08-15,LULUS,2023-08-15,10:00:00,sameday,0.0
4,4112318015,BERTI SUHARYANI,Statistika Terapan dan Komputasi,D3,2018,Dr. Scolastika Mariani M.Si.,2023-02-17,2023-07-21,LULUS,2023-08-16,10:00:00,not sameday,26.0


In [83]:
# Average of the 'days' column for each cohort year ('angkatan').
grad_daycount_mean = (
    df1
    .groupby(['angkatan'])['days']
    .mean()
    .reset_index(name='rata-rata')
)
grad_daycount_mean

Unnamed: 0,angkatan,rata-rata
0,2017,20.104
1,2018,15.269646
2,2019,10.333977


# **Tingkat Kelulusan Mahasiswa Berdasarkan Prodi**

In [84]:
# Study programmes ('nama_prodi') present in the data and the number of rows for each.
nama_prodi = (
    raw_df['nama_prodi']
    .value_counts()
    .rename_axis('Prodi')
    .reset_index(name='Jumlah')
)
nama_prodi

Unnamed: 0,Prodi,Jumlah
0,Pendidikan Matematika,535
1,Pendidikan Biologi,281
2,Pendidikan Kimia,248
3,Pendidikan Fisika,235
4,Pendidikan Ilmu Pengetahuan Alam,209
5,Biologi,204
6,Teknik Informatika,186
7,Kimia,166
8,Fisika,132
9,Matematika,107


In [85]:
# Number of rows per (study programme, graduation status) combination.
prodi_status_groups = raw_df.groupby(['nama_prodi', 'status_lulus'])
status_lulus_nama_prodi = prodi_status_groups.size().reset_index(name='Jumlah')
status_lulus_nama_prodi

Unnamed: 0,nama_prodi,status_lulus,Jumlah
0,Biologi,BELUM LULUS,46
1,Biologi,LULUS,158
2,Fisika,BELUM LULUS,27
3,Fisika,LULUS,105
4,Ilmu Lingkungan,BELUM LULUS,21
5,Ilmu Lingkungan,LULUS,10
6,Kimia,BELUM LULUS,45
7,Kimia,LULUS,121
8,Matematika,BELUM LULUS,33
9,Matematika,LULUS,74
