In [26]:
import os
import numpy
import pandas

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn import tree
from sklearn.svm import SVC as SVM
from sklearn.naive_bayes import GaussianNB as NVB
from sklearn.linear_model import SGDClassifier as SGD
from sklearn.tree import DecisionTreeClassifier as DTR
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import RandomForestClassifier as RFR
from sklearn.linear_model import LogisticRegression as LGR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [43]:
import pandas

df = pandas.read_csv('../dataset/datakelulusanmahasiswa.csv')
df.dropna(0, inplace=True)

df = df.drop(['NAMA'], axis=1)

In [44]:
from sklearn.preprocessing import LabelEncoder
attributes = [
    'JENIS KELAMIN',
    'STATUS MAHASISWA',
    'STATUS NIKAH', 
    'STATUS KELULUSAN',
]
for attr in attributes :
    df[attr] = LabelEncoder().fit_transform(df[attr])

In [45]:
X = df.drop('STATUS KELULUSAN', axis='columns')
Y = df['STATUS KELULUSAN']
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.3)

In [46]:
models = []

models.append(('SVM', SVM()))
models.append(('NVB', NVB()))
models.append(('DTR', DTR()))
models.append(('SGD', SGD()))
models.append(('KNN', KNN()))
models.append(('LDA', LDA()))
models.append(('LGR', LGR(max_iter=200)))
models.append(('RFR', RFR()))

cross_validation = []

for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=KFold(n_splits=10))
    model.fit(X_train, Y_train)
    cv = {}
    cv["name"] = name
    cv["mean"] = round(cv_results.mean()*100,2)
    cv["acc"] = round(accuracy_score(Y_test, model.predict(X_test))*100)
    cross_validation.append(cv)

cross_validation

[{'name': 'SVM', 'mean': 75.97, 'acc': 77},
 {'name': 'NVB', 'mean': 88.75, 'acc': 87},
 {'name': 'DTR', 'mean': 85.68, 'acc': 82},
 {'name': 'SGD', 'mean': 76.82, 'acc': 89},
 {'name': 'KNN', 'mean': 85.01, 'acc': 82},
 {'name': 'LDA', 'mean': 88.72, 'acc': 89},
 {'name': 'LGR', 'mean': 88.73, 'acc': 88},
 {'name': 'RFR', 'mean': 91.75, 'acc': 91}]

In [47]:
model = RFR(n_estimators=50, max_features="sqrt")
model.fit(X_train, Y_train)
predictions = model.predict(X_test)
cr = classification_report(Y_test, predictions, output_dict=True)
performance = pandas.DataFrame(cr).transpose()
print(performance)

              precision    recall  f1-score     support
0              0.918919  0.931507  0.925170   73.000000
1              0.875000  0.853659  0.864198   41.000000
accuracy       0.903509  0.903509  0.903509    0.903509
macro avg      0.896959  0.892583  0.894684  114.000000
weighted avg   0.903124  0.903509  0.903241  114.000000


In [48]:
importances = dict(zip(df.columns, model.feature_importances_))
importances

{'JENIS KELAMIN': 0.008654905047861513,
 'STATUS MAHASISWA': 0.3912583740689499,
 'UMUR': 0.09985535695785573,
 'STATUS NIKAH': 0.0009279110320522295,
 'IPS 1': 0.050067880031707264,
 'IPS 2': 0.05545353634664853,
 'IPS 3': 0.044850179381388855,
 'IPS 4': 0.08702266037413521,
 'IPS 5': 0.08223659631538047,
 'IPS 6': 0.03953494142943315,
 'IPS 7': 0.05158527765847151,
 'IPS 8': 0.02793767729447427,
 'IPK': 0.060614704061641295}

In [34]:
def prediksi(model, data):
    labels = ["TEPAT","TERLAMBAT"]
    array = numpy.asarray(data)
    prediction=model.predict(array)
    no_of_test_cases, cols = array.shape
    
    for i in range(no_of_test_cases):
        print("Mahasiswa Diprediksi Lulus {}".format(labels[int(prediction[i])]))

In [35]:
new_data = [
    [0,0,25,0,2.5,2.5,3,3,2.7,2.8,2.9,3,3],
    [1,1,25,1,4,4,4,4,4,4,4,4,4],
    [1,1,25,0,3.71,3.79,3.96,3.91,3.75,3.94,2.58,4.00,3.71],
    [0,1,21,0,3.79,3.29,2.98,3.55,3.75,3.71,4.00,4.00,3.60],
    [0,0,24,0,2.64,1.61,1.21,0.83,1.11,1.36,1.18,0.38,1.29],
]

prediksi(model, new_data)

Mahasiswa Diprediksi Lulus TERLAMBAT
Mahasiswa Diprediksi Lulus TEPAT
Mahasiswa Diprediksi Lulus TEPAT
Mahasiswa Diprediksi Lulus TEPAT
Mahasiswa Diprediksi Lulus TERLAMBAT
