# Model dengan Fitur Bag of Words

File ini berisikan program untuk pembangunan model menggunakan fitur bag of words.
Prediksi dilakukan terhadap label fakultas dan label rumpun.
Eksperimen terhadap fitur bag of words menyangkut preprocessing, metode pembangunan model, dan jumlah label.

Sebelum menjalankan program, harap install modul yang diperlukan, yaitu nltk (beserta korpus stopwords), spacy, dan model en_core_web_sm.

### Load Data

In [10]:
from nltk.corpus import stopwords
from nltk import stem
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn import metrics
import spacy
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the corpus. Each record occupies three consecutive lines:
# title, synopsis, and a faculty line whose second token is the faculty code.

# Faculty code -> discipline cluster ("rumpun").
RUMPUN_BY_FAKULTAS = {
    "rik": "rik",
    "fmipa": "saintek",
    "fasilkom": "saintek",
    "ft": "saintek",
    "fisip": "soshum",
    "fib": "soshum",
    "fh": "soshum",
    "feb": "soshum",
    "psikologi": "soshum",
}

# Read inside a context manager so the file handle is always closed.
with open('korpus-ver-2.txt', 'r', encoding="utf-8") as dataset:
    lines = dataset.readlines()

len_lines = len(lines)
counter = 0
list_kalimat = []
list_rumpun = []
list_fakultas = []

for i in range(0, len_lines, 3):
    line_i = lines[i]
    judul = line_i[8:-10]  # strip the <JUDUL> tags from the title line

    sinopsis = lines[i + 1][10:-13]  # strip the <SINOPSIS> tags from the synopsis line

    kalimat = judul + " " + sinopsis

    fakultas_arr = lines[i + 2].lower().split()
    fakultas = fakultas_arr[1]

    # An unknown faculty used to fall through the if/elif chain and silently
    # reuse the previous record's label; fail loudly instead of mislabelling.
    try:
        label = RUMPUN_BY_FAKULTAS[fakultas]
    except KeyError:
        raise ValueError(
            "Unknown faculty %r on corpus line %d" % (fakultas, i + 3)
        ) from None

    list_kalimat.append(kalimat)
    list_fakultas.append(fakultas)
    list_rumpun.append(label)

### Preprocessing

In [3]:
# Build three text variants per document: cleaned, stemmed and lemmatized.
N = len(list_kalimat)
list_cleaned = []
pos_tag = dict()
list_stemmed = []
list_lemmatized = []

stemmer = stem.snowball.SnowballStemmer("english")
# A set makes every stop-word membership test O(1) instead of scanning a list;
# the filtering result is unchanged.
stop_words = set(stopwords.words('english'))
whitelist = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Load the English spaCy pipeline used for lemmatization.
nlp = spacy.load('en_core_web_sm')

for i in range(N):
    # Lowercase
    kalimat = list_kalimat[i].lower()

    # Stop-word removal, then drop every character that is not an ASCII letter.
    # Stop words are matched on the raw space-separated token, i.e. before
    # punctuation is stripped.
    words = kalimat.split(" ")
    kalimat_clean_arr = [
        ''.join(ch for ch in word if ch in whitelist)
        for word in words
        if word not in stop_words
    ]
    kalimat_cleaned = ' '.join(kalimat_clean_arr)

    # Stemming
    kalimat_stemmed = ' '.join(stemmer.stem(w) for w in kalimat_clean_arr)

    # Lemmatization (runs on the cleaned string)
    spacy_kalimat_clean = nlp(kalimat_cleaned)
    kalimat_lemma = ' '.join([w.lemma_ for w in spacy_kalimat_clean])

    list_cleaned.append(kalimat_cleaned)
    list_stemmed.append(kalimat_stemmed)
    list_lemmatized.append(kalimat_lemma)
    
    
# print(list_cleaned[0])
# print()
# print(list_stemmed[0])
# print()
# print(list_lemmatized[0])


### Ekstraksi Fitur data

In [4]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words count matrices for the three text variants.
# min_df is left at its default of 1: every vocabulary term occurs in at least
# one document, so this is equivalent to the former min_df=0 while avoiding an
# integer 0, which newer scikit-learn releases are expected to reject
# (TODO confirm against the installed version).
# lowercase=False because the text was already lowercased during preprocessing.
# .toarray() densifies the matrices; GaussianNB later requires dense input.

# X1: cleaned text
vectorizer1 = CountVectorizer(lowercase=False)
X1 = vectorizer1.fit_transform(list_cleaned).toarray()

# X2: stemmed text
vectorizer2 = CountVectorizer(lowercase=False)
X2 = vectorizer2.fit_transform(list_stemmed).toarray()

# X3: lemmatized text
vectorizer3 = CountVectorizer(lowercase=False)
X3 = vectorizer3.fit_transform(list_lemmatized).toarray()

# Targets: yf = faculty labels, yr = cluster (rumpun) labels.
yf = list_fakultas
yr = list_rumpun

### Split Data test

In [5]:
from sklearn.model_selection import train_test_split
# Split document indices once so that all three feature matrices and both
# label sets share exactly the same train/test partition.
ind = list(range(N))

ind_train, ind_test, _, _ = train_test_split(ind, ind, test_size=0.25, random_state=1000)

# Training rows for the cleaned (1), stemmed (2) and lemmatized (3) features.
X_train1 = [X1[k] for k in ind_train]
X_train2 = [X2[k] for k in ind_train]
X_train3 = [X3[k] for k in ind_train]

# Held-out rows for the same three feature variants.
X_test1 = [X1[k] for k in ind_test]
X_test2 = [X2[k] for k in ind_test]
X_test3 = [X3[k] for k in ind_test]

# Suffix 1 = faculty labels, suffix 2 = cluster (rumpun) labels.
y_train1 = [yf[k] for k in ind_train]
y_train2 = [yr[k] for k in ind_train]
y_test1 = [yf[k] for k in ind_test]
y_test2 = [yr[k] for k in ind_test]

# Quick sanity check on one held-out sample.
print(X_test1[1])
print()
print(X_test2[1])
print()
print(y_test1[1])
print(y_test2[1])

[0 0 0 ... 0 0 0]

[0 0 0 ... 0 0 0]

fasilkom
saintek


### Pembangunan Model dan Akurasi

#### Fungsi untuk confusion matrix

In [6]:
import itertools
import numpy
import matplotlib.pyplot as plt 
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes, normalize=False,  title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D confusion matrix (rows are drawn as true labels, columns as
            predicted labels).
        classes: Tick labels, placed positionally along both axes, so they
            must be in the same order as the rows/columns of ``cm``.
        normalize: If True, divide each row by its sum before plotting.
        title: Plot title.
        cmap: Matplotlib colormap used for the image.
    """
    # Normalize before drawing so the image, the colorbar and the cell text
    # all show the same values.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, numpy.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = numpy.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = "{:.2f}".format(cm[i, j]) if normalize else cm[i, j]
        plt.text(j, i, cell_text,  horizontalalignment="center",  color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#### Log Regression

In [7]:
def count_accuracy(preds, acts):
    """Return the fraction of positions where ``preds[i] == acts[i]``.

    Args:
        preds: Indexable sequence of predicted labels; its length defines how
            many positions are compared.
        acts: Indexable sequence of actual labels, at least as long as
            ``preds``.

    Returns:
        A float in [0, 1]. An empty ``preds`` yields 0.0 instead of raising
        ZeroDivisionError.
    """
    total = len(preds)
    if total == 0:
        return 0.0

    matches = sum(1 for i in range(total) if preds[i] == acts[i])
    return matches / total

In [11]:
from sklearn.linear_model import LogisticRegression

classifier_log = LogisticRegression(multi_class='ovr',solver='liblinear')

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once (the duplicate back-to-back fits are gone);
# cross_val_score works on clones and leaves the fitted model untouched.
for name, X_tr, X_te in feature_sets:
    classifier_log.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_log, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_log.score(X_te, y_test1)
    print(f"Logistic Regression untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_log.fit(X_tr, y_train2)
    score = classifier_log.score(X_te, y_test2)
    print(f"Logistic Regression untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.8118979321655045
Logistic Regression untuk X1 dan Y1 : 0.8701298701298701

Confidence untuk X2 dan Y1: 0.8204850211773278
Logistic Regression untuk X2 dan Y1 : 0.8831168831168831

Confidence untuk X3 dan Y1: 0.8150553569516076
Logistic Regression untuk X3 dan Y1 : 0.8571428571428571

Logistic Regression untuk X1 dan Y2 : 0.948051948051948
Logistic Regression untuk X2 dan Y2 : 0.922077922077922
Logistic Regression untuk X3 dan Y2 : 0.935064935064935


#### Gaussian Naive Bayes

In [12]:
from sklearn.naive_bayes import GaussianNB
classifier_gnb = GaussianNB()

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_gnb.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_gnb, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_gnb.score(X_te, y_test1)
    print(f"Gaussian NB untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_gnb.fit(X_tr, y_train2)
    score = classifier_gnb.score(X_te, y_test2)
    print(f"Gaussian NB untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.7403256459545069
Gaussian NB untuk X1 dan Y1 : 0.7662337662337663

Confidence untuk X2 dan Y1: 0.6840638186516538
Gaussian NB untuk X2 dan Y1 : 0.7142857142857143

Confidence untuk X3 dan Y1: 0.7136870739875352
Gaussian NB untuk X3 dan Y1 : 0.7402597402597403

Gaussian NB untuk X1 dan Y2 : 0.8701298701298701
Gaussian NB untuk X2 dan Y2 : 0.8831168831168831
Gaussian NB untuk X3 dan Y2 : 0.8961038961038961


#### Multinomial Naive Bayes

In [13]:
from sklearn.naive_bayes import MultinomialNB
classifier_mnb = MultinomialNB()

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_mnb.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_mnb, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_mnb.score(X_te, y_test1)
    print(f"Multinomial NB untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_mnb.fit(X_tr, y_train2)
    score = classifier_mnb.score(X_te, y_test2)
    print(f"Multinomial NB untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.7941453627761185
Multinomial NB untuk X1 dan Y1 : 0.8441558441558441

Confidence untuk X2 dan Y1: 0.7910643887039664
Multinomial NB untuk X2 dan Y1 : 0.8571428571428571

Confidence untuk X3 dan Y1: 0.7953197078529025
Multinomial NB untuk X3 dan Y1 : 0.8441558441558441

Multinomial NB untuk X1 dan Y2 : 0.9090909090909091
Multinomial NB untuk X2 dan Y2 : 0.922077922077922
Multinomial NB untuk X3 dan Y2 : 0.922077922077922


#### Random Forest

In [16]:
from sklearn.ensemble import RandomForestClassifier


# NOTE(review): no random_state is set, so these scores presumably vary from
# run to run -- consider fixing a seed for reproducible comparisons.
classifier_rf = RandomForestClassifier()

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_rf.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_rf, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_rf.score(X_te, y_test1)
    print(f"Random Forest untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_rf.fit(X_tr, y_train2)
    score = classifier_rf.score(X_te, y_test2)
    print(f"Random Forest untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.4192019011097898
Random Forest untuk X1 dan Y1 : 0.6493506493506493

Confidence untuk X2 dan Y1: 0.4028880894980381
Random Forest untuk X2 dan Y1 : 0.6363636363636364

Confidence untuk X3 dan Y1: 0.5006723888779379
Random Forest untuk X3 dan Y1 : 0.5714285714285714

Random Forest untuk X1 dan Y2 : 0.7272727272727273
Random Forest untuk X2 dan Y2 : 0.7402597402597403
Random Forest untuk X3 dan Y2 : 0.7142857142857143


#### SVM

In [17]:
from sklearn.svm import SVC

classifier_svc = SVC(gamma='auto')

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_svc.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_svc, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_svc.score(X_te, y_test1)
    print(f"SVC untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_svc.fit(X_tr, y_train2)
    score = classifier_svc.score(X_te, y_test2)
    print(f"SVC untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.12148715149350578
SVC untuk X1 dan Y1 : 0.03896103896103896

Confidence untuk X2 dan Y1: 0.12148715149350578
SVC untuk X2 dan Y1 : 0.05194805194805195

Confidence untuk X3 dan Y1: 0.12148715149350578
SVC untuk X3 dan Y1 : 0.03896103896103896

SVC untuk X1 dan Y2 : 0.6493506493506493
SVC untuk X2 dan Y2 : 0.6623376623376623
SVC untuk X3 dan Y2 : 0.6493506493506493


#### AdaBoost

In [20]:
from sklearn.ensemble import AdaBoostClassifier

classifier_ada = AdaBoostClassifier()

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_ada.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_ada, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_ada.score(X_te, y_test1)
    print(f"AdaBoost untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_ada.fit(X_tr, y_train2)
    score = classifier_ada.score(X_te, y_test2)
    print(f"AdaBoost untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.18479070385437413
AdaBoost untuk X1 dan Y1 : 0.15584415584415584

Confidence untuk X2 dan Y1: 0.17503460629339854
AdaBoost untuk X2 dan Y1 : 0.15584415584415584

Confidence untuk X3 dan Y1: 0.18479070385437413
AdaBoost untuk X3 dan Y1 : 0.15584415584415584

AdaBoost untuk X1 dan Y2 : 0.6753246753246753
AdaBoost untuk X2 dan Y2 : 0.6623376623376623
AdaBoost untuk X3 dan Y2 : 0.6233766233766234


#### Decision Tree

In [22]:
from sklearn.tree import DecisionTreeClassifier

# NOTE(review): no random_state is set, so these scores presumably vary from
# run to run -- consider fixing a seed for reproducible comparisons.
classifier_dt = DecisionTreeClassifier()

# (name, train features, test features): X1 cleaned, X2 stemmed, X3 lemmatized.
feature_sets = [
    ("X1", X_train1, X_test1),
    ("X2", X_train2, X_test2),
    ("X3", X_train3, X_test3),
]

# Y1 = faculty labels: 5-fold CV on the training split plus held-out accuracy.
# Each feature set is fitted once; the duplicate back-to-back fits are gone.
for name, X_tr, X_te in feature_sets:
    classifier_dt.fit(X_tr, y_train1)
    scores = cross_val_score(classifier_dt, X_tr, y_train1, cv=5)
    print(f"Confidence untuk {name} dan Y1:", scores.mean())

    score = classifier_dt.score(X_te, y_test1)
    print(f"Decision Tree untuk {name} dan Y1 :", score)
    print()

# Y2 = cluster (rumpun) labels: held-out accuracy only.
for name, X_tr, X_te in feature_sets:
    classifier_dt.fit(X_tr, y_train2)
    score = classifier_dt.score(X_te, y_test2)
    print(f"Decision Tree untuk {name} dan Y2 :", score)

Confidence untuk X1 dan Y1: 0.5338493946004796
Decision Tree untuk X1 dan Y1 : 0.5324675324675324

Confidence untuk X2 dan Y1: 0.6188546284203601
Decision Tree untuk X2 dan Y1 : 0.5974025974025974

Confidence untuk X3 dan Y1: 0.5401433353231625
Decision Tree untuk X3 dan Y1 : 0.5844155844155844

Decision Tree untuk X1 dan Y2 : 0.7662337662337663
Decision Tree untuk X2 dan Y2 : 0.8311688311688312
Decision Tree untuk X3 dan Y2 : 0.8311688311688312


### Hyperparameter Tuning

Bagian ini bertujuan memaksimalkan akurasi klasifikasi untuk model logistic regression dan multinomial naive bayes, dengan fitur X yang sudah di-stemming (X2) dan label fakultas (Y1).

In [105]:
# Candidate hyperparameter values for the logistic-regression search.
dual = [True, False]
max_iter = [50, 100, 110, 120, 130, 140, 150]
solver = ['liblinear']

# Search space keyed by LogisticRegression parameter name.
param_grid = {
    "dual": dual,
    "max_iter": max_iter,
    "solver": solver,
}

In [106]:
import time
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold search over param_grid on the stemmed features (X2)
# with faculty labels (Y1), using all available cores.
grid = GridSearchCV(estimator=classifier_log, param_grid=param_grid, cv = 5, n_jobs=-1)

start_time = time.time()
grid_result = grid.fit(X_train2, y_train1)
# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# time.time() differences are in seconds, so report seconds (the label used to say "ms").
print("Execution time: " + str((time.time() - start_time)) + ' s')



Best: 0.818182 using {'dual': True, 'max_iter': 50, 'solver': 'liblinear'}
Execution time: 7.903137445449829 ms


In [108]:
# Refit logistic regression with the grid-search parameters and measure
# accuracy on the held-out stemmed features (X2) with faculty labels (Y1).
tuned_params = dict(dual=True, max_iter=50, solver='liblinear', multi_class='ovr')
new_clf_log = LogisticRegression(**tuned_params)
new_clf_log.fit(X_train2, y_train1)

score = new_clf_log.score(X_test2, y_test1)
print("New Accuracy for Log Regression : ", score)



New Accuracy for Log Regression :  0.8831168831168831


In [109]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized 5-fold search over the same param_grid on the stemmed features
# (X2) with faculty labels (Y1).
# NOTE(review): no random_state is passed, so the sampled candidates (and
# possibly the reported best parameters) presumably differ between runs.
random = RandomizedSearchCV(estimator=classifier_log, param_distributions=param_grid, cv = 5, n_jobs=-1)

start_time = time.time()
random_result = random.fit(X_train2, y_train1)
# Summarize results
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))
# time.time() differences are in seconds, so report seconds (the label used to say "ms").
print("Execution time: " + str((time.time() - start_time)) + ' s')



Best: 0.818182 using {'solver': 'liblinear', 'max_iter': 100, 'dual': True}
Execution time: 2.326324701309204 ms


In [110]:
# Refit logistic regression after the randomized search.
# solver and multi_class are spelled out, matching the other logistic-regression
# models in this notebook: dual=True is only supported by the liblinear solver,
# so relying on the library default solver fails on releases where that default
# is not liblinear.
# NOTE(review): max_iter=140 does not match the best max_iter printed by the
# randomized search output shown in this notebook (100) -- confirm which value
# is intended, or read it from random_result.best_params_.
new_clf_log = LogisticRegression(dual= True, max_iter= 140, solver='liblinear', multi_class='ovr')
new_clf_log.fit(X_train2, y_train1)
score = new_clf_log.score(X_test2, y_test1)
print("New Accuracy for Log Regression : ", score)



New Accuracy for Log Regression :  0.8831168831168831
