In [27]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from openpyxl import load_workbook

In [28]:
# Load the Excel data
# The path is relative, so the file is looked up from the current working
# directory of the kernel.
file_path = 'dokumen classification.xlsx'
wb = load_workbook(filename=file_path)
# Only the workbook's active sheet is used by the cells that follow.
sheet = wb.active

In [29]:
# Convert Excel data to pandas dataframe
# `data` is consumed in order: the first item it yields is taken as the header
# row, and whatever is left afterwards becomes the body of the DataFrame.
data = sheet.values
columns = next(data)  # pulls the first row off `data`; used as column names
df = pd.DataFrame(data, columns=columns)

In [30]:
# Preprocess the data
# Blank or partially filled spreadsheet rows arrive as None values; such rows
# cannot be vectorised or used as training labels, so only rows that carry
# both a text and a class are kept. `df` itself is left untouched.
df_labeled = df.dropna(subset=['text', 'class'])
# Cast to str so that cells Excel stored as numbers are still valid documents.
X = df_labeled['text'].astype(str).values
y = df_labeled['class'].values

In [31]:
# Convert text data into numerical features
# NOTE(review): the vectorizer is fitted on every document, before the
# train/test split performed in the following cell, so the learned vocabulary
# and weights also reflect the documents that end up in the test set.
# NOTE(review): X is overwritten with its own transformed version, so this
# cell expects X to still hold the raw texts when it runs.
vectorizer = TfidfVectorizer()
vectorizer.fit(X)
X = vectorizer.transform(X)

In [32]:
# Split the data into training and testing sets
# 20% of the rows are held out; the fixed random_state keeps the split
# reproducible between runs.
# NOTE(review): the classification reports printed further down show a
# support of 1, i.e. the held-out set is a single document.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Train the KNN model
# NOTE(review): with an even k the two neighbours can vote for different
# classes; how such a tie is resolved is not visible here - confirm that
# k = 2 is intended.
k = 2  # Choose the number of neighbors
knn = KNeighborsClassifier(n_neighbors=k)
# Fit on the training part of the split only.
knn.fit(X_train, y_train)

In [34]:
# Make predictions
# Predict labels for the held-out split with the KNN model. The name y_pred
# is assigned again by the Naive Bayes and SVM sections further down.
y_pred = knn.predict(X_test)

In [35]:
# Evaluate the model
# Both metrics compare the current y_pred against the held-out labels y_test.
print(f"Accuracy: {accuracy_score(y_test, y_pred)!s}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [36]:
# Function for prediction testing
def predict_new_text(new_text, vectorizer, knn_model):
    """Return the label predicted for one piece of text.

    Args:
        new_text: The text to classify.
        vectorizer: Object whose ``transform`` is called with a one-element
            list containing ``new_text``.
        knn_model: Object whose ``predict`` is called with the output of
            ``vectorizer.transform``.

    Returns:
        The first element of what ``knn_model.predict`` returns.
    """
    # transform() receives a list of documents, so wrap the single text.
    features = vectorizer.transform([new_text])
    return knn_model.predict(features)[0]


In [37]:
# Example usage of the predict_new_text function
# Classify one hand-written sample with the fitted vectorizer and KNN model.
new_text = "bus bus"
predicted_label = predict_new_text(new_text, vectorizer, knn)
print(f"Predicted Label: {predicted_label!s}")


Predicted Label: B


In [38]:
# Prints the same predicted_label again; the cell before this one already
# prints it right after computing it.
print(f"Predicted Label: {predicted_label!s}")

Predicted Label: B


# NAIVE BAYES ALGORITHM

In [39]:
from sklearn.naive_bayes import MultinomialNB

In [40]:
# Train the Naive Bayes model
# Uses the estimator's default settings and the same X_train / y_train split
# that the KNN model was fitted on.
naive_bayes = MultinomialNB()
naive_bayes.fit(X_train, y_train)

In [41]:
# Make predictions
# Overwrites y_pred with the Naive Bayes predictions for the held-out split;
# the KNN predictions stored under the same name are replaced.
y_pred = naive_bayes.predict(X_test)

In [42]:
# Evaluate the model
# Scores whatever y_pred currently holds against the held-out labels y_test.
print(f"Accuracy: {accuracy_score(y_test, y_pred)!s}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [24]:
# Function for prediction testing
def predict_new_text_nb(new_text, vectorizer, nb_model):
    """Classify a single text and return its predicted label.

    Nothing in the body is specific to Naive Bayes: any ``nb_model`` with a
    ``predict`` method is accepted.

    Args:
        new_text: The text to classify.
        vectorizer: Object whose ``transform`` is called with a one-element
            list containing ``new_text``.
        nb_model: Object whose ``predict`` is called with the transformed
            features.

    Returns:
        The first element of the prediction returned by ``nb_model``.
    """
    batch = [new_text]  # transform() is handed a list, not a bare string
    predictions = nb_model.predict(vectorizer.transform(batch))
    return predictions[0]

In [45]:
# Example usage of the predict_new_text_nb function
# Classify one hand-written sample with the fitted vectorizer and the
# Naive Bayes model.
new_text = "bus sawah"
predicted_label = predict_new_text_nb(new_text, vectorizer, naive_bayes)

print(f"Predicted Label: {predicted_label!s}")

Predicted Label: B


# SVM ALGORITHM

In [47]:
from sklearn.svm import SVC

In [48]:
# Train the SVM model
# A linear-kernel SVC is fitted on the same training split as the other
# models; fit() hands back the fitted estimator, which is what gets stored.
svm_model = SVC(kernel="linear").fit(X_train, y_train)

# Make predictions
# y_pred is overwritten once more, now with the SVM output for X_test.
y_pred = svm_model.predict(X_test)

In [49]:
# Evaluate the model
# Scores whatever y_pred currently holds against the held-out labels y_test.
print(f"Accuracy: {accuracy_score(y_test, y_pred)!s}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [50]:
# Function for prediction testing
def predict_new_text_svm(new_text, vectorizer, svm_model):
    """Predict the label of one text with the given vectorizer and model.

    Args:
        new_text: The text to classify.
        vectorizer: Object whose ``transform`` is called with a one-element
            list containing ``new_text``.
        svm_model: Object whose ``predict`` is called with the transformed
            features. Inside the function this parameter is used, not any
            notebook-level variable of the same name.

    Returns:
        The first element of what ``svm_model.predict`` returns.
    """
    # Vectorise the single text (as a one-item list), predict, unwrap.
    return svm_model.predict(vectorizer.transform([new_text]))[0]

In [51]:
# Example usage of the predict_new_text_svm function
# Classify one hand-written sample with the fitted vectorizer and SVM model.
new_text = "nasi goreng"
predicted_label = predict_new_text_svm(new_text, vectorizer, svm_model)

print(f"Predicted Label: {predicted_label!s}")

Predicted Label: A
