In [1]:
import pandas as pd
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
import joblib
import nltk
from nltk.corpus import stopwords
from keras.utils import to_categorical
from nltk.stem import WordNetLemmatizer






In [2]:
# Restore the preprocessing artifacts saved as one dict in a joblib file.
# NOTE: joblib.load unpickles its input, so only load files from a trusted source.
objects = joblib.load('tokenizer_label_encoder.joblib')

# Pull both entries out of the dict in a single unpacking step.
tokenizer, label_encoder = (
    objects[key] for key in ('tokenizer', 'target_label_encoder')
)

In [3]:
# Saved Keras model files, listed in the same order as the names bound below.
# (What the numeric suffix stands for is not shown in this notebook.)
model_paths = (
    'my_model5.h5',
    'my_model10.h5',
    'my_model20.h5',
    'my_model30.h5',
    'my_model40.h5',
)

# map() is consumed left to right by the unpacking, so files load in listed order.
model5, model10, model20, model30, model40 = map(load_model, model_paths)




In [4]:
# Lazily built NLTK resources shared by every preprocess_text call.
# Re-running this cell resets the cache to empty, which is harmless.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the cached (stop_words, lemmatizer) pair, building it on first use."""
    if not _PREPROCESS_RESOURCES:
        # Build both before storing so a failure never leaves a half-filled cache.
        stop_words = frozenset(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        _PREPROCESS_RESOURCES.update(stop_words=stop_words, lemmatizer=lemmatizer)
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Tokenize, remove English stop words, lemmatize, and re-join a text.

    Steps, in order:
      1. Split ``text`` into tokens with ``nltk.word_tokenize``.
      2. Drop every token that is an exact member of NLTK's English stop-word
         list. No lowercasing is done here, so the match is case-sensitive.
      3. Lemmatize each remaining token with ``WordNetLemmatizer.lemmatize``,
         called without a part-of-speech argument.
      4. Join the results with single spaces.

    The stop-word set and the lemmatizer are created once and reused across
    calls instead of being rebuilt for every text, which matters when this
    function is applied to every row of a DataFrame.

    NOTE(review): the exact steps are kept unchanged on purpose; presumably
    they must mirror the preprocessing used when the saved tokenizer was
    fitted -- confirm before altering them.

    Args:
        text: The string to preprocess.

    Returns:
        A single space-separated string of the surviving, lemmatized tokens
        (an empty string when no token survives).
    """
    stop_words, lemmatizer = _get_preprocess_resources()
    tokens = nltk.word_tokenize(text)
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    )

In [5]:
# Read the evaluation set from the working directory and display it.
test_data = pd.read_csv(filepath_or_buffer='test.csv')
test_data

Unnamed: 0,Headline,Short Description,Author,Category
0,WHO Says All Countries Should Tax Sugary Drink...,A 20 percent tax could reduce the consumption ...,"Stephanie Nebehay, Reuters",HEALTHY LIVING
1,This Fake Town Exists Solely To Test Driverles...,Self-driving vehicles now have a neighborhood ...,Jenny Che,BUSINESS
2,"15 Different, Delicious Ways To Interpret Gree...","Pizza, pasta, grilled cheese... you name it.",Julie R. Thomson,FOOD & DRINK
3,Two Steps to Rebooting Your Resolutions,"Real change can be hard to come by, and it's t...","Heidi Grant Halvorson, Ph.D., Contributor\nAut...",WELLNESS
4,Georgia State Senator Under Fire For Photo Wit...,"Republican Michael Williams spoke at the ""Marc...",Carla Herreria,POLITICS
...,...,...,...,...
31367,The Great Cholesterol Myth,"The ""Great Cholesterol Myth"" has been the foun...","Dr. Jonny Bowden, Contributor\nPh.D., C.N.S.",WELLNESS
31368,7 Times The NBA Suspended A Player For Doing W...,Let's see if the league gives the Golden State...,Juliet Spies-Gans,SPORTS
31369,Complementary And Alternative Medicine Use Com...,"""This is something that should be discussed op...",Catherine Pearson,PARENTING
31370,An Art Show Dedicated Entirely To Hot Sauce Is...,"Los Angeles Grocery by Patrick Martinez A new,...",Priscilla Frank,CULTURE & ARTS


In [6]:
# Columns whose text is concatenated into one input string per row.
text_columns = ['Headline', 'Short Description', 'Author']

# Every cell is stringified before joining, so a missing value is kept as
# text rather than being dropped.
raw_combined = test_data[text_columns].apply(
    lambda row: ' '.join(row.values.astype(str)),
    axis=1,
)

# Store the preprocessed text; recomputed from the source columns on every run.
test_data['combined_features'] = raw_combined.apply(preprocess_text)

In [7]:
# Fixed input length fed to the models.
# NOTE(review): presumably this must equal the length used at training time -- confirm.
max_sequence_length = 50

# Map each preprocessed text to a list of integer ids with the loaded tokenizer.
X_test_seq = tokenizer.texts_to_sequences(test_data['combined_features'])

# Bring every sequence to max_sequence_length; 'pre' padding/truncation are the
# Keras defaults, written out here so the behavior is visible.
X_test_padded = pad_sequences(
    sequences=X_test_seq,
    maxlen=max_sequence_length,
    padding='pre',
    truncating='pre',
)

In [8]:
# Number of target classes known to the loaded label encoder.
num_classes = len(label_encoder.classes_)

# Category names -> integer ids -> one-hot rows of width num_classes.
y_test_encoded = label_encoder.transform(test_data['Category'])
y_test_categorical = to_categorical(y=y_test_encoded, num_classes=num_classes)

In [9]:
# Evaluate each loaded model on the same padded inputs and one-hot labels.
# Element [1] of evaluate()'s result is kept; it is reported as accuracy below.
# NOTE(review): assumes each model was compiled with accuracy as its first
# metric, so that index 0 is the loss and index 1 the accuracy -- confirm.
accuracy_5, accuracy_10, accuracy_20, accuracy_30, accuracy_40 = (
    model.evaluate(X_test_padded, y_test_categorical)[1]
    for model in (model5, model10, model20, model30, model40)
)




In [10]:
# One report line per model, each accuracy shown as a percentage with two decimals.
report_lines = [
    f"Test Accuracy for Model5: {accuracy_5 * 100:.2f}%",
    f"Test Accuracy for Model10: {accuracy_10 * 100:.2f}%",
    f"Test Accuracy for Model20: {accuracy_20 * 100:.2f}%",
    f"Test Accuracy for Model30: {accuracy_30 * 100:.2f}%",
    f"Test Accuracy for Model40: {accuracy_40 * 100:.2f}%",
]

# A single print of the newline-joined lines emits the same text as five prints.
print("\n".join(report_lines))

Test Accuracy for Model5: 74.69%
Test Accuracy for Model10: 73.67%
Test Accuracy for Model20: 73.14%
Test Accuracy for Model30: 73.34%
Test Accuracy for Model40: 73.23%
