In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
nltk.download('omw-1.4')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import pickle

In [None]:
# Load the labelled answers. The CSV has no header row, so pandas numbers the
# columns: column 0 is used as the answer text and column 1 as its label.
dataframe = pd.read_csv('newanswers.csv', header=None, index_col=False)
x_train, y_train = dataframe[0].values, dataframe[1].values

# Small hand-written hold-out set used only for a quick sanity check.
x_test = [
    'I keep care of not leaving my belongings anywhere',
    'I try to not leave things anywhere',
    "I don't agree about this",
    'I sometimes leave things around',
    'I always leave things around',
]
y_test = [5, 4, 2, 4, 1]

In [None]:
# Define stopwords and punctuation
# The import cell only downloads 'omw-1.4', but this notebook also needs the
# stopword corpus (used right here) plus the tokenizer models and WordNet data
# that clean() uses. Fetch them up front so a fresh environment does not fail
# with LookupError. nltk.download is a no-op for resources that are already
# up to date. Newer NLTK releases look for 'punkt_tab' instead of 'punkt', so
# both are requested; an unknown id is reported by NLTK without raising.
for resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource, quiet=True)

# English stopwords as a set for O(1) membership tests inside clean().
stopwords = set(nltk.corpus.stopwords.words('english'))
# Set of single ASCII punctuation characters (one element per character).
punctuation = set("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")

In [None]:
# Function to clean text
def clean(text):
    """Tokenize ``text`` and return its filtered, lemmatized tokens.

    Each token is lower-cased, then discarded if it is in the module-level
    ``stopwords`` set or the module-level ``punctuation`` set. Because
    ``punctuation`` holds single characters, only tokens that consist of
    exactly one punctuation character are removed by that check. Surviving
    tokens are lemmatized with WordNet.

    Returns:
        list[str]: the processed tokens, in their original order.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    kept = []
    for token in nltk.word_tokenize(text):
        token = token.lower()
        if token in stopwords:
            continue
        if token in punctuation:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return kept

In [None]:
# Function to preprocess text
def newpre(df):
    """Join each token list in ``df`` into one space-separated string.

    Args:
        df: iterable of iterables of str (for example the output of ``clean``
            applied to every document).

    Returns:
        list[str]: one joined string per input element, in the same order.
    """
    return [' '.join(tokens) for tokens in df]
# Training text: token lists first, then re-joined strings for the vectorizer.
x_clean = list(map(clean, x_train))
xnewclean = newpre(x_clean)
# Same two-step preprocessing for the hand-written test sentences.
x_testc = list(map(clean, x_test))
extestnewclean = newpre(x_testc)

In [None]:
# Vectorize text
# ngram_range=(1,2): features are TF-IDF weights over unigrams and bigrams.
cv = TfidfVectorizer(ngram_range=(1,2))
# Learn the vocabulary and IDF weights from the training text only, then
# convert the sparse result to a dense array.
x_vec = cv.fit_transform(xnewclean).toarray()
# Reuse the fitted vectorizer so the test matrix has the same columns as the
# training matrix.
xt_vec = cv.transform(extestnewclean).toarray()

In [None]:
# Function to predict using Naive Bayes
def predictposneg(df, train, test, test_val, filename='finalized_model.sav'):
    """Fit a Multinomial Naive Bayes model, persist it, and score it.

    Args:
        df: feature matrix the classifier is fitted on.
        train: labels corresponding to the rows of ``df``.
        test: feature matrix to predict labels for.
        test_val: true labels for ``test``, used to compute accuracy.
        filename: path the fitted model is pickled to. Defaults to
            ``'finalized_model.sav'``; an existing file is overwritten.

    Returns:
        tuple: ``(y_pred, acc)`` where ``y_pred`` holds the predicted labels
        for ``test`` and ``acc`` is the accuracy of those predictions against
        ``test_val``.
    """
    mn = MultinomialNB()
    mn.fit(df, train)
    y_pred = mn.predict(test)
    # Use a context manager so the file is flushed and closed even if pickling
    # fails; the previous bare open() left closing the handle to the garbage
    # collector.
    with open(filename, 'wb') as model_file:
        pickle.dump(mn, model_file)
    acc = accuracy_score(test_val, y_pred)
    return y_pred, acc
# Train on the vectorized training set, predict the hand-written test
# sentences, and print the predicted labels followed by the accuracy.
# Side effect: predictposneg also pickles the fitted model to disk.
y_pred, acc = predictposneg(x_vec, y_train, xt_vec, y_test)
print(y_pred, acc)

In [None]:
# Example scores
# NOTE(review): `scores` is not read anywhere else in the visible notebook.
scores = [5, 1, 2, 5, 2, 1, 5, 4, 2, 3, 4, 3, 1, 5, 2]
# Hard-coded example totals per personality trait.
fac = {
    'Openness': 12,
    'Conscientiousness': 14,
    'Extraversion': 10,
    'Agreeableness': 8,
    'Neuroticism': 11,
}
print('Check your evaluation below')
# One "Trait: value" line per entry, in the dict's insertion order.
print('\n'.join(f'{trait}: {value}' for trait, value in fac.items()))
print('I hope you agree')

In [None]:
# Visualizing the Results
# Pair the true and predicted labels so their correlation can be plotted.
results = {'y_test': y_test, 'y_pred': y_pred}
df_results = pd.DataFrame(results)

# Draw on an explicit Axes rather than the implicit pyplot state, and give the
# figure a title so it is understandable when the notebook is skimmed.
fig, ax = plt.subplots()
sns.heatmap(df_results.corr(), annot=True, ax=ax)
ax.set_title('Correlation between true and predicted labels')
plt.show()

In [None]:
# Saving the model
def save_model(model, filename):
    """Pickle ``model`` to the file at ``filename``.

    The file is opened in binary write mode, so an existing file at that path
    is overwritten.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(model, sink)
# `mn` is a local variable of predictposneg and never reaches notebook scope,
# so the original `save_model(mn, ...)` raised NameError on a fresh kernel.
# MultinomialNB fitting is deterministic, so refitting on the same training
# matrix and labels reproduces the classifier that predictposneg trained.
mn = MultinomialNB().fit(x_vec, y_train)
save_model(mn, 'naive_bayes_model.pkl')
print('Model saved as naive_bayes_model.pkl')

In [None]:
# Loading the model
def load_model(filename):
    """Return the object unpickled from the file at ``filename``.

    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    with open(filename, mode='rb') as source:
        return pickle.load(source)
# Round-trip check: read back the pickle that the save cell wrote.
# load_model uses pickle.load, which can execute arbitrary code, so only load
# files you created yourself.
model = load_model('naive_bayes_model.pkl')
print('Model loaded from naive_bayes_model.pkl')