In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
import nltk
import pickle

In [None]:
# Download every NLTK resource the cells below rely on, so the notebook
# survives "Restart Kernel -> Run All" on a machine with no NLTK data yet:
#   punkt / punkt_tab -> nltk.word_tokenize (newer NLTK releases look for punkt_tab)
#   stopwords         -> nltk.corpus.stopwords
#   wordnet / omw-1.4 -> nltk.WordNetLemmatizer
# nltk.download() reports a failure and returns False for a resource it cannot
# fetch instead of raising, so a name unknown to the installed version is harmless.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

In [None]:
# Load the headerless training CSV: column 0 is the answer text handed to
# clean(), column 1 is the label the classifier is fitted on.
dataframe = pd.read_csv('newanswers.csv', header=None, index_col=False)
x_train, y_train = (dataframe[column].values for column in (0, 1))

In [None]:
# Hand-written evaluation sentences, each paired with its expected label.
_labelled_test_answers = [
    ('I keep care of not leaving my belongings anywhere', 5),
    ('I try to not leave things anywhere', 4),
    ("I don't agree about this", 2),
    ('I sometimes leave things around', 4),
    ('I always leave things around', 1),
]
x_test = [answer for answer, _ in _labelled_test_answers]
y_test = [label for _, label in _labelled_test_answers]

In [None]:
# Contractions dictionary: lower-case contraction -> expanded form.
# Every key is double-quoted because it contains an apostrophe; a
# single-quoted key such as 'ain't' ends the string early and is a SyntaxError.
contractions = {
    "ain't": 'am not',
    "aren't": 'are not',
    "can't": 'cannot',
    "couldn't": 'could not',
    "didn't": 'did not',
    "doesn't": 'does not',
    "don't": 'do not',
    "hadn't": 'had not',
    "hasn't": 'has not',
    "haven't": 'have not',
    "he'd": 'he would',
    "he'll": 'he will',
    "he's": 'he is',
    "i'd": 'I would',
    "i'll": 'I will',
    "i'm": 'I am',
    "i've": 'I have',
    "isn't": 'is not',
    "it'd": 'it would',
    "it'll": 'it will',
    "it's": 'it is',
    "let's": 'let us',
    "ma'am": 'madam',
    "mightn't": 'might not',
    "mustn't": 'must not',
    "shan't": 'shall not',
    "she'd": 'she would',
    "she'll": 'she will',
    "she's": 'she is',
    "should've": 'should have',
    "shouldn't": 'should not',
    "so've": 'so have',
    "that's": 'that is',
    "there's": 'there is',
    "they'd": 'they would',
    "they'll": 'they will',
    "they're": 'they are',
    "they've": 'they have',
    "wasn't": 'was not',
    "we'd": 'we would',
    "we'll": 'we will',
    "we're": 'we are',
    "we've": 'we have',
    "weren't": 'were not',
    "what's": 'what is',
    "where's": 'where is',
    "who's": 'who is',
    "won't": 'will not',
    "would've": 'would have',
    "wouldn't": 'would not',
    "y'all": 'you all',
    "you'd": 'you would',
    "you'll": 'you will',
    "you're": 'you are',
    "you've": 'you have',
}

In [None]:
# Function to change contractions
def changecontractions(text, mapping=None):
    """Expand known English contractions in ``text``.

    The text is split on whitespace and each word is looked up in the
    contraction table. The lookup is case-insensitive: the table's keys are
    all lower-case (including "i'm", "i'd", ...), so an exact-case match
    would never expand "I'm" or a sentence-initial "Don't".

    Args:
        text: the raw string to process.
        mapping: optional dict of lower-case contraction -> expansion.
            Defaults to the module-level ``contractions`` table.

    Returns:
        The words re-joined with single spaces; words not found in the table
        are kept exactly as written. Runs of whitespace collapse to one space.
    """
    if mapping is None:
        mapping = contractions
    return ' '.join(mapping.get(word.lower(), word) for word in text.split())

In [None]:
# Stopwords
# Load NLTK's English stop-word list and print it for inspection.
# clean() loads its own copy of this list, so this module-level `stopword`
# is informational only.
stopword = nltk.corpus.stopwords.words('english')
print(stopword)

In [None]:
# Text cleaning function
def clean(text):
    """Normalise one raw answer into a list of lemmatised word tokens.

    Steps, in order: expand contractions, tokenise with NLTK, lower-case,
    drop English stop words (but keep the negations 'no', 'not' and 'nor'),
    drop tokens that are not purely alphabetic, lemmatise with WordNet.

    Args:
        text: the raw answer string.

    Returns:
        list of cleaned tokens, in their original order.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    # Build the stop-word set with the negations excluded up front. Removing
    # items from the list while iterating over it skips the element that
    # follows each removal, so a negation sitting right after another one
    # stayed a stop word. A set also makes the membership test O(1).
    kept_negations = {'no', 'not', 'nor'}
    stopword = set(nltk.corpus.stopwords.words('english')) - kept_negations
    text = changecontractions(text)
    tokens = [token.lower() for token in nltk.word_tokenize(text)]
    return [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stopword and token.isalpha()
    ]

In [None]:
# Run every training answer through clean(); yields one token list per answer.
x_clean = list(map(clean, x_train))
def newpre(df):
    """Join each token list in ``df`` into a single space-separated string.

    Args:
        df: iterable of token lists (lists of strings).

    Returns:
        list of strings, one per input token list, in the same order.
    """
    return [' '.join(tokens) for tokens in df]
# Re-join each cleaned token list into one space-separated string per answer.
xnewclean = newpre(x_clean)

In [None]:
# Apply the same cleaning to the test sentences, then re-join the tokens
# of each sentence into a single string.
x_testc = list(map(clean, x_test))
xtestnewclean = newpre(x_testc)

In [None]:
# Vectorize the text data
from sklearn.feature_extraction.text import CountVectorizer

# Unigram + bigram counts. The vocabulary is learned from the training text
# only; the test text is projected onto that same vocabulary.
cv = CountVectorizer(ngram_range=(1, 2))
train_counts = cv.fit_transform(xnewclean)
test_counts = cv.transform(xtestnewclean)
x_vec = train_counts.toarray()
xt_vec = test_counts.toarray()

In [None]:
# Function to extract features
def feature(df, vectorizer=None):
    """Turn cleaned documents into a dense n-gram count matrix.

    Args:
        df: iterable of document strings.
        vectorizer: optional, already-fitted vectorizer exposing
            ``transform()``. When given, its existing vocabulary is reused so
            the returned columns are guaranteed to line up with any other
            matrix produced by the same vectorizer. When omitted, a fresh
            ``CountVectorizer(ngram_range=(1, 2))`` is fitted on ``df``.

    Returns:
        The dense array obtained from ``.toarray()`` on the count matrix.
    """
    if vectorizer is None:
        return CountVectorizer(ngram_range=(1, 2)).fit_transform(df).toarray()
    return vectorizer.transform(df).toarray()
# NOTE(review): feature() fits its own CountVectorizer here instead of reusing
# `cv`, so x_vect lines up column-for-column with xt_vec only because both
# vectorizers are fitted on the same xnewclean with the same settings.
x_vect = feature(xnewclean)

In [None]:
# Train and predict using MultinomialNB
def predictposneg(df, train, test, test_val, filename='finalized_model.sav'):
    """Fit a multinomial Naive Bayes model, persist it, and score it.

    Args:
        df: training feature matrix.
        train: training labels, one per row of ``df``.
        test: feature matrix to predict on; must have the same columns as ``df``.
        test_val: true labels for ``test``, used for the accuracy figure.
        filename: path the fitted model is pickled to.

    Returns:
        ``(y_pred, acc)``: the predicted labels for ``test`` and the
        accuracy of those predictions against ``test_val``.
    """
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.metrics import accuracy_score
    mn = MultinomialNB()
    mn.fit(df, train)
    y_pred = mn.predict(test)
    # The context manager closes the handle even if pickling fails; a bare
    # open() left the file open until garbage collection.
    with open(filename, 'wb') as model_file:
        pickle.dump(mn, model_file)
    acc = accuracy_score(test_val, y_pred)
    return y_pred, acc
# Fit on the training matrix, predict the hand-labelled test sentences, and
# print the predicted labels followed by the accuracy. As a side effect the
# fitted model is pickled to 'finalized_model.sav'.
y_pred, acc = predictposneg(x_vect, y_train, xt_vec, y_test)
print(y_pred, acc)

In [None]:
# Function to get list from CSV
def get_list(csvname):
    """Read the questionnaire CSV and return three of its columns as lists.

    Args:
        csvname: path of a CSV with 'questions', 'factors' and 'direction'
            columns.

    Returns:
        tuple ``(questions, factors, direction)``, each a list in row order.
    """
    table = pd.read_csv(csvname)
    return tuple(list(table[column]) for column in ('questions', 'factors', 'direction'))

In [None]:
# Function to evaluate scores
def evaluate(scores, csvname='big-five.csv'):
    """Total the questionnaire answers per factor.

    Answer ``i`` belongs to the factor on row ``i`` of the question CSV.
    Answers whose ``direction`` is ``'-'`` are subtracted instead of added.

    Args:
        scores: sequence of numeric answers in question order. It is not
            modified; the sign flip used to be written back into the
            caller's list, so calling this twice on the same list gave
            different totals.
        csvname: path of the question CSV read through ``get_list``.

    Returns:
        dict mapping each factor among the first ``len(scores)`` questions
        to its signed total, keyed in order of first appearance. The order
        previously came from iterating a set and could change between runs.

    Raises:
        IndexError: if more scores are supplied than the CSV has questions.
    """
    _, factors, direction = get_list(csvname)
    if len(scores) > len(factors):
        raise IndexError(
            'got %d scores but only %d questions in %s'
            % (len(scores), len(factors), csvname)
        )
    # NOTE(review): reverse-keyed answers are negated rather than mapped onto
    # the same scale (e.g. 6 - score); confirm this is what the cut-off in
    # get_personality expects.
    fac = {}
    for score, factor, sign in zip(scores, factors, direction):
        signed = -score if sign == '-' else score
        fac[factor] = fac.get(factor, 0) + signed
    return fac

In [None]:
# Function to get personality
def get_personality(scores, length):
    """Pick the low or high description for every factor total.

    The cut-off is the floor of the midpoint between ``length / number of
    factors`` and ``length``. A total below the cut-off selects the row of
    'personality.csv' whose ``score`` is 'L'; any other total selects 'H'.

    Args:
        scores: dict mapping factor name to its total.
        length: number of answered questions.

    Returns:
        list with the first matching ``response`` text for each factor, in
        the iteration order of ``scores``.

    Raises:
        IndexError: if the CSV has no row for a factor/level pair.
    """
    table = pd.read_csv('personality.csv')
    lowest = length / len(scores.keys())
    highest = length
    threshold = (lowest + highest) // 2
    response = []
    for factor, total in scores.items():
        level = 'L' if total < threshold else 'H'
        matches = table[(table['factor'] == factor) & (table['score'] == level)]['response']
        response.append(list(matches)[0])
    return response

In [None]:
# Evaluate and get personality scores
# Example questionnaire answers, one number per question in question order.
scores = [5, 1, 2, 5, 2, 1, 5, 4, 2, 3, 4, 3, 1, 5, 2]
# Total the answers per factor; questions whose direction is '-' count negatively.
fac = evaluate(scores)
print('Check your evaluation below\n')
# The second argument is the number of answered questions, from which
# get_personality derives its low/high cut-off.
response = get_personality(fac, len(scores))
# NOTE(review): the responses are concatenated with no separator; confirm the
# texts in personality.csv carry their own trailing space or newline.
print(''.join(response))
print('I hope you agree')