In [1]:
import pandas as pd
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

In [2]:
# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\prabo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\prabo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Function for text preprocessing
def preprocess_text(text):
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Convert to lowercase
    text = text.lower()

    # Tokenize the text
    tokens = text.split()

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    # Join the tokens back into a single string
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

In [4]:
# Load the stress dataset
df = pd.read_csv('Stress.csv')  # Replace with the path to the downloaded dataset


In [5]:
# Preprocess the text data
df['text'] = df['text'].apply(preprocess_text)

In [6]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test, sentence_range_train, sentence_range_test = train_test_split(
    df['text'], df['label'], df['sentence_range'], test_size=0.2, random_state=42
)

In [7]:
# Vectorize the preprocessed text data using TF-IDF vectorizer
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [8]:
# Train an SVM classifier for multi-class classification
model = SVC(decision_function_shape='ovr', probability=True)
model.fit(X_train_vectorized, y_train)

In [9]:
# Evaluate the model
y_pred_train = model.predict(X_train_vectorized)
train_accuracy = accuracy_score(y_train, y_pred_train)
print("Training Accuracy:", train_accuracy)


Training Accuracy: 0.9933920704845814


In [10]:
y_pred_test = model.predict(X_test_vectorized)
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Testing Accuracy:", test_accuracy)


Testing Accuracy: 0.7359154929577465


In [11]:

# Define the stress level mapping
stress_level_mapping = {
    0: "Neutral",
    1: "Stressed",
   
}

In [12]:
# Get user input
user_input = input("Enter your feedback : ")

In [13]:
# Preprocess the user input
preprocessed_input = preprocess_text(user_input)


In [14]:
# Vectorize the preprocessed user input
user_input_vectorized = vectorizer.transform([preprocessed_input])

In [15]:
# Predict stress level and confidence score for user input
predicted_label = model.predict(user_input_vectorized)[0]
confidence = model.predict_proba(user_input_vectorized)[0][predicted_label]
predicted_sentence_range = sentence_range_train.unique()[predicted_label]

In [16]:
# Map the predicted label to the corresponding stress level category
predicted_stress_level = stress_level_mapping[predicted_label]

In [17]:
# Print the predicted stress level, predicted sentence range, and confidence score
print("Predicted Stress Level:", predicted_stress_level)
print("Predicted Sentence Range:", predicted_sentence_range)
print("Confidence Score:", confidence)

Predicted Stress Level: No Stress
Predicted Sentence Range: (30, 35)
Confidence Score: 0.9360805004981141


In [18]:


# Save the model
with open('stress_prediction_by_txt.pkl', 'wb') as file:
    pickle.dump(model, file)
