In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np
import json
# Survey responses embedded inline as a JSON array (kept as text so it can be
# parsed exactly like an API payload or file would be).
# Each record has the keys: id, question, scale, result, sprint_no, user_profile_id.
# In this sample every `result` is a float between 1.0 and 5.0.
data = '''
[
  {
    "id": 2,
    "question": "How would you describe your overall mood today ?",
    "scale": 0,
    "result": 3.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 3,
    "question": "When you think about the future, which emoji best represents your feelings?",
    "scale": 0,
    "result": 4.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 4,
    "question": "How would you rate your level of optimism right now?",
    "scale": 0,
    "result": 5.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 5,
    "question": "Reflecting on recent events, which emoji aligns with your emotions?",
    "scale": 0,
    "result": 4.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 6,
    "question": "Considering your current circumstances, how do you feel on the emoji scale?",
    "scale": 0,
    "result": 3.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 7,
    "question": "When you wake up in the morning, which emoji best captures your initial mood?",
    "scale": 0,
    "result": 1.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 8,
    "question": "In challenging situations, which emoji represents your typical reaction?",
    "scale": 0,
    "result": 1.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 9,
    "question": "How would you describe your emotional state regarding personal relationships?",
    "scale": 0,
    "result": 2.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 10,
    "question": "If your mood were a weather forecast, which emoji would symbolize it best?",
    "scale": 0,
    "result": 4.0,
    "sprint_no": 0,
    "user_profile_id": 1
  },
  {
    "id": 11,
    "question": "How would you rate your energy levels today?",
    "scale": 0,
    "result": 4.0,
    "sprint_no": 1,
    "user_profile_id": 1
  }
]
'''

# Parse the JSON text into a list of dicts, one dict per survey response
responses = json.loads(data)

# Extract features (question text) and labels (numeric answer) from the responses
X = [response['question'] for response in responses]
y = [response['result'] for response in responses]

# Convert y values to sentiment labels (score 1-5 -> label name).
# A score outside 1-5 raises KeyError here.
sentiment_values = {1: 'saddest', 2: 'sad', 3: 'neutral', 4: 'happy', 5: 'happiest'}
y = [sentiment_values[int(value)] for value in y]

# Split the raw text BEFORE vectorizing so that the vocabulary is learned from
# the training rows only; fitting the vectorizer on every row first would leak
# information from the held-out rows into the feature space.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Vectorize the text data: fit on the training text, then reuse that same
# vocabulary for the test rows and for the full data set.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)
X_vectorized = vectorizer.transform(X)

# Train a logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows.
# With test_size=0.2 the score is computed on only a fifth of the responses,
# so on a data set this small it is a very coarse estimate.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Predict sentiment for all responses.
# NOTE: this includes the rows the model was trained on, so agreement with the
# true labels here is not an accuracy estimate.
all_predictions = model.predict(X_vectorized)

# Map each response id to its predicted label. The id is unique per response;
# keying by the true sentiment label would make responses that share a label
# overwrite one another.
predicted_sentiments = {
    response['id']: prediction
    for response, prediction in zip(responses, all_predictions)
}

# Print predicted sentiments for each question
for response, sentiment in zip(responses, all_predictions):
    print(f"Question: {response['question']}")
    print(f"Predicted sentiment: {sentiment}\n")



Accuracy: 0.0
Question: How would you describe your overall mood today ?
Predicted sentiment: neutral

Question: When you think about the future, which emoji best represents your feelings?
Predicted sentiment: saddest

Question: How would you rate your level of optimism right now?
Predicted sentiment: happiest

Question: Reflecting on recent events, which emoji aligns with your emotions?
Predicted sentiment: happy

Question: Considering your current circumstances, how do you feel on the emoji scale?
Predicted sentiment: neutral

Question: When you wake up in the morning, which emoji best captures your initial mood?
Predicted sentiment: saddest

Question: In challenging situations, which emoji represents your typical reaction?
Predicted sentiment: saddest

Question: How would you describe your emotional state regarding personal relationships?
Predicted sentiment: sad

Question: If your mood were a weather forecast, which emoji would symbolize it best?
Predicted sentiment: saddest

Quest

In [9]:
from collections import Counter

# Tally how many times each sentiment label was predicted and keep the label
# with the highest count (most_common(1) yields a single (label, count) pair).
counter = Counter(all_predictions)
most_common_sentiment = counter.most_common(1)[0][0]

# Invert the score -> label table so a label can be mapped back to its score
reverse_sentiment_values = dict(
    (label, score) for score, label in sentiment_values.items()
)

# Numeric score of the winning label, and the label recovered from that score
final_sentiment = reverse_sentiment_values[most_common_sentiment]
most_common_sentiment_value = final_sentiment
final_sentiment_label = sentiment_values[most_common_sentiment_value]

# Report the numeric score first, then the label
print("Overall sentiment: {}".format(final_sentiment))
print("Overall sentiment: {}".format(final_sentiment_label))




Overall sentiment: 1
Overall sentiment: saddest


In [6]:
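# NOTE: every line of this cell is commented out, so running it does nothing.
# It appears to be an earlier experiment; the "Accuracy" value shown beneath the
# cell is output left over from a previous run, not from the code as it stands.
# import json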
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# from sklearn.compose import ColumnTransformer
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score

# # Load the dataset from the JSON file
# with open("mental_health_survey.json", "r") as f:
#     dataset = json.load(f)

# # Extract features and labels
# X = []
# y = []
# for entry in dataset:
#     X_entry = []
#     for question, answer in entry.items():
#         X_entry.append(answer)
#     X.append(X_entry[:-1])  # Exclude the last question which is the target variable
#     y.append(X_entry[-1])

# # Convert X and y to numpy arrays
# X = np.array(X)
# y = np.array(y)

# # Encode the categorical target variable y
# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)

# # Define the column transformer to perform one-hot encoding on categorical features
# # Assuming all features are categorical (answers to survey questions)
# column_transformer = ColumnTransformer(
#     [("onehot", OneHotEncoder(), [i for i in range(X.shape[1])])],
#     remainder="passthrough"
# )

# # Transform X to perform one-hot encoding
# X_encoded = column_transformer.fit_transform(X)

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# # Train a logistic regression model
# logreg = LogisticRegression(max_iter=100000)
# logreg.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = logreg.predict(X_test)

# # Calculate accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)


Accuracy: 0.378
