## Training the dataset for questions

In [3]:
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the JSON dataset
with open('primate_dataset.json', 'r') as file:
    data = json.load(file)

# Prepare Data
X = [entry['post_text'] for entry in data]

annotations = [[annotation[0] for annotation in entry['annotations']] for entry in data]
answers = [[annotation[1] for annotation in entry['annotations']] for entry in data]

# Vectorize Text Data
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the Data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, answers, test_size=0.2, random_state=42)



## Evaluation Metrics
## Model Used:- Decision Tree

In [4]:
models = []
train_auc_roc_scores = []
test_auc_roc_scores = []

for i in range(len(annotations[0])):
    model = DecisionTreeClassifier()
    y_train_question = [answer[i] for answer in y_train]
    
    model.fit(X_train, y_train_question)
    models.append(model)

    # Predict probabilities for the training set
    y_train_prob = model.predict_proba(X_train)[:, 1]  
    train_auc_roc = roc_auc_score(y_train_question, y_train_prob)
    
    # Predict probabilities for the test set
    y_test_question = [answer[i] for answer in y_test]
    y_test_prob = model.predict_proba(X_test)[:, 1] 
    test_auc_roc = roc_auc_score(y_test_question, y_test_prob)
    
    train_auc_roc_scores.append(train_auc_roc)
    test_auc_roc_scores.append(test_auc_roc)

# Step 6: Evaluate the Models
#for accuracy
train_accuracies = []
test_accuracies = []
#for precision
train_precisions = []
test_precisions = []
#for recalls
train_recalls = []
test_recalls = []
#for f1_score
train_f1_scores = []
test_f1_scores = []

for i, model in enumerate(models):
    y_train_question = [answer[i] for answer in y_train]
    y_test_question = [answer[i] for answer in y_test]
    
    # Training metrics
    y_train_pred = model.predict(X_train)
    train_accuracy = accuracy_score(y_train_question, y_train_pred)
    train_accuracies.append(train_accuracy)
    
    train_precision = precision_score(y_train_question, y_train_pred, average='weighted')
    train_precisions.append(train_precision)

    train_recall = recall_score(y_train_question, y_train_pred, average='weighted')
    train_recalls.append(train_recall)

    train_f1 = f1_score(y_train_question, y_train_pred, average='weighted')
    train_f1_scores.append(train_f1)

    # Test metrics
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test_question, y_test_pred)
    test_accuracies.append(test_accuracy)

    test_precision = precision_score(y_test_question, y_test_pred, average='weighted')
    test_precisions.append(test_precision)

    test_recall = recall_score(y_test_question, y_test_pred, average='weighted')
    test_recalls.append(test_recall)

    test_f1 = f1_score(y_test_question, y_test_pred, average='weighted')
    test_f1_scores.append(test_f1)

# Calculate averages
avg_train_accuracy = sum(train_accuracies) / len(train_accuracies)
avg_train_precision = sum(train_precisions) / len(train_precisions)
avg_train_recall = sum(train_recalls) / len(train_recalls)
avg_train_f1_score = sum(train_f1_scores) / len(train_f1_scores)
avg_train_auc_roc_score = sum(train_auc_roc_scores) / len(train_auc_roc_scores)

avg_test_accuracy = sum(test_accuracies) / len(test_accuracies)
avg_test_precision = sum(test_precisions) / len(test_precisions)
avg_test_recall = sum(test_recalls) / len(test_recalls)
avg_test_f1_score = sum(test_f1_scores) / len(test_f1_scores)
avg_test_auc_roc_score = sum(test_auc_roc_scores) / len(test_auc_roc_scores)

# Print the results
print("Training Metrics:")
print(f"Accuracy: {avg_train_accuracy}")
print(f"Precision: {avg_train_precision}")
print(f"Recall: {avg_train_recall}")
print(f"F1 Score: {avg_train_f1_score}")
print(f"AUC-ROC Score: {avg_train_auc_roc_score}")

print("\nTest Metrics:")
print(f"Accuracy: {avg_test_accuracy}")
print(f"Precision: {avg_test_precision}")
print(f"Recall: {avg_test_recall}")
print(f"F1 Score: {avg_test_f1_score}")
print(f"AUC-ROC Score: {avg_test_auc_roc_score}")


Training Metrics:
Accuracy: 0.9998612845054793
Precision: 0.999861392026659
Recall: 0.9998612845054793
F1 Score: 0.9998611583320177
AUC-ROC Score: 0.9999996409009927

Test Metrics:
Accuracy: 0.7788861180382377
Precision: 0.7737952246224777
Recall: 0.7788861180382377
F1 Score: 0.7758290925675633
AUC-ROC Score: 0.6664565569747141


## Model Used :- Random Forest

In [5]:
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Step 1: Load the JSON dataset
with open('primate_dataset.json', 'r') as file:
    data = json.load(file)

# Step 2: Prepare Data
X = [entry['post_text'] for entry in data]

annotations = [[annotation[0] for annotation in entry['annotations']] for entry in data]
answers = [[annotation[1] for annotation in entry['annotations']] for entry in data]

# Step 3: Vectorize Text Data
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Step 4: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, answers, test_size=0.2, random_state=42)

# Step 5: Train Random Forest Models
models = []
train_auc_roc_scores = []
test_auc_roc_scores = []

for i in range(len(annotations[0])):
    model = RandomForestClassifier()  # Change to RandomForestClassifier
    y_train_question = [answer[i] for answer in y_train]
    
    model.fit(X_train, y_train_question)
    models.append(model)

    # Predict probabilities for the training set
    y_train_prob = model.predict_proba(X_train)[:, 1]  
    train_auc_roc = roc_auc_score(y_train_question, y_train_prob)
    
    # Predict probabilities for the test set
    y_test_question = [answer[i] for answer in y_test]
    y_test_prob = model.predict_proba(X_test)[:, 1] 
    test_auc_roc = roc_auc_score(y_test_question, y_test_prob)
    
    train_auc_roc_scores.append(train_auc_roc)
    test_auc_roc_scores.append(test_auc_roc)

# Step 6: Evaluate the Models
# for accuracy
train_accuracies = []
test_accuracies = []
# for precision
train_precisions = []
test_precisions = []
# for recalls
train_recalls = []
test_recalls = []
# for f1_score
train_f1_scores = []
test_f1_scores = []

for i, model in enumerate(models):
    y_train_question = [answer[i] for answer in y_train]
    y_test_question = [answer[i] for answer in y_test]
    
    # Training metrics
    y_train_pred = model.predict(X_train)
    train_accuracy = accuracy_score(y_train_question, y_train_pred)
    train_accuracies.append(train_accuracy)
    
    train_precision = precision_score(y_train_question, y_train_pred, average='weighted')
    train_precisions.append(train_precision)

    train_recall = recall_score(y_train_question, y_train_pred, average='weighted')
    train_recalls.append(train_recall)

    train_f1 = f1_score(y_train_question, y_train_pred, average='weighted')
    train_f1_scores.append(train_f1)

    # Test metrics
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test_question, y_test_pred)
    test_accuracies.append(test_accuracy)

    test_precision = precision_score(y_test_question, y_test_pred, average='weighted')
    test_precisions.append(test_precision)

    test_recall = recall_score(y_test_question, y_test_pred, average='weighted')
    test_recalls.append(test_recall)

    test_f1 = f1_score(y_test_question, y_test_pred, average='weighted')
    test_f1_scores.append(test_f1)

# Step 7: Calculate averages
avg_train_accuracy = sum(train_accuracies) / len(train_accuracies)
avg_train_precision = sum(train_precisions) / len(train_precisions)
avg_train_recall = sum(train_recalls) / len(train_recalls)
avg_train_f1_score = sum(train_f1_scores) / len(train_f1_scores)
avg_train_auc_roc_score = sum(train_auc_roc_scores) / len(train_auc_roc_scores)

avg_test_accuracy = sum(test_accuracies) / len(test_accuracies)
avg_test_precision = sum(test_precisions) / len(test_precisions)
avg_test_recall = sum(test_recalls) / len(test_recalls)
avg_test_f1_score = sum(test_f1_scores) / len(test_f1_scores)
avg_test_auc_roc_score = sum(test_auc_roc_scores) / len(test_auc_roc_scores)

# Step 8: Print the results
print("Training Metrics:")
print(f"Accuracy: {avg_train_accuracy}")
print(f"Precision: {avg_train_precision}")
print(f"Recall: {avg_train_recall}")
print(f"F1 Score: {avg_train_f1_score}")
print(f"AUC-ROC Score: {avg_train_auc_roc_score}")

print("\nTest Metrics:")
print(f"Accuracy: {avg_test_accuracy}")
print(f"Precision: {avg_test_precision}")
print(f"Recall: {avg_test_recall}")
print(f"F1 Score: {avg_test_f1_score}")
print(f"AUC-ROC Score: {avg_test_auc_roc_score}")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training Metrics:
Accuracy: 0.9998612845054793
Precision: 0.999861392026659
Recall: 0.9998612845054793
F1 Score: 0.9998611583320177
AUC-ROC Score: 0.9999996409009927

Test Metrics:
Accuracy: 0.7763923524522028
Precision: 0.7332666989259001
Recall: 0.7763923524522028
F1 Score: 0.7229965944651027
AUC-ROC Score: 0.7663612622893038


## Testing the prediction ability

## file input and output


In [10]:

file_path = input("Enter the path of the input .txt file: ")
try:
    with open(file_path, 'r') as file:
        text_content = file.read()
        text_content = text_content[1:]
        text_content = text_content[:-1]
except Exception as e:
    print(f"An error occurred while loading the file: {e}")

    
# Load the new data (example data)
new_data = [
    {       "post_text":text_content}]
# Preprocess the new data
new_X = [entry['post_text'] for entry in new_data]
new_X_vectorized = vectorizer.transform(new_X)

# Use the trained models to predict annotations
new_annotations = []
for model in models:
    new_predictions = model.predict(new_X_vectorized)
    new_annotations.append(new_predictions)

# Aggregate the predictions into the desired format
last=[]
for i, entry in enumerate(new_data):
    for j, question in enumerate(annotations[0]):
        b=[]
        b.append(question)
        b.append(new_annotations[j][i])
        last.append(b)

con=[str(x) for x in last]
content=','.join(con)
file_path1=input('enter the file name:')
try:
    with open(file_path1, 'w') as file:
        file.write('{'+content+'}')
    print(f"Content successfully written to {file_path1}")
except Exception as e:
    print(f"An error occurred while writing to the file: {e}")


Enter the path of the input .txt file: tr1.txt
enter the file name:o
Content successfully written to o


 ## GUI Application


In [12]:
import tkinter as tk
from tkinter import ttk, messagebox

class TextColors:
    RESET = '\033[0m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'

# Assuming you have the rest of your script before this point...
n=0
t=0
nt=0
# Function to update the conclusion label based on conditions
def update_conclusion():
    global n, t, nt
    y = [entry[1] for entry in last]

    if 'yes' in (y[6], y[7]):
        conclusion_label.config(text="CONCLUSION: CATASTROPHIC", foreground="red")
        messagebox.showinfo("Treatment", "Ambulance Number: XYZ\nHelpline Number: ABC/WXY/CED")
    else:
        c = y.count('yes')
        if c <= 3:
            conclusion_label.config(text="CONCLUSION: HEALTHY", foreground="green")
            n += 1
        else:
            conclusion_label.config(text="CONCLUSION: VULNERABLE", foreground="orange")
            messagebox.showinfo("Treatment", "Make your bedroom sleep-friendly\nGo to sleep and wake up around the same time each day, even on the weekends\nAvoid caffeine, nicotine, and alcohol close to your bedtime\nGet regular physical activity during the daytime, at least 5 to 6 hours before going to bed.\nAvoid naps, especially in the afternoon.\nConsultance Number: XYZE")
            t += 1

# Create a themed Tkinter window
window = tk.Tk()
window.title("Conclusion Determination")
window.geometry("400x300")  # Set initial window size

# Style the GUI
style = ttk.Style()
style.configure("TLabel", font=("Helvetica", 12))
style.configure("TButton", font=("Helvetica", 10))

# Button to trigger the conclusion update
update_button = ttk.Button(window, text="Update Conclusion", command=update_conclusion)
update_button.pack(pady=20)

# Label to display the conclusion
conclusion_label = ttk.Label(window, text="", font=("Helvetica", 14))
conclusion_label.pack()

# Run the Tkinter main loop
window.mainloop()
