In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Read the question/answer pairs. The questions are the model inputs
# and each answer text is used directly as the class label.
data = pd.read_excel("../Data/HeartChatBotData.xlsx")
X, y = data['Questions'], data['Answer']

# Step 2: Turn the questions into TF-IDF features. Lowercasing and the
# built-in tokenizer are TfidfVectorizer defaults, so only the English
# stop-word list has to be requested explicitly.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = tfidf_vectorizer.fit_transform(X)

# Steps 3 and 4: Fit both classifiers on the same feature matrix.
# fit() returns the estimator itself, so construction and training are chained.
svm_classifier = SVC(kernel='linear').fit(X_tfidf, y)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_tfidf, y)

# Answer lookup: run one question through both trained classifiers
def predict_answer(question):
    """Predict an answer for ``question`` with both trained models.

    The question is vectorized with the already-fitted module-level
    ``tfidf_vectorizer`` and then classified by ``svm_classifier`` and
    ``rf_classifier``.

    Parameters
    ----------
    question : str
        The question text to answer.

    Returns
    -------
    tuple
        ``(svm_answer, rf_answer)`` - the label predicted by the SVM followed
        by the label predicted by the Random Forest.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([question])

    # Each predict() call yields one label per input row; only one row exists.
    return tuple(
        model.predict(features)[0]
        for model in (svm_classifier, rf_classifier)
    )

# Smoke test: ask one question and show what each model answers
question = "What are the symptoms of a heart attack?"
svm_answer, rf_answer = predict_answer(question)
for label, answer in (
    ("SVM predicted answer:", svm_answer),
    ("Random Forest predicted answer:", rf_answer),
):
    print(label, answer)


SVM predicted answer: Symptoms of a heart attack include discomfort in the center of the chest that lasts more than a few minutes, or that goes away and comes back. It can feel like uncomfortable pressure, squeezing, fullness, or pain. Symptoms can also include pain or discomfort in one or both arms, the back, neck, jaw, or stomach, shortness of breath, cold sweat, nausea, or lightheadedness.
Random Forest predicted answer: Symptoms of a heart attack include discomfort in the center of the chest that lasts more than a few minutes, or that goes away and comes back. It can feel like uncomfortable pressure, squeezing, fullness, or pain. Symptoms can also include pain or discomfort in one or both arms, the back, neck, jaw, or stomach, shortness of breath, cold sweat, nausea, or lightheadedness.


In [23]:

# Example usage: a terse, single-keyword query
input_question = "treatment?"
predicted_answer_svm, predicted_answer_rf = predict_answer(input_question)
for label, answer in (
    ("Predicted Answer (SVM):", predicted_answer_svm),
    ("Predicted Answer (Random Forest):", predicted_answer_rf),
):
    print(label, answer)

Predicted Answer (SVM): Treatment for CHD usually involves lifestyle changes such as not smoking, eating healthy, and exercising more.
Sometimes, medications and procedures are needed.
Medications may include cholesterol-lowering drugs, anticoagulants, beta blockers, nitrates, and angiotensin-converting enzyme (ACE) inhibitors.
Procedures may include angioplasty and stent placement, or coronary artery bypass graft (CABG).
Predicted Answer (Random Forest): Treatment for CHD usually involves lifestyle changes such as not smoking, eating healthy, and exercising more.
Sometimes, medications and procedures are needed.
Medications may include cholesterol-lowering drugs, anticoagulants, beta blockers, nitrates, and angiotensin-converting enzyme (ACE) inhibitors.
Procedures may include angioplasty and stent placement, or coronary artery bypass graft (CABG).


In [24]:
import pickle

# Persist the trained Random Forest classifier (same file and contents as before).
with open("../Model/cardia_bot.pkl", "wb") as f:
    pickle.dump(rf_classifier, f)

# The classifier was trained on features produced by the fitted TF-IDF
# vectorizer, so scoring new question text after loading the model requires
# that same vectorizer. Save it next to the model so the pair can be restored
# together.
# NOTE: only unpickle these files from a trusted location; pickle.load can
# execute arbitrary code.
with open("../Model/cardia_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf_vectorizer, f)