In [172]:
import pandas as pd

test_document = '/Users/churnika/Desktop/Projects/IoT_Project/Speech/Dataset/speech_to_text_20240414-185854.csv'

# Load the transcript file. It has no header row, so pandas must not treat the
# first line as column names. A completely empty file raises EmptyDataError,
# which is turned into an empty DataFrame so the later cells can test `.empty`.
try:
    test_doc = pd.read_csv(test_document, header=None)
except pd.errors.EmptyDataError:
    print(f"No data in {test_document}")
    test_doc = pd.DataFrame()

# Nothing to normalise when no rows were read; otherwise name the single
# column 'text' and lowercase every entry.
if test_doc.empty:
    print("The DataFrame is empty.")
else:
    test_doc.columns = ['text']
    test_doc['text'] = test_doc['text'].str.lower()


In [173]:
import nltk

# word_tokenize needs NLTK's Punkt tokenizer data. If you haven't downloaded it yet,
# uncomment the line below (recent NLTK releases look for 'punkt_tab' instead of 'punkt').
# nltk.download('punkt')

def tokenize_text(text):
    """Return the list of tokens that ``nltk.word_tokenize`` produces for ``text``."""
    tokens = nltk.word_tokenize(text)
    return tokens

# Tokenize every row of the 'text' column, storing the token lists in a new
# 'tokenized_text' column. An empty frame has no 'text' column, so it is only reported.
if test_doc.empty:
    print("The DataFrame is empty.")
else:
    test_doc['tokenized_text'] = test_doc['text'].apply(tokenize_text)


In [174]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Create a tokenizer
tokenizer = Tokenizer()

# NOTE(review): fitting a fresh Tokenizer here builds the vocabulary from this
# file alone, so the integer ids only match what the classifier expects if it
# was trained with an identical vocabulary. Presumably the tokenizer fitted at
# training time should be saved and loaded here instead - confirm.
if not test_doc.empty:
    # Fit the tokenizer on the text; this creates the vocabulary
    tokenizer.fit_on_texts(test_doc['tokenized_text'].tolist())

    # Convert each row's tokens into integers. This must stay inside the guard:
    # an empty frame has no 'tokenized_text' column and would raise KeyError.
    test_doc['encoded_text'] = test_doc['tokenized_text'].apply(
        lambda x: tokenizer.texts_to_sequences([x])[0]
    )

    print(test_doc['encoded_text'])
else:
    print("The DataFrame is empty.")


0    [10, 5, 11, 1, 12, 13, 3, 2, 6, 7, 14, 1, 15, ...
1    [20, 1, 21, 2, 22, 8, 23, 5, 2, 1, 9, 2, 24, 2...
2    [29, 30, 31, 3, 32, 33, 34, 2, 35, 3, 3, 36, 3...
3    [45, 46, 47, 48, 49, 50, 1, 51, 52, 4, 53, 54,...
Name: encoded_text, dtype: object


In [175]:
from keras.preprocessing.sequence import pad_sequences

# Pad the sequences so that every row has the same length.
# With no maxlen argument, pad_sequences pads with leading zeros up to the
# longest sequence in this batch.
# NOTE(review): the model presumably expects the fixed length used during
# training; if so, pass that value as maxlen= here - confirm.
if not test_doc.empty:
    test_doc['padded_text'] = pad_sequences(test_doc['encoded_text'].tolist()).tolist()

    # Printed inside the guard: an empty frame has no 'padded_text' column,
    # so printing it unconditionally would raise KeyError.
    print(test_doc['padded_text'])
else:
    print("The DataFrame is empty.")


0    [10, 5, 11, 1, 12, 13, 3, 2, 6, 7, 14, 1, 15, ...
1    [0, 0, 0, 0, 0, 20, 1, 21, 2, 22, 8, 23, 5, 2,...
2    [0, 29, 30, 31, 3, 32, 33, 34, 2, 35, 3, 3, 36...
3    [0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 46, 47, 48, 49...
Name: padded_text, dtype: object


In [176]:
import numpy as np

from keras.models import load_model

# Restore the trained classifier from the 'my_model.h5' file
model = load_model('my_model.h5')

# Stack the per-row padded sequences into a single numpy array; the rows were
# padded to a common length, so the list of lists converts cleanly
padded_rows = test_doc['padded_text'].tolist()
X = np.array(padded_rows)

# Run inference: one row of class probabilities per input row
predictions = model.predict(X)

print(predictions)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[[1.97946490e-03 9.97500300e-01 4.11508692e-04 4.07480256e-05
  6.75493648e-05 4.31243166e-07]
 [4.48219199e-03 9.94602323e-01 2.64460163e-04 2.94513622e-04
  3.55155877e-04 1.43886280e-06]
 [5.46608627e-01 1.03738934e-01 8.61994829e-03 2.28097931e-01
  1.04481518e-01 8.45306646e-03]
 [1.36429563e-01 4.31507230e-01 7.72893578e-02 8.75468627e-02
  2.06835806e-01 6.03912286e-02]]


In [177]:
# Map each model output index to its emotion name.
# `predictions` has six columns per row, so the mapping needs six entries;
# with only indices 0-4 defined, an argmax of 5 raised KeyError.
index_to_class = {
    0: 'sadness',
    1: 'joy',
    2: 'love',
    3: 'anger',
    4: 'fear',
    # NOTE(review): the first five labels follow the common six-class emotion
    # dataset ordering, where index 5 is 'surprise' - confirm against the label
    # encoding used when the model was trained.
    5: 'surprise',
}

# Fail early, with a readable message, if the model's output width and the
# label mapping ever drift apart.
assert predictions.shape[1] == len(index_to_class), (
    f"model outputs {predictions.shape[1]} classes but "
    f"index_to_class defines {len(index_to_class)}"
)

# Find the index of the maximum probability for each sequence
max_indices = np.argmax(predictions, axis=1)

# Map the indices to class names
predicted_classes = [index_to_class[index] for index in max_indices]

print(predicted_classes)


['joy', 'joy', 'sadness', 'joy']


In [178]:
from collections import Counter

# Tally how many rows were assigned to each emotion
counter = Counter(predicted_classes)

# most_common(1) returns a one-element list holding the (label, count) pair
# with the highest count; keep only the label
top_entry = counter.most_common(1)[0]
overall_class = top_entry[0]

print(overall_class)


joy


In [180]:
import os
import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders

# Email settings
subject = "An important message"
body = "Please find the attached CSV file for more details on your patient."
# Credentials are read from environment variables so that they are never
# stored in the notebook file or its saved outputs.
sender_email = os.environ.get("ALERT_SENDER_EMAIL", "")
receiver_email = os.environ.get("ALERT_RECEIVER_EMAIL", "")
password = os.environ.get("ALERT_EMAIL_PASSWORD", "")
# NOTE(review): this path is a different recording from the one loaded into
# `test_document` and classified above - confirm which file should be attached.
filename = "/Users/churnika/Desktop/Projects/IoT_Project/Speech/Dataset/speech_to_text_20240414-142942.csv"

# Check if the overall class is 'sadness'; the alert is only meant for that
# case. Previously the check was missing and the email was sent every time.
if overall_class == 'sadness':
    # Stop with a clear message instead of an opaque SMTP login failure.
    if not (sender_email and receiver_email and password):
        raise RuntimeError(
            "Set ALERT_SENDER_EMAIL, ALERT_RECEIVER_EMAIL and "
            "ALERT_EMAIL_PASSWORD before sending the alert email."
        )

    # Create a multipart message
    msg = MIMEMultipart()
    msg["From"] = sender_email
    msg["To"] = receiver_email
    msg["Subject"] = subject

    # Add the email body
    msg.attach(MIMEText(body, "plain"))

    # Read the file in binary mode and wrap it as application/octet-stream
    with open(filename, "rb") as attachment:
        part = MIMEBase("application", "octet-stream")
        part.set_payload(attachment.read())

    # Encode file in ASCII characters to send by email
    encoders.encode_base64(part)

    # Mark the part as an attachment. Only the base name is exposed, so the
    # recipient does not see the sender's local directory layout, and the
    # filename parameter is quoted by the email library.
    part.add_header(
        "Content-Disposition",
        "attachment",
        filename=os.path.basename(filename),
    )

    # Add attachment to message and convert message to string
    msg.attach(part)
    text = msg.as_string()

    # Log in to server using secure context and send email
    context = ssl.create_default_context()
    with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
        server.login(sender_email, password)
        server.sendmail(sender_email, receiver_email, text)
else:
    print(f"Overall class is '{overall_class}'; no alert email sent.")
