In [93]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
from sklearn.metrics import accuracy_score, classification_report

In [106]:
# Reading the data
data = pd.read_excel('immverse_ai_eval_dataset.xlsx') 

#loading the data
data

Unnamed: 0,id,sentence,voice
0,1,The chef prepares the meal.,Active
1,2,The teacher explains the lesson clearly.,Active
2,3,The gardener waters the plants every morning.,Active
3,4,The kids play soccer in the park.,Active
4,5,The author wrote a thrilling novel.,Active
5,6,The scientist conducts experiments in the lab.,Active
6,7,The company launched a new product.,Active
7,8,The artist paints a beautiful portrait.,Active
8,9,The musician composes a melody.,Active
9,10,The photographer takes stunning pictures.,Active


In [95]:
# target variable and features
X = df['sentence']
y = df['voice']

In [96]:
# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=1)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1)

In [97]:
# Vectorize the sentences (converting the text data into numerical values)
vectorizer = CountVectorizer()
X_train_dtm = vectorizer.fit_transform(X_train)
X_val_dtm = vectorizer.transform(X_val)
X_test_dtm = vectorizer.transform(X_test)

In [98]:
# Training the Naive Bayes model
nb = MultinomialNB()
nb.fit(X_train_dtm, y_train)

In [99]:
# Make predictions and calculate accuracy on validation set
y_val_pred = nb.predict(X_val_dtm)
val_accuracy = metrics.accuracy_score(y_val, y_val_pred)

print("Validation Accuracy:", val_accuracy)
print("---------------------------------------------->")
print(classification_report(y_val, y_val_pred))

Validation Accuracy: 0.75
---------------------------------------------->
              precision    recall  f1-score   support

      Active       0.67      0.67      0.67         3
     Passive       0.80      0.80      0.80         5

    accuracy                           0.75         8
   macro avg       0.73      0.73      0.73         8
weighted avg       0.75      0.75      0.75         8



In [100]:
# Display the predicted and actual labels side by side for the validation set
val_results = pd.DataFrame({'Sentence': X_val, 'Actual Label': y_val, 'Predicted Label': y_val_pred})
print("Validation Results:")
print(val_results)

Validation Results:
                                             Sentence Actual Label  \
33          A new bridge is designed by the engineer.      Passive   
2       The gardener waters the plants every morning.       Active   
28              A melody is composed by the musician.      Passive   
26         A new product was launched by the company.      Passive   
19        The student submits the assignment on time.       Active   
30  The film is shot in various locations by the d...      Passive   
17       The waiter serves the customers efficiently.       Active   
29   Stunning pictures are taken by the photographer.      Passive   

   Predicted Label  
33         Passive  
2           Active  
28         Passive  
26          Active  
19          Active  
30         Passive  
17         Passive  
29         Passive  


In [101]:
# Make predictions and calculate accuracy on test set
y_test_pred = nb.predict(X_test_dtm)
test_accuracy = metrics.accuracy_score(y_test, y_test_pred)

print("Test Accuracy:", test_accuracy)
print("---------------------------------------------->")
print(classification_report(y_test, y_test_pred,zero_division=1))

Test Accuracy: 0.875
---------------------------------------------->
              precision    recall  f1-score   support

      Active       1.00      0.00      0.00         1
     Passive       0.88      1.00      0.93         7

    accuracy                           0.88         8
   macro avg       0.94      0.50      0.47         8
weighted avg       0.89      0.88      0.82         8



In [102]:
# Display the predicted and actual labels side by side for the test set
test_results = pd.DataFrame({'Sentence': X_test, 'Actual Label': y_test, 'Predicted Label': y_test_pred})
print("Test Results:")
print(test_results)




Test Results:
                                             Sentence Actual Label  \
21    The lesson is clearly explained by the teacher.      Passive   
36     The work schedule is organized by the manager.      Passive   
39  The assignment is submitted on time by the stu...      Passive   
3                   The kids play soccer in the park.       Active   
22  The plants are watered every morning by the ga...      Passive   
32         A modern dress is created by the designer.      Passive   
27     A beautiful portrait is painted by the artist.      Passive   
31  The news is accurately reported by the journal...      Passive   

   Predicted Label  
21         Passive  
36         Passive  
39         Passive  
3          Passive  
22         Passive  
32         Passive  
27         Passive  
31         Passive  


In [104]:
# Get user input
user_input = input("Enter a sentence: ")

# Vectorize the user input
user_input_dtm = vectorizer.transform([user_input])

# Make a prediction
user_input_pred = nb.predict(user_input_dtm)

print(f'The sentence is in {user_input_pred[0]} voice.')


Enter a sentence: They called off the meeting.
The sentence is in Active voice.
