In [1]:
# importing all the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the labelled sentences from the Excel workbook into a pandas DataFrame
data = pd.read_excel(io='text_dataset.xlsx')

In [3]:
# Preview the first 10 rows of the loaded frame instead of dumping all of it
# (nothing is loaded here; the frame was already read from disk above)
data.head(10)

Unnamed: 0,id,sentence,voice
0,1,The chef prepares the meal.,Active
1,2,The teacher explains the lesson clearly.,Active
2,3,The gardener waters the plants every morning.,Active
3,4,The kids play soccer in the park.,Active
4,5,The author wrote a thrilling novel.,Active
5,6,The scientist conducts experiments in the lab.,Active
6,7,The company launched a new product.,Active
7,8,The artist paints a beautiful portrait.,Active
8,9,The musician composes a melody.,Active
9,10,The photographer takes stunning pictures.,Active


In [4]:
# selecting the target variable and features 
X = data['sentence']
y = data['voice'] 

In [5]:
# splitting the data into train, validation and test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [6]:
# converting the text data into numerical values
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)
X_test_vec = vectorizer.transform(X_test)

In [7]:
# training model
classifier = LogisticRegression()
classifier.fit(X_train_vec, y_train)

In [8]:
# evaluating on validation data
y_val_pred = classifier.predict(X_val_vec)
print(y_val_pred)
print("---------------------------------------------->")
print(X_val)

['Active' 'Active' 'Active' 'Passive' 'Passive' 'Active' 'Passive'
 'Passive']
---------------------------------------------->
15         The architect draws the plans for the house.
8                       The musician composes a melody.
4                   The author wrote a thrilling novel.
27       A beautiful portrait is painted by the artist.
25    Experiments are conducted in the lab by the sc...
13                   The engineer designs a new bridge.
26           A new product was launched by the company.
37    The customers are served efficiently by the wa...
Name: sentence, dtype: object


In [9]:
# evaluating on validation data
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)
print("---------------------------------------------->")
print(classification_report(y_val, y_val_pred))

Validation Accuracy: 1.0
---------------------------------------------->
              precision    recall  f1-score   support

      Active       1.00      1.00      1.00         4
     Passive       1.00      1.00      1.00         4

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [31]:
# evaluating on testing data
y_test_pred = classifier.predict(X_test_vec)
print(y_test_pred)
print("---------------------------------------------->")
print(X_test)

['Active']
---------------------------------------------->
19          The student submits the assignment on time.
16             The manager organizes the work schedule.
12                 The designer creates a modern dress.
34          The application is coded by the programmer.
31    The news is accurately reported by the journal...
9             The photographer takes stunning pictures.
39    The assignment is submitted on time by the stu...
6                   The company launched a new product.
Name: sentence, dtype: object


In [11]:
# evaluating on testing data
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)
print("---------------------------------------------->")
print(classification_report(y_test, y_test_pred))

Test Accuracy: 1.0
---------------------------------------------->
              precision    recall  f1-score   support

      Active       1.00      1.00      1.00         5
     Passive       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [36]:
# A few hand-written example sentences, keyed by the same column name the dataset uses
sample = dict(
    sentence=[
        "Tickets were booked by pranay",
        "i am pranay",
        "order will be given by boss",
    ],
)

In [37]:
# Wrap the example sentences in a one-column DataFrame
sample_df = pd.DataFrame(data=sample)

In [38]:
# Show the sample frame via the notebook's rich display (bare last expression)
# rather than print(), which only gives the plain-text repr
sample_df

                        sentence
0  Tickets were booked by pranay
1                    i am pranay
2    order will be given by boss


In [40]:
# Vectorize the hand-written samples with the vocabulary fitted on the training split.
# The result gets its own name so the held-out `X_test_vec` matrix is not overwritten;
# otherwise re-running the test-evaluation cells would score sample rows against `y_test`.
# The 'sentence' column is passed rather than the whole frame: iterating a DataFrame
# yields its column labels, so transforming `sample_df` itself would encode only the
# single string "sentence" instead of the three sample sentences.
X_sample_vec = vectorizer.transform(sample_df['sentence'])
print(X_sample_vec)

# Predicted voice for each sample sentence, in the order the sentences were written
sample_pred = classifier.predict(X_sample_vec)
print(sample_pred)

  (0, 8)	1
  (2, 8)	1


In [29]:
# Inspect the held-out test sentences (the 'sentence' Series produced by the second split)
X_test

19          The student submits the assignment on time.
16             The manager organizes the work schedule.
12                 The designer creates a modern dress.
34          The application is coded by the programmer.
31    The news is accurately reported by the journal...
9             The photographer takes stunning pictures.
39    The assignment is submitted on time by the stu...
6                   The company launched a new product.
Name: sentence, dtype: object