In [27]:
import pandas as pd

# Data provided
data = [
    ("This is very good place", "pos"),
    ("I like this biryani", "pos"),
    ("I feel very happy", "pos"),
    ("This is my best work", "pos"),
    ("I do not like this restaurant", "neg"),
    ("I am tired of this stuff", "neg"),
    ("I can't deal with this", "neg"),
    ("What an idea it is", "pos"),
    ("My place is horrible", "neg"),
    ("This is an awesome place", "pos"),
    ("I do not like the taste of this juice", "neg"),
    ("I love to sing", "pos"),
    ("I am sick and tired", "neg"),
    ("I love to dance", "pos"),
    ("What a great holiday", "pos"),
    ("That is a bad locality to stay", "neg"),
    ("We will have good fun tomorrow", "pos"),
    ("I hate this food", "neg")
]

# Convert to DataFrame
df = pd.DataFrame(data, columns=['Message', 'Label'])

# Save to CSV
df.to_csv('Statements_data.csv', index=False)
print("CSV file 'Statements_data.csv' has been created.")


CSV file 'Statements_data.csv' has been created.


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score



In [62]:
# Load the data
msglbl_data = pd.read_csv('Statements_data.csv', names=['Message', 'Label'])

# Check for missing values in the dataset
print("Missing values in dataset:")
print(msglbl_data.isna().sum())

# Handle missing values by removing rows where 'Label' is NaN
msglbl_data.dropna(subset=['Label'], inplace=True)

# Map the labels to numeric values (1 for 'pos' and 0 for 'neg')
msglbl_data['labelnum'] = msglbl_data.Label.map({'pos': 1, 'neg': 0})

# Check for missing values in the 'labelnum' column
print("\nMissing values in 'labelnum' column:")
print(msglbl_data['labelnum'].isna().sum())

# Remove rows where 'labelnum' is NaN (if there are any)
msglbl_data.dropna(subset=['labelnum'], inplace=True)

# Verify that no NaN values exist in the 'labelnum' column after cleaning
print("\nData after removing NaN values from 'labelnum':")
print(msglbl_data.head())

# Place the data in X and Y Vectors
X = msglbl_data["Message"]
Y = msglbl_data['labelnum']

# Split the data into train and test sets
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y)

# Convert the text data into numerical vectors using CountVectorizer
count_vect = CountVectorizer()
Xtrain_dims = count_vect.fit_transform(Xtrain)
Xtest_dims = count_vect.transform(Xtest)

# Define the MLP Classifier
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

# Train the model
clf.fit(Xtrain_dims, Ytrain)

# Predict on the test data
prediction = clf.predict(Xtest_dims)

# Print accuracy metrics
print('******** Accuracy Metrics *********')
print('Accuracy : ', accuracy_score(Ytest, prediction)) 
print('Recall : ', recall_score(Ytest, prediction)) 
print('Precision : ', precision_score(Ytest, prediction))
print('Confusion Matrix : \n', confusion_matrix(Ytest, prediction))
print(10 * "-")

# Now predicting for a user-provided statement:
test_stmt = [input("Enter any statement to predict: ")]
test_dims = count_vect.transform(test_stmt)  # Transform user input using the same vectorizer
pred = clf.predict(test_dims)

# Output prediction result
for stmt, lbl in zip(test_stmt, pred):
    if lbl == 1:
        print(f"Statement: '{stmt}' is Positive")
    else:
        print(f"Statement: '{stmt}' is Negative")


Missing values in dataset:
Message    0
Label      0
dtype: int64

Missing values in 'labelnum' column:
1

Data after removing NaN values from 'labelnum':
                         Message Label  labelnum
1        This is very good place   pos       1.0
2            I like this biryani   pos       1.0
3              I feel very happy   pos       1.0
4           This is my best work   pos       1.0
5  I do not like this restaurant   neg       0.0
******** Accuracy Metrics *********
Accuracy :  0.6
Recall :  1.0
Precision :  0.5
Confusion Matrix : 
 [[1 2]
 [0 2]]
----------


Enter any statement to predict:  1


Statement: '1' is Positive
