In [None]:
# Step 1: Import libraries
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import pandas as pd

# Step 2: Load dataset
train_data_path = "/Users/dr.niladridas/desktop/test/Amazon_customer_reviews/train.ft.txt"
test_data_path = "/Users/dr.niladridas/desktop/test/Amazon_customer_reviews/test.ft.txt"


def _read_review_lines(path):
    """Read a one-record-per-line text file into a single-column DataFrame.

    Each line of the ``.ft.txt`` files is a complete record of the form
    ``__label__N <review text>``. The files are not CSV/TSV, so every line is
    taken verbatim rather than run through a delimited-text parser, where a
    tab or a field-leading quote inside a review would be treated as
    structure (extra columns, parse errors, or merged rows).

    Args:
        path: Location of the UTF-8 encoded text file.

    Returns:
        A DataFrame with one column named ``0`` (the same layout
        ``pd.read_csv(..., header=None)`` gives for a one-field file),
        holding one string per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, encoding="utf-8") as handle:
        # Blank lines carry no record; read_csv skipped them too.
        rows = [line.rstrip("\n") for line in handle if line.strip()]
    return pd.DataFrame({0: rows})


# Load train data
train_df = _read_review_lines(train_data_path)

# Load test data
test_df = _read_review_lines(test_data_path)

# Display the first few rows of the train DataFrame
print("Train Data:")
print(train_df.head())

# Display the first few rows of the test DataFrame
print("\nTest Data:")
print(test_df.head())

# Step 3: Preprocess text data (lowercase, then separate label from review)
# Each row of column 0 looks like "__label__N <review text>"; the first
# whitespace-delimited token is the label and the remainder is the review.
# No punctuation is removed here.
def _split_label_and_text(frame):
    """Lowercase column ``0`` of *frame* and split it into label and text.

    The frame is modified in place:

    * ``frame['label']`` receives the first whitespace-delimited token of
      each lowercased row.
    * ``frame[0]`` is replaced by the remaining tokens joined with single
      spaces (so runs of whitespace are collapsed).

    Args:
        frame: DataFrame whose column ``0`` holds the raw record strings.

    Raises:
        ValueError: If any row has no tokens at all, since such a row has
            no label to extract.
    """
    # Split once and reuse the token lists for both label and text.
    tokens = frame[0].str.lower().str.split()
    labels = tokens.str[0]
    if labels.isna().any():
        raise ValueError("found a row with no label token in column 0")
    frame['label'] = labels
    frame[0] = tokens.str[1:].str.join(' ')


# Apply the identical transformation to both datasets.
_split_label_and_text(train_df)
_split_label_and_text(test_df)

# Step 4: Split the training file into a fitting portion (80%) and a
# validation portion (20%). NOTE: X_test / y_test come from the training
# file, not from test_df; random_state pins the shuffle so runs are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train_df[0], train_df['label'], test_size=0.2, random_state=42
)

# Step 5: Vectorize text data using Bag-of-Words
# The vocabulary is fitted on X_train only and then reused unchanged for
# every other set, so no information from evaluation data enters the features.
vectorizer = CountVectorizer()
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)

# Step 6: Train Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_vect, y_train)

# Step 7: Predict sentiment on the validation portion and evaluate model
y_pred = classifier.predict(X_test_vect)
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)

# Also score the separate test file, which was loaded and preprocessed
# earlier but otherwise never evaluated. It is transformed with the same
# fitted vectorizer so its columns line up with the classifier's features.
test_file_vect = vectorizer.transform(test_df[0])
test_file_pred = classifier.predict(test_file_vect)
test_file_accuracy = accuracy_score(test_df['label'], test_file_pred)
print("Test file accuracy:", test_file_accuracy)

Train Data:
                                                   0
0  __label__2 Stuning even for the non-gamer: Thi...
1  __label__2 The best soundtrack ever to anythin...
2  __label__2 Amazing!: This soundtrack is my fav...
3  __label__2 Excellent Soundtrack: I truly like ...
4  __label__2 Remember, Pull Your Jaw Off The Flo...

Test Data:
                                                   0
0  __label__2 Great CD: My lovely Pat has one of ...
1  __label__2 One of the best game music soundtra...
2  __label__1 Batteries died within a year ...: I...
3  __label__2 works fine, but Maha Energy is bett...
4  __label__2 Great for the non-audiophile: Revie...
