In [3]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score
import requests
from io import StringIO

# Remote CSV locations of the movie-review data: training split first, test split second
train_url, test_url = (
    "https://ml-course2-upgrad.s3.amazonaws.com/Naive+Bayes/Naive+Bayes+for+Text+Classification/movie_review_train.csv",
    "https://ml-course2-upgrad.s3.amazonaws.com/Naive+Bayes/Naive+Bayes+for+Text+Classification/movie_review_test.csv",
)

# Function to load data from URL
def load_data(url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        url: Address of the CSV file to fetch.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.

    Returns:
        The DataFrame that ``pd.read_csv`` builds from the response text.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without a timeout requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail here on an error status rather than handing an error page to the
    # CSV parser, which would surface as a confusing parse failure later.
    response.raise_for_status()
    csv_data = StringIO(response.text)
    return pd.read_csv(csv_data)

# Fetch both splits through load_data: training first, then test
train_data, test_data = map(load_data, (train_url, test_url))

# Preview the head of the training split under its heading
print("Training Data Sample:", train_data.head(), sep="\n")

# Preview the head of the test split; the heading's leading newline
# leaves a blank line between the two previews
print("\nTest Data Sample:", test_data.head(), sep="\n")

# Separate each split into its 'text' column (input) and 'class' column (label)
X_train, y_train = train_data['text'].values, train_data['class'].values
X_test, y_test = test_data['text'].values, test_data['class'].values

# Bag-of-words vectorizer: English stop words removed, and terms kept only
# when their document frequency lies within the min_df / max_df bounds
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=0.03,
    max_df=0.8,
)

# The vocabulary is learned from the training text only ...
X_train_transformed = vectorizer.fit_transform(X_train)

# ... and the test text is encoded against that same fitted vocabulary
X_test_transformed = vectorizer.transform(X_test)

# Build the Bernoulli Naive Bayes classifier and fit it on the training
# matrix in one step (fit returns the fitted estimator itself)
nb_classifier = BernoulliNB().fit(X_train_transformed, y_train)

# Label every test document with the fitted classifier
y_pred = nb_classifier.predict(X_test_transformed)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the score
print("\nAccuracy of Bernoulli Naive Bayes model:", accuracy)


Training Data Sample:
  class                                               text
0   Pos   a common complaint amongst film critics is   ...
1   Pos   whew   this film oozes energy   the kind of b...
2   Pos   steven spielberg s   amistad     which is bas...
3   Pos   he has spent his entire life in an awful litt...
4   Pos   being that it is a foreign language film with...

Test Data Sample:
  class                                               text
0   Pos   films adapted from comic books have had plent...
1   Pos   every now and then a movie comes along from a...
2   Pos   you ve got mail works alot better than it des...
3   Pos      jaws   is a rare film that grabs your atte...
4   Pos   moviemaking is a lot like being the general m...

Accuracy of Bernoulli Naive Bayes model: 0.79
