# Sentiment Analysis using NLP
This notebook performs sentiment analysis on movie reviews using TextBlob, then optionally trains a Naive Bayes classifier on the resulting sentiment labels.

In [None]:
!pip install textblob

In [None]:
import nltk

# TextBlob's tokenizers read NLTK's Punkt sentence models. NLTK 3.9+ looks for
# them under 'punkt_tab', while older releases use 'punkt', so fetch both to
# keep the notebook working regardless of the installed NLTK version.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import re

## Load Dataset
Upload the IMDB dataset CSV file, saved as `IMDB Dataset.csv`; it must contain a `review` column holding the raw review text.

In [None]:
import io

from google.colab import files

# File name used when nothing is uploaded or when several files are uploaded.
DEFAULT_CSV = 'IMDB Dataset.csv'

# files.upload() returns a {filename: file_bytes} mapping for the chosen files.
uploaded = files.upload()

if uploaded:
    # Read the bytes that were actually uploaded instead of assuming a fixed
    # name on disk, so a renamed file (e.g. "IMDB Dataset (1).csv") still loads.
    csv_name = DEFAULT_CSV if DEFAULT_CSV in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[csv_name]))
else:
    # Upload was skipped: fall back to a copy already in the working directory.
    df = pd.read_csv(DEFAULT_CSV)

# Fail early with a clear message; the cleaning step reads df['review'].
if 'review' not in df.columns:
    raise KeyError("The uploaded CSV must contain a 'review' column.")

df.head()

## Clean the Text

In [None]:
def clean_text(text):
    """Normalize a raw review into lowercase, space-separated alphabetic words.

    HTML tags and whitespace runs are treated as word separators, so text such
    as ``"great.<br /><br />The"`` becomes ``"great the"`` rather than being
    fused into ``"greatthe"``.

    Args:
        text: Raw review text. Non-string values (e.g. ``None`` or the NaN that
            pandas produces for an empty CSV cell) are treated as empty reviews.

    Returns:
        str: The cleaned text, containing only the letters ``a-z`` separated by
        single spaces (an empty string if nothing remains).
    """
    if not isinstance(text, str):
        return ''
    # Replace tags with a space (not '') so the words on either side stay apart.
    text = re.sub(r'<[^>]*>', ' ', text)
    # Drop everything except ASCII letters and whitespace (digits, punctuation, ...).
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # split()/join collapses newlines, tabs and repeated spaces into single spaces.
    return ' '.join(text.lower().split())

# Store a normalized copy of every review next to the raw text; the original
# 'review' column is left untouched.
df['clean_review'] = df['review'].map(clean_text)
df.head()

## Sentiment Analysis using TextBlob

In [None]:
def get_sentiment(text):
    """Label ``text`` by the sign of its TextBlob polarity score.

    Args:
        text: The text to score; it is passed straight to ``TextBlob``.

    Returns:
        str: ``'positive'`` when the polarity is above zero, ``'negative'`` when
        it is below zero, and ``'neutral'`` otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'

# Keep a copy of any labels already stored in 'sentiment' (the uploaded CSV may
# ship its own label column) before they are overwritten with TextBlob's output.
# The second condition keeps the saved copy intact when this cell is re-run.
if 'sentiment' in df.columns and 'original_sentiment' not in df.columns:
    df['original_sentiment'] = df['sentiment']

# Label every cleaned review with TextBlob's polarity-based sentiment.
df['sentiment'] = df['clean_review'].apply(get_sentiment)
df[['review', 'sentiment']].head()

## Visualize the Sentiments

In [None]:
# Bar chart of how many reviews fall under each sentiment label.
fig, ax = plt.subplots()
sns.countplot(data=df, x='sentiment', ax=ax)
ax.set_title('Sentiment Distribution')
plt.show()

## (Optional) Naive Bayes Classifier

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Targets are whatever labels df['sentiment'] currently holds.
y = df['sentiment']

# Split the raw text *before* vectorizing so the vocabulary is learned from the
# training reviews only; fitting the vectorizer on the full corpus would leak
# information about the held-out reviews into the model.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_review'], y, test_size=0.2, random_state=42
)

cv = CountVectorizer()
X_train = cv.fit_transform(text_train)  # learn the vocabulary and count training words
X_test = cv.transform(text_test)        # reuse that vocabulary; unseen words are ignored

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))