# Artificial Intelligence
Sentiment Analysis using Natural Language Processing

In [1]:
# Data handling
import pandas as pd

# NLP feature extraction
from sklearn.feature_extraction.text import TfidfVectorizer

# Naive Bayes classifier
from sklearn.naive_bayes import MultinomialNB

# Evaluation metrics
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the train and test splits from their CSV files (train first, then test)
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("train_data (1).csv", "test_data (1).csv")
)

# Preview the first five rows of the training split
train_df.head(5)

Unnamed: 0,0,1
0,"This film is absolutely awful, but nevertheles...",0
1,Well since seeing part's 1 through 3 I can hon...,0
2,I got to see this film at a preview and was da...,1
3,This adaptation positively butchers a classic ...,0
4,Råzone is an awful movie! It is so simple. It ...,0


In [3]:
# Show the column labels of each split (train first, then test)
tuple(split.columns for split in (train_df, test_df))

(Index(['0', '1'], dtype='object'), Index(['0', '1'], dtype='object'))

In [4]:
# Give both splits the same descriptive labels: first column -> text, second -> label
train_df.columns = test_df.columns = ['text', 'label']

# Confirm the new labels are in place on both splits
(train_df.columns, test_df.columns)

(Index(['text', 'label'], dtype='object'),
 Index(['text', 'label'], dtype='object'))

In [5]:
# Count missing values per column in each split (train first, then test)
(train_df.isna().sum(), test_df.isna().sum())

(text     0
 label    0
 dtype: int64,
 text     0
 label    0
 dtype: int64)

In [6]:
# Features: the text column of each split
X_train, X_test = train_df['text'], test_df['text']

# Targets: the label column of each split
y_train, y_test = train_df['label'], test_df['label']

In [7]:
# TF-IDF turns each document into a numeric feature vector; English stop words are dropped
vectorizer = TfidfVectorizer(stop_words='english')

# The vectorizer is fitted on the training split only; the test split is merely
# transformed with that fitted state (the tuple is evaluated left to right).
X_train_vec, X_test_vec = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

In [8]:
# Instantiate the multinomial Naive Bayes classifier with its default
# hyper-parameters written out explicitly for the reader
ai_model = MultinomialNB(alpha=1.0, fit_prior=True)

In [9]:
# Fit the classifier on the TF-IDF training matrix and the training labels
ai_model.fit(X=X_train_vec, y=y_train)

In [10]:
# Classify every document in the vectorized test split
y_pred = ai_model.predict(X_test_vec)

# Share of test documents whose predicted label equals the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy


0.82992

In [11]:
# Per-class precision, recall and F1; printed because the report is a plain-text table
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.80      0.88      0.84     12500
           1       0.87      0.78      0.82     12500

    accuracy                           0.83     25000
   macro avg       0.83      0.83      0.83     25000
weighted avg       0.83      0.83      0.83     25000



In [12]:
def predict_sentiment(texts):
    """Predict sentiment labels for one or more raw text documents.

    Uses the notebook's fitted ``vectorizer`` and trained ``ai_model``, so both
    must already exist when this function is called.

    Parameters
    ----------
    texts : str or iterable of str
        A single document, or a collection of documents to classify.

    Returns
    -------
    The value returned by ``ai_model.predict``: one predicted label per
    document. In this notebook's examples a negative sentence yields 0 and a
    positive one yields 1.
    """
    # Wrap a bare string so it is treated as a single document
    if isinstance(texts, str):
        texts = [texts]
    # Reuse the fitted vectorizer so the features line up with those used in training
    return ai_model.predict(vectorizer.transform(texts))


# Predict sentiment for custom input
sample_text = ["The movie was boring and a waste of time"]
predict_sentiment(sample_text)

array([0])

In [13]:
# Repeat the check with a clearly positive sentence
sample_text = ["The movie was amazing and very inspiring"]

# Vectorize with the fitted TF-IDF vectorizer, then classify
sample_vec = vectorizer.transform(raw_documents=sample_text)
ai_model.predict(X=sample_vec)


array([1])

## Conclusion
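The model, a Multinomial Naive Bayes classifier on TF-IDF features, was trained on labeled text data and evaluated on an unseen test set of 25,000 samples, achieving about 83% accuracy. This project demonstrates the use of AI to understand and classify the sentiment (negative = 0, positive = 1) that people express in text.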