<a href="https://colab.research.google.com/github/shivanibandi110/SARR02/blob/main/SARR_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Restaurant_Reviews.tsv',delimiter='\t',quoting=3)

In [None]:
# Check if the 'text' column exists in the DataFrame
if 'text' in data.columns:
    data['text'] = data['text'].apply(preprocess_text)
else:
    print("The 'text' column does not exist in the DataFrame.")

# Alternatively, if the column name is different, replace 'text' with the actual column name
# For example, if your column is named 'review_text':
# data['review_text'] = data['review_text'].apply(preprocess_text)


The 'text' column does not exist in the DataFrame.


In [None]:
# stop_words = set(stopwords.words('english'))

# def preprocess_text(text):
#     # Convert text to lowercase
#     text = text.lower()
#     # Tokenization and removing stopwords
#     words = nltk.word_tokenize(text)
#     words = [word for word in words if word.isalnum() and word not in stop_words]
#     return ' '.join(words)

# data['Review'] = data['Review'].apply(preprocess_text)

In [None]:
# Text preprocessing
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()
    # Tokenization and removing stopwords
    words = nltk.word_tokenize(text)
    words = [word for word in words if word.isalnum() and word not in stop_words]
    return ' '.join(words)


data['Review'] = data['Review'].apply(preprocess_text)



In [None]:
# Split the dataset into training and testing sets
X = data['Review']
y = data['Liked']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
# Train a Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_tfidf, y_train)


In [None]:
# Predictions on the test set
y_pred = clf.predict(X_test_tfidf)

In [None]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 72.50%


In [None]:
# Print classification report for more detailed metrics
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.65      0.90      0.75        96
           1       0.85      0.56      0.67       104

    accuracy                           0.72       200
   macro avg       0.75      0.73      0.71       200
weighted avg       0.76      0.72      0.71       200



In [13]:
#Sentiment Analysis with Support Vector Machine (SVM)

In [14]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [15]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [16]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Restaurant_Reviews.tsv',delimiter='\t',quoting=3)

In [17]:
data.info

<bound method DataFrame.info of                                                 Review  Liked
0                             Wow... Loved this place.      1
1                                   Crust is not good.      0
2            Not tasty and the texture was just nasty.      0
3    Stopped by during the late May bank holiday of...      1
4    The selection on the menu was great and so wer...      1
..                                                 ...    ...
995  I think food should have flavor and texture an...      0
996                           Appetite instantly gone.      0
997  Overall I was not impressed and would not go b...      0
998  The whole experience was underwhelming, and I ...      0
999  Then, as if I hadn't wasted enough of my life ...      0

[1000 rows x 2 columns]>

In [19]:
# Text preprocessing
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()
    # Tokenization and removing stopwords
    words = text.split()
    words = [word for word in words if word.isalnum() and word not in stop_words]
    return ' '.join(words)

data['Review'] = data['Review'].apply(preprocess_text)

In [21]:
# Split the dataset into training and testing sets
X = data['Review']
y = data['Liked']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [23]:
# Train an SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0, random_state=42)
svm_classifier.fit(X_train_tfidf, y_train)

In [24]:
# Predictions on the test set
y_pred_svm = svm_classifier.predict(X_test_tfidf)

In [25]:
# Evaluate the SVM model
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f'SVM Accuracy: {svm_accuracy * 100:.2f}%')
print(classification_report(y_test, y_pred_svm))

SVM Accuracy: 75.00%
              precision    recall  f1-score   support

           0       0.71      0.80      0.75        96
           1       0.79      0.70      0.74       104

    accuracy                           0.75       200
   macro avg       0.75      0.75      0.75       200
weighted avg       0.75      0.75      0.75       200



# New Section