<a href="https://colab.research.google.com/github/shakeel645/Amazon-sales-Report/blob/main/Untitled16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import nltk
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fetch the movie_reviews corpus into the local nltk data directory.
nltk.download('movie_reviews')

# Step 1: Build the labelled dataset
# Each corpus file contributes its raw text and a binary label:
# 1 when the file's first category is 'pos', 0 otherwise.
reviews, sentiments = [], []
for fileid in movie_reviews.fileids():
    text = movie_reviews.raw(fileid)
    label = movie_reviews.categories(fileid)[0]
    reviews.append(text)
    sentiments.append(int(label == 'pos'))

# Collect texts and labels into a two-column frame and preview it
df = pd.DataFrame(
    {'review': reviews, 'sentiment': sentiments}
)
print(df.head())

# Step 2: Hold out 20% of the reviews for testing
# The fixed random_state keeps the split reproducible between runs.
X, y = df['review'], df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 3: Turn the text into TF-IDF features
# The vectorizer is fitted on the training reviews only; the test reviews
# are transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 4: Fit a logistic regression classifier on the training features
# (fit() returns the estimator itself, so it can be bound in one statement)
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Step 5: Predict labels for the held-out reviews
y_pred = model.predict(X_test_tfidf)

# Step 6: Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report: each heading on its own line, followed by its value
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:', classification_rep, sep='\n')
print('Confusion Matrix:', conf_matrix, sep='\n')


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


                                              review  sentiment
0  plot : two teen couples go to a church party ,...          0
1  the happy bastard's quick movie review \ndamn ...          0
2  it is movies like these that make a jaded movi...          0
3   " quest for camelot " is warner bros . ' firs...          0
4  synopsis : a mentally unstable man undergoing ...          0
Accuracy: 0.82
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.82      0.82       199
           1       0.82      0.82      0.82       201

    accuracy                           0.82       400
   macro avg       0.82      0.82      0.82       400
weighted avg       0.82      0.82      0.82       400

Confusion Matrix:
[[163  36]
 [ 36 165]]


In [6]:
import pandas as pd
import nltk
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fetch the movie_reviews corpus into the local nltk data directory.
nltk.download('movie_reviews')

# Step 1: Build the labelled dataset
# Each corpus file contributes its raw text and a binary label:
# 1 when the file's first category is 'pos', 0 otherwise.
reviews, sentiments = [], []
for fileid in movie_reviews.fileids():
    text = movie_reviews.raw(fileid)
    label = movie_reviews.categories(fileid)[0]
    reviews.append(text)
    sentiments.append(int(label == 'pos'))

# Collect texts and labels into a two-column frame and preview it
df = pd.DataFrame(
    {'review': reviews, 'sentiment': sentiments}
)
print(df.head())

# Step 2: Hold out 20% of the reviews for testing
# The fixed random_state keeps the split reproducible between runs.
X, y = df['review'], df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 3: Turn the text into TF-IDF features
# The vectorizer is fitted on the training reviews only; the test reviews
# are transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 4: Fit a support vector classifier with a linear kernel
# (fit() returns the estimator itself, so it can be bound in one statement)
model = SVC(kernel='linear').fit(X_train_tfidf, y_train)

# Step 5: Predict labels for the held-out reviews
y_pred = model.predict(X_test_tfidf)

# Step 6: Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report: each heading on its own line, followed by its value
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:', classification_rep, sep='\n')
print('Confusion Matrix:', conf_matrix, sep='\n')


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


                                              review  sentiment
0  plot : two teen couples go to a church party ,...          0
1  the happy bastard's quick movie review \ndamn ...          0
2  it is movies like these that make a jaded movi...          0
3   " quest for camelot " is warner bros . ' firs...          0
4  synopsis : a mentally unstable man undergoing ...          0
Accuracy: 0.82
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       199
           1       0.82      0.84      0.83       201

    accuracy                           0.82       400
   macro avg       0.83      0.82      0.82       400
weighted avg       0.83      0.82      0.82       400

Confusion Matrix:
[[162  37]
 [ 33 168]]
