In [1]:
!pip install kaggle



**Importing the Dependencies**

In [2]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from zipfile import ZipFile
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,LSTM,Embedding, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


**Data collection – Kaggle API**

In [3]:
# Read the Kaggle credentials from the uploaded JSON file and export them as
# the KAGGLE_USERNAME / KAGGLE_KEY environment variables before the kaggle CLI
# is invoked. The `with` block makes sure the file handle is closed again.
with open('/content/kaggle(1).json') as credentials_file:
  kaggle_dictionary = json.load(credentials_file)

os.environ['KAGGLE_USERNAME'] = kaggle_dictionary['username']
os.environ['KAGGLE_KEY'] = kaggle_dictionary['key']

In [4]:
# Download the IMDB 50k movie-review dataset archive with the kaggle CLI
# (relies on the KAGGLE_USERNAME / KAGGLE_KEY environment variables set above).
!kaggle datasets download lakshmi25npathi/imdb-dataset-of-50k-movie-reviews

Dataset URL: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
License(s): other
Downloading imdb-dataset-of-50k-movie-reviews.zip to /content
  0% 0.00/25.7M [00:00<?, ?B/s]
100% 25.7M/25.7M [00:00<00:00, 946MB/s]


In [5]:
# Extract every member of the downloaded archive into the current working
# directory.
archive_path = "/content/imdb-dataset-of-50k-movie-reviews.zip"
with ZipFile(archive_path, 'r') as archive:
  archive.extractall()


**Loading the dataset**

In [6]:
# Load the extracted CSV into a DataFrame and print its first rows as a
# quick sanity check.
data = pd.read_csv('/content/IMDB Dataset.csv')
first_rows = data.head()
print(first_rows)


                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [7]:
# Count how many reviews carry each sentiment label (class balance check).
sentiment_counts = data['sentiment'].value_counts()
print(sentiment_counts)

sentiment
positive    25000
negative    25000
Name: count, dtype: int64


In [8]:
# Print the frame's index range, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   review     50000 non-null  object
 1   sentiment  50000 non-null  object
dtypes: object(2)
memory usage: 781.4+ KB


In [9]:
# Number of missing values per column (`isna` is the same check as `isnull`).
data.isna().sum()

Unnamed: 0,0
review,0
sentiment,0


In [10]:
# Summary statistics per column; for these object columns that is
# count / unique / top / freq.
data.describe()

Unnamed: 0,review,sentiment
count,50000,50000
unique,49582,2
top,Loved today's show!!! It was a variety and not...,positive
freq,5,25000


In [11]:
# Encode the string labels as integers: positive -> 1, negative -> 0.
# The dtype guard makes the cell idempotent: calling .map() a second time on
# already-encoded labels would find no matching keys and silently replace
# every label with NaN.
label_map = {"positive": 1, "negative": 0}
if not pd.api.types.is_numeric_dtype(data['sentiment']):
  data['sentiment'] = data['sentiment'].map(label_map)

# Fail loudly if any label was not covered by the mapping.
assert data['sentiment'].isin([0, 1]).all(), "unexpected sentiment label"

In [12]:
# Split the frame into train and test parts (test_size=0.2); the fixed
# random_state keeps the split identical between runs.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Report the shape of each split, train first.
for split_frame in (train_data, test_data):
  print(split_frame.shape)

(40000, 2)
(10000, 2)


**Data Preprocessing**

In [13]:
# Tokenization: fit the tokenizer (num_words=5000) on the training reviews
# only, then turn both splits into integer sequences and pad them with
# maxlen=200.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data['review'])

train_sequences = tokenizer.texts_to_sequences(train_data['review'])
test_sequences = tokenizer.texts_to_sequences(test_data['review'])

X_train = pad_sequences(train_sequences, maxlen=200)
X_test = pad_sequences(test_sequences, maxlen=200)


In [14]:
# Target vectors: the encoded sentiment column of each split.
y_train, y_test = train_data['sentiment'], test_data['sentiment']

**LSTM-Long Short-Term Memory**




In [15]:
# Sequential stack: embedding -> LSTM -> one sigmoid output unit.
# input_dim=5000 and input_length=200 mirror the tokenizer's num_words and the
# padding maxlen used when the sequences were built.
model = Sequential([
  Embedding(input_dim=5000, output_dim=128, input_length=200),
  LSTM(128, dropout=0.2, recurrent_dropout=0.2),
  Dense(1, activation='sigmoid'),
])




In [16]:
# Print the layer-by-layer summary of the model.
model.summary()

None


In [18]:
# Compile the model: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy as the reported metric.
model.compile(
  optimizer='Adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)


ValueError: Could not interpret optimizer identifier: adman

**Training the model**

In [None]:
# Train for 5 epochs in batches of 64, with validation_split=0.2 so that part
# of the training data is used for validation.
model.fit(
  X_train,
  y_train,
  epochs=5,
  batch_size=64,
  validation_split=0.2,
)


**Model Evaluation**

In [None]:
# Evaluate on the held-out test split and report loss and accuracy.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics

print(f"Test Loss:{loss:.4f}")
print(f"Test Accuracy:{accuracy*100:.2f}%")

**Building a Predictive System**

In [19]:
def predict_sentiment(review):
  """Classify a review as "positive" or "negative" with the trained model.

  Args:
    review: The review text as a single string. A list of strings is also
      accepted; in that case only the first review's sentiment is returned.

  Returns:
    "positive" if the model's output for the review is greater than 0.5,
    otherwise "negative".
  """
  # texts_to_sequences expects a collection of texts. Passing a bare string
  # would make it iterate over the individual characters, so wrap it in a list.
  texts = [review] if isinstance(review, str) else list(review)

  # Tokenize and pad with the same maxlen=200 used for the training sequences.
  sequence = tokenizer.texts_to_sequences(texts)
  padded_sequence = pad_sequences(sequence, maxlen=200)

  prediction = model.predict(padded_sequence)
  sentiment = "positive" if prediction[0][0] > 0.5 else "negative"
  return sentiment

In [None]:
# Example usage: run the predictor on a review with positive wording.
new_review = "this is movie was fantastic. I loved it"
sentiment = predict_sentiment(new_review)

print(f"The sentiment of the review is:{sentiment}")

In [None]:
# Example usage: run the predictor on a review with negative wording.
new_review = "this is movie is very bad"
sentiment = predict_sentiment(new_review)

print(f"The sentiment of the review is:{sentiment}")