# Loading Movie Reviews Dataset

In [None]:
import pandas as pd
# Relative path to the IMDB movie-reviews CSV file.
DATASET_PATH = "../input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv"
# Load the CSV into a DataFrame; the cells below read it through `data`.
data = pd.read_csv(DATASET_PATH)

# Inspecting data

In [None]:
# Preview the first five rows of the loaded dataset.
data.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
data.shape

# Separating data and labels (Targets)

In [None]:
# Features: every column except the target column "sentiment" (stays a DataFrame).
keep_column = data.columns != "sentiment"
reviews = data.loc[:, keep_column]
# Target: the "sentiment" column on its own.
labels = data.sentiment

In [None]:
# `reviews` is a DataFrame, so position 0 yields its first row.
first_review_row = reviews.iloc[0]
print(first_review_row)

In [None]:
# Show the sentiment label stored at position 0.
first_label = labels.iloc[0]
print(first_label)

# Splitting data into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the reviews for testing.
# random_state pins the shuffle so the split (and every score computed from
# it) is the same on each run; stratify keeps the sentiment class ratio the
# same in the train and test portions.
train_reviews, test_reviews, train_labels, test_labels = train_test_split(
    reviews["review"],
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Number of samples in each split, as (train, test).
tuple(len(split) for split in (train_reviews, test_reviews))

# Transforming text (words) into numbers

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF settings: drop English stop words and cap the vocabulary at 9000 terms.
tfidf_options = {"stop_words": "english", "max_features": 9000}
transformer = TfidfVectorizer(**tfidf_options)

In [None]:
# Fit the vectorizer on the training reviews only and vectorize them in the
# same call.
train_data = transformer.fit_transform(train_reviews)
# Reuse the already-fitted vectorizer on the test reviews (transform, not
# fit_transform) so both matrices share the same feature columns and nothing
# is learned from the test set.
test_data = transformer.transform(test_reviews)

In [None]:
# Shapes of the vectorized train and test matrices, in that order.
(train_data.shape, test_data.shape)

In [None]:
# Show the TF-IDF representation of the first training review.
first_train_row = train_data[0]
print(first_train_row)

# Encoding (converting) labels (words) to numbers

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encoder used below to turn the sentiment label strings into integer class ids.
encoder = LabelEncoder()

In [None]:
# Fit the label-to-integer mapping on the training labels and encode them.
train_labels_encoded = encoder.fit_transform(train_labels)
# Encode the test labels with the same, already-fitted mapping.
test_labels_encoded = encoder.transform(test_labels)

# Checking the encoding

In [None]:
# Display the encoded train and test label arrays side by side.
(train_labels_encoded, test_labels_encoded)

In [None]:
# First five original labels next to their encoded values, to eyeball the mapping.
train_labels[:5], train_labels_encoded[:5]

# Importing and loading AI model

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier, created with the library's default settings
# (no hyperparameters are passed).
AI_model = LogisticRegression()

# Train on the training data (Teach the model)

In [None]:
# Train the classifier on the TF-IDF features and the integer-encoded labels.
AI_model.fit(X=train_data, y=train_labels_encoded)

# Evaluating the model (Taking test from the AI model)

In [None]:
# Accuracy on the held-out test set, shown as a percentage.
test_accuracy = AI_model.score(test_data, test_labels_encoded)
test_accuracy * 100

In [None]:
# Predict the encoded label for one test sample (the row at index 1).
single_test_row = test_data[1]
AI_model.predict(single_test_row)

# Testing new input samples

In [None]:
# Positive review sample.
# The vectorizer takes a collection of documents, hence the one-element list.
sample = [
    'This is a fantastic movie with great caste and awesome cinemetogrpahy',
]
# Vectorize with the already-fitted transformer (no re-fitting on new text).
samp_feature = transformer.transform(raw_documents=sample)

In [None]:
# Predicted class for the positive sample, as an integer-encoded label.
# NOTE(review): presumably 0 = negative and 1 = positive — confirm with
# encoder.classes_.
AI_model.predict(samp_feature)

In [None]:
# Negative review sample.
# The vectorizer takes a collection of documents, hence the one-element list.
n_sample = [
    "That was not that great movie, it had poor caste with low level of ciemetography",
]
# Vectorize with the already-fitted transformer (no re-fitting on new text).
n_samp_feature = transformer.transform(raw_documents=n_sample)

In [None]:
# Predicted class for the negative sample, as an integer-encoded label.
# NOTE(review): presumably 0 = negative and 1 = positive — confirm with
# encoder.classes_.
AI_model.predict(n_samp_feature)