In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import numpy as np

In [None]:
# Load the IMDB reviews and keep a 500-row sample for fast experimentation.
# A fixed random_state makes the sample (and everything trained on it)
# identical on every Restart & Run All.
df = pd.read_csv('IMDB.csv')
df = df.sample(500, random_state=42)
# Persist the sampled subset so the exact working data can be reused elsewhere.
df.to_csv('data.csv', index=False)
df.head()

In [None]:
# data preprocessing

# Define text preprocessing functions
def lemmatization(text):
    """Return ``text`` with every whitespace-separated token lemmatized."""
    wnl = WordNetLemmatizer()
    return " ".join(map(wnl.lemmatize, text.split()))

def remove_stop_words(text):
    """Drop English stop words from ``text`` and rejoin the remaining tokens."""
    english_stops = set(stopwords.words("english"))
    kept_tokens = filter(lambda token: token not in english_stops, str(text).split())
    return " ".join(kept_tokens)

def removing_numbers(text):
    """Strip every digit character from ``text``, leaving all other characters untouched."""
    non_digits = filter(lambda ch: not ch.isdigit(), text)
    return ''.join(non_digits)

def lower_case(text):
    """Lower-case each token of ``text``; runs of whitespace collapse to single spaces."""
    return " ".join(map(str.lower, text.split()))

def removing_punctuations(text):
    """Replace punctuation with spaces and normalize whitespace.

    Every character in ``string.punctuation`` is replaced by a space, the
    Arabic semicolon (U+061B) is deleted outright, and runs of whitespace are
    collapsed to a single space with leading/trailing whitespace stripped.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    text = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
    text = text.replace('؛', "")
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def removing_urls(text):
    """Delete http(s):// links and www.-prefixed addresses from ``text``."""
    return re.sub(r'https?://\S+|www\.\S+', '', text)

def normalize_text(df):
    """Normalize the ``review`` column of ``df`` in place and return ``df``.

    Steps, in order: lower-casing, stop-word removal, digit removal, URL
    removal, punctuation removal, lemmatization. URLs are stripped *before*
    punctuation because removing_punctuations replaces ``:``, ``/`` and ``.``
    with spaces, after which the URL pattern can no longer match anything.

    Args:
        df: DataFrame with a string ``review`` column. The column is
            overwritten, so the caller's frame is modified.

    Returns:
        The same DataFrame object, with ``review`` normalized.

    Raises:
        Exception: Any error raised by a step is printed and then re-raised.
    """
    try:
        pipeline = (
            lower_case,
            remove_stop_words,
            removing_numbers,
            removing_urls,          # must precede punctuation removal
            removing_punctuations,
            lemmatization,
        )
        for step in pipeline:
            df['review'] = df['review'].apply(step)
        return df
    except Exception as e:
        print(f'Error during text normalization: {e}')
        raise

In [None]:
# Clean the review text. normalize_text overwrites df['review'] and returns
# the same DataFrame, so this rebinding keeps pointing at the modified frame.
df = normalize_text(df)
df.head()

In [None]:
# Count how many reviews carry each sentiment label.
df['sentiment'].value_counts()

In [None]:
# Display the DataFrame after text normalization.
df

In [None]:
# x = df['sentiment'].isin(['positive','negative'])
# df = df[x]

In [None]:
# Encode the labels as integers: 'positive' -> 1, 'negative' -> 0.
# NOTE(review): Series.map yields NaN for values missing from the dict, so
# re-running this cell on already-encoded labels would blank the column.
df['sentiment'] = df['sentiment'].map({'positive':1, 'negative':0})
df.head()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Convert the review text into bag-of-words count features.
# max_features=100 caps the vocabulary at 100 terms.
# NOTE(review): the vectorizer is fitted on all rows before the train/test
# split, so the vocabulary is learned from test reviews too.
vectorizer = CountVectorizer(max_features=100)
X = vectorizer.fit_transform(df['review'])
y = df['sentiment']

In [None]:
# df["review"][651]

In [None]:
# Inspect the dense count vector of the row at position 1.
X.toarray()[1]

In [None]:
# Hold out 25% of the rows for testing; random_state=42 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
# Fit a logistic-regression sentiment classifier on the training split.
# max_iter is raised above the library default so the solver has room to
# converge on the unscaled count features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [None]:
# Predict sentiment labels for the held-out reviews.
# NOTE(review): despite its name, x_predict holds predicted labels, not features.
x_predict = model.predict(X_test)

x_predict

In [None]:
# Accuracy of the predictions against the true test labels.
accuracy_score(y_test, x_predict)

In [None]:
# Display the DataFrame again (review text normalized, sentiment encoded).
df

In [None]:
# Show one normalized review. Positional access (.iloc) is used because df is
# a random sample: a hard-coded index label such as 799 exists only when that
# row happened to be drawn, and raises KeyError otherwise.
df["review"].iloc[0]

In [None]:
rahul = "normally spike lee fan take time really get mojo see clear message ability tell story close heart lee genius unlike th hour bamboozled two favorite film his clear story film able understand struggle washington choice play well influenced others odd reason lee never able get true feeling out washington decent job handed him could tell lee s favorite film lee direct film also wrote it could tell camera work horrid writing contributed decay film film coming full circle going pretty lee behind film right thing film seen lee direct brightest modest film almost created hollywood movie instead one own know saw money right thing ran it film demonstrate true talent br br for anyone seen film perhaps stopped watching anything directed spike lee afterwards due film suggest give second chance get wrong see exactly coming film would want put behind you lee grow up work becomes own see transformation desire make money wanting make good film took awhile watch th hour did sheer brilliance perhaps actor perhaps story lee crafted amazing film one man s journey unknown guess hoping mo better blue would turn be really dark journey life man really never grew up instead got denzel denzel really one versatile actor generation consider sydney poitier cinema film showcase talent br br another issue film use spike s sister playing one love interest know you family think could filmed sex scene sister care actor much money getting paid would never it something never wish see apparently different spike went ahead showed full nude image sister without remorse sad even made blush also need somebody answer this flavor flav introducing film so sitting couch ready start film suddenly voice past spelling studio made film acknowledges himself build strong remaining story again felt lee going money film instead actual talent perhaps could afford denzel wesley movie without explosion br br there two great scene film made worth watching end get wrong bad movie always diamond every alleyway 
scene bleek accidentally forgets woman mesmerizing continually went back forth weaving truth confusion way proved lee actually behind camera visionary scene probably lost shuffle due remaining poor scene scene worth watching way lee introduced ended film keeping pacing direction able bring tragic character around full circle give chance change life two moment rest film pure rubbish worth viewing unless go blind br br grade'"

In [46]:
# Look up the review whose index label is 733.
# NOTE(review): this is a label lookup on a random 500-row sample, so it
# raises KeyError whenever row 733 was not sampled.
df["review"][733]

'can t believe anyone would green light let alone voluntarily star it never able get min life back br br this one worst film ever seen film bad good gone far round that s somehow bad terrible exactly expecting much low budget bandwagon jumping rehash b movie still came way expectation level even tv movie higher production value br br there very poor special effect shocking dialogue terrible acting completely unexplained plot cursed why inch snake turn foot snake anyone ever heard highly venomous garter snake python passenger snake many promise none delivered br br some comment would believe film worth watching last five minute even worth rental stay watch low budget tv movie enjoy lot more br br why made oh yes shamelessly cash internet phenomenon soap shame mallachi brother shame'

In [None]:
# 2. Transform your input

# Wrap the single review in a one-row DataFrame so it can go through normalize_text.
df1 = pd.DataFrame({'review': [rahul]})

# normalize_text rewrites df1['review'] in place and returns the same object,
# so df11 and df1 refer to the same normalized frame.
df11 = normalize_text(df1)  # Apply your preprocessing pipeline
df1

In [None]:
# Transform using the same vectorizer
# (transform, not fit_transform, so the already-fitted vocabulary is reused).
X_input = vectorizer.transform(df1['review'])

# Show the dense count vector for the single input review.
X_input.toarray()

In [None]:
# Check the shape of the input feature matrix.
X_input.shape

In [None]:
# 3. Predict using your trained model
prediction = model.predict(X_input)

# Label 0 is reported as negative; any other label as positive.
verdict = "Negative review" if prediction[0] == 0 else "Positive review"
print(rahul, verdict)


In [None]:
# 3. Predict using your trained model
prediction = model.predict(X_input)

# Print only the first 20 characters of the review next to the verdict.
verdict = "Negative review" if prediction[0] == 0 else "Positive review"
print(rahul[:20], verdict)


In [None]:
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# model = LogisticRegression(max_iter=1000)  # Increase max_iter to prevent non-convergence issues

# model.fit(X_train, y_train)
# # 
# y_pred = model.predict(X_test)

# # Evaluation metrics
# accuracy = accuracy_score(y_test, y_pred)
# precision = precision_score(y_test, y_pred)
# recall = recall_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred)


In [None]:
# Persist the fitted vectorizer and model
import pickle

# Each artifact is written to its own pickle file.
for filename, artifact in (("vectorizer.pkl", vectorizer), ("model.pkl", model)):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

In [None]:
# Reload the pickled vectorizer and model from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open("vectorizer.pkl","rb") as f:
    vect = pickle.load(f)

with open("model.pkl","rb") as f:
    mod = pickle.load(f)

In [None]:
aznan = "normally spike lee fan take time really get mojo see clear message ability tell story close heart lee genius unlike th hour bamboozled two favorite film his clear story film able understand struggle washington choice play well influenced others odd reason lee never able get true feeling out washington decent job handed him could tell lee s favorite film lee direct film also wrote it could tell camera work horrid writing contributed decay film film coming full circle going pretty lee behind film right thing film seen lee direct brightest modest film almost created hollywood movie instead one own know saw money right thing ran it film demonstrate true talent br br for anyone seen film perhaps stopped watching anything directed spike lee afterwards due film suggest give second chance get wrong see exactly coming film would want put behind you lee grow up work becomes own see transformation desire make money wanting make good film took awhile watch th hour did sheer brilliance perhaps actor perhaps story lee crafted amazing film one man s journey unknown guess hoping mo better blue would turn be really dark journey life man really never grew up instead got denzel denzel really one versatile actor generation consider sydney poitier cinema film showcase talent br br another issue film use spike s sister playing one love interest know you family think could filmed sex scene sister care actor much money getting paid would never it something never wish see apparently different spike went ahead showed full nude image sister without remorse sad even made blush also need somebody answer this flavor flav introducing film so sitting couch ready start film suddenly voice past spelling studio made film acknowledges himself build strong remaining story again felt lee going money film instead actual talent perhaps could afford denzel wesley movie without explosion br br there two great scene film made worth watching end get wrong bad movie always diamond every alleyway 
scene bleek accidentally forgets woman mesmerizing continually went back forth weaving truth confusion way proved lee actually behind camera visionary scene probably lost shuffle due remaining poor scene scene worth watching way lee introduced ended film keeping pacing direction able bring tragic character around full circle give chance change life two moment rest film pure rubbish worth viewing unless go blind br br grade'"

In [None]:
# 2. Transform your input

# Wrap the single review in a one-row DataFrame for normalize_text.
df1 = pd.DataFrame({'review': [aznan]})

# normalize_text modifies df1 in place and returns it; df11 is the same object.
df11 = normalize_text(df1)  # Apply your preprocessing pipeline
df1
# Transform using the same vectorizer
# (here the copy reloaded from vectorizer.pkl)
X_input = vect.transform(df1['review'])

# Only the final expression of a cell is displayed, so just the shape is
# shown; the bare df1 and toarray() expressions above produce no output.
X_input.toarray()
X_input.shape

In [43]:

# 3. Predict using your trained model
prediction = mod.predict(X_input)

# Label 0 is reported as negative; any other label as positive.
verdict = ":--> Negative review" if prediction[0] == 0 else ":--> Positive review"
print(aznan[:20], verdict)



normally spike lee f :--> Positive review


In [None]:
# Next steps: also try other ML models: MultinomialNB, LogisticRegression, LinearSVC, DecisionTreeClassifier, and ensemble methods.
# A deep-learning approach (a neural network) can also be used.

In [None]:
├── app.py
├── requirements.txt
├── model.pkl
├── vectorizer.pkl
└── Templates/   (HTML files)

In [None]:
pip freeze -r requirements.txt

pip install -r requirements.txt

## ANN 

In [None]:
pip install tensorflow

In [None]:
### Define ANN Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
