In [None]:
!pip install flask-ngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
!pip install pyngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyngrok
  Downloading pyngrok-5.2.1.tar.gz (761 kB)
[K     |████████████████████████████████| 761 kB 34.9 MB/s 
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-5.2.1-py3-none-any.whl size=19792 sha256=182f8893cd5308338cca88a3cb298ed90dda841d4308a4374d5499623ee694a9
  Stored in directory: /root/.cache/pip/wheels/5d/f2/70/526da675d32f17577ec47ac4c663084efe39d47c826b6c3bb1
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-5.2.1


In [None]:
!pip install google

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!ngrok authtoken 2Jldl59iHuICWSbcy8RV52PZWnb_7GDpQz9mk1V3AiE3Efqgg

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
from googlesearch import search
import numpy as np
from bs4 import BeautifulSoup
import requests

In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 30.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 68.7 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 61.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [None]:
import torch
import torch.nn as nn
import transformers
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

In [None]:
class config:
    """Settings shared by the cells of this notebook (sizes, device, paths, tokenizer)."""

    # sequence length handed to the tokenizer as `max_length`
    MAX_LEN = 512
    # number of samples per DataLoader batch
    BATCH_SIZE = 8
    EPOCHS = 3
    # first CUDA device when one is available, CPU otherwise
    DEVICE = (
        torch.device("cuda:0")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    # name of the pretrained checkpoint used for both tokenizer and encoder
    BERT_PATH = "bert-base-uncased"
    MODEL_PATH = "model.pth"
    # tokenizer from huggingface's transformers, loaded once when the class
    # body runs (this is what triggers the download shown below the cell)
    TOKENIZER = transformers.AutoTokenizer.from_pretrained(
        BERT_PATH,
        do_lower_case=True,
    )
  

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
class BERTDataset:
    """Index-able dataset that turns raw review strings into BERT input tensors."""

    def __init__(self, review, tokenizer=None, max_len=None):
        """
        :param review: list or numpy array of strings
        :param tokenizer: object with an ``encode_plus`` method; when omitted,
                          ``config.TOKENIZER`` is used
        :param max_len: length every sequence is padded/truncated to; when
                        omitted, ``config.MAX_LEN`` is used
        """
        self.review = review
        # config is only consulted for the values the caller did not supply
        self.tokenizer = config.TOKENIZER if tokenizer is None else tokenizer
        self.max_len = config.MAX_LEN if max_len is None else max_len

    def __len__(self):
        """Return the number of reviews in the dataset."""
        return len(self.review)

    def __getitem__(self, item):
        """
        Tokenize the review at position ``item``.

        :param item: index into ``self.review``
        :return: dict with the keys "ids", "mask" and "token_type_ids", each a
                 ``torch.long`` tensor built from the tokenizer output
        """
        # collapse every run of whitespace (newlines, tabs, ...) to one space
        review = " ".join(str(self.review[item]).split())
        # padding="max_length" is the current spelling of the deprecated
        # pad_to_max_length=True: every sequence is padded up to max_length
        inputs = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
        )
        return {
            # ids of the tokens generated by the tokenizer
            "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
            # 1 where there is real input, 0 where there is padding
            "mask": torch.tensor(inputs["attention_mask"], dtype=torch.long),
            # segment ids: 0 for the first sentence, 1 for the second one;
            # only a single sentence is encoded here
            "token_type_ids": torch.tensor(
                inputs["token_type_ids"], dtype=torch.long
            ),
        }

In [None]:

class BERTBaseUncased(nn.Module):
    """Pretrained BERT encoder followed by dropout and a one-unit linear head."""

    def __init__(self):
        super().__init__()
        # encoder weights come from config.BERT_PATH; return_dict=False makes
        # the encoder return a plain tuple instead of an output object
        self.bert = transformers.BertModel.from_pretrained(
            config.BERT_PATH,
            return_dict=False,
        )
        # dropout for regularization
        self.bert_drop = nn.Dropout(p=0.3)
        # 768 is the hidden size of bert-base (bert-large would need 1024)
        self.out = nn.Linear(in_features=768, out_features=1)

    def forward(self, ids, mask, token_type_ids):
        """
        Compute one raw (pre-sigmoid) output per sample.

        :param ids: token ids
        :param mask: attention mask
        :param token_type_ids: segment ids
        :return: output of the linear head applied to the pooled BERT output
        """
        # the encoder yields the last hidden state and the pooler output;
        # only the pooler output, of size (batch_size, hidden_size), is used
        _last_hidden_state, pooled_output = self.bert(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids,
        )
        regularized = self.bert_drop(pooled_output)
        return self.out(regularized)

In [None]:
def eval_fn(data_loader, model, device):
    """
    Run the model over every batch of the loader and collect its
    sigmoid-activated outputs.

    :param data_loader: iterable of batches; each batch is a dict with the
                        keys "ids", "mask" and "token_type_ids"
    :param model: torch model, bert in our case
    :param device: can be cpu or cuda
    :return: list with one entry per sample, in loader order; each entry is
             that sample's row of sigmoid outputs as a plain Python list
    """
    # move the model to the target device and switch it to eval mode
    model.to(device)
    model.eval()
    fin_outputs = []
    # no_grad keeps autograd from recording the forward passes, which would
    # otherwise waste (GPU) memory during pure inference
    with torch.no_grad():
        for batch in data_loader:
            ids = batch["ids"].to(device, dtype=torch.long)
            token_type_ids = batch["token_type_ids"].to(device, dtype=torch.long)
            mask = batch["mask"].to(device, dtype=torch.long)
            outputs = model(ids=ids,
                            mask=mask,
                            token_type_ids=token_type_ids)
            # squash to (0, 1), bring back to the cpu and append row by row
            probabilities = torch.sigmoid(outputs).detach().cpu()
            fin_outputs.extend(probabilities.tolist())
    return fin_outputs

In [None]:

class Movie:
    """Finds a movie's IMDb user-review page and scores the scraped review titles."""

    # seconds to wait for the review page before giving up
    REQUEST_TIMEOUT = 10

    def __init__(self, movie_name):
        """
        :param movie_name: movie title as typed by the user
        """
        # normalised so it can be compared with the lower-cased scraped title
        self.movie = movie_name.strip().lower()

    def _search_query(self):
        """Build the web-search query used to locate the user-review page."""
        # joined with spaces so the words stay separate terms
        return " ".join(("imdb", self.movie, "user reviews"))

    def get_reviews(self):
        """
        Search the web for the movie's review page and scrape it.

        :return: tuple ``(title, reviews_df, flag)``: ``title`` is the scraped
                 title with the date text appended, ``reviews_df`` is a
                 DataFrame with one column 'reviews' holding the text of every
                 ``a.title`` element of the review list, and ``flag`` is 0 when
                 the requested name occurs in the scraped title and 1 otherwise
        :raises ValueError: when no search hit contains 'reviews' or the page
                            has no title element
        :raises requests.HTTPError: when the page answers with an error status
        """
        results = search(self._search_query(), tld="co.in", num=5, stop=5, pause=2)
        # first hit whose url mentions 'reviews'
        url = next((link for link in results if 'reviews' in link), None)
        if url is None:
            raise ValueError("no review page found for " + repr(self.movie))

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html5lib')

        header = soup.find('h3', {'itemprop': 'name'})
        title_link = header.find('a') if header is not None else None
        if title_link is None:
            raise ValueError("could not find the movie title on " + url)
        title = title_link.get_text()
        date_tag = header.find('span')
        if date_tag is not None:
            # strip all whitespace from the date text before appending it
            title += ''.join(date_tag.get_text().split())

        flag = 0 if self.movie in title.lower() else 1

        table = soup.find('div', attrs={'class': 'lister-list'})
        # a page without the review list yields an empty frame
        anchors = table.find_all("a", {"class": "title"}) if table is not None else []
        reviews = [anchor.get_text() for anchor in anchors]
        reviews_df = pd.DataFrame(reviews, columns=['reviews'])

        return title, reviews_df, flag

    def get_sentiment(self, rev, model):
        """
        Score the reviews with the model and reduce them to a single label.

        :param rev: DataFrame with a 'reviews' column
        :param model: torch model passed on to eval_fn
        :return: 'bad' when the mean model output is below 0.5, else 'good'
        :raises ValueError: when ``rev`` has no rows (the mean of nothing is
                            NaN, which would otherwise be reported as 'good')
        """
        if len(rev) == 0:
            raise ValueError("no reviews to score")
        labels = {0: 'bad', 1: 'good'}

        rev_dataset = BERTDataset(review=rev.reviews.values)
        rev_loader = torch.utils.data.DataLoader(rev_dataset,
                                                 batch_size=config.BATCH_SIZE,
                                                 num_workers=2)
        outputs = eval_fn(rev_loader, model, config.DEVICE)

        mean_score = np.mean(outputs)
        return labels[0 if mean_score < 0.5 else 1]

In [None]:
#initialising model
# NOTE(review): assumes Google Drive is already mounted at /content/drive - confirm
PATH='/content/drive/MyDrive/.ipynb_checkpoints/model.pth'
model= BERTBaseUncased()
# map_location remaps the stored tensors onto config.DEVICE, so a checkpoint
# saved on a GPU also loads on a CPU-only runtime
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust
model.load_state_dict(torch.load(PATH, map_location=config.DEVICE))

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
# smoke test: scrape one movie and print its title next to the predicted label
mov = Movie('son of the mask')
title, rev, flag = mov.get_reviews()
sentiment = mov.get_sentiment(rev, model)
print(title, sentiment)


Son of the Mask(2005) bad


In [None]:
#creating website after deploying model through flask and ngrok
from flask import Flask,request,render_template
from flask_ngrok import run_with_ngrok

# templates (index.html) are looked up directly in /content
app = Flask(__name__, template_folder='/content')
run_with_ngrok(app)


@app.route('/')
def home():
    """Serve the landing page that holds the search form."""
    return render_template('index.html',messages='hello')


# POST only: GET / belongs to home(), so the two views no longer overlap
@app.route('/', methods=["POST"])
def gfg():
    """
    Handle the submitted form: look the movie up, score its reviews and
    render the result (or an error message) into index.html.
    """
    # getting input with name = fname in HTML form
    movie_name = (request.form.get("fname") or "").strip()
    if not movie_name:
        return render_template('index.html', result='please enter a movie name')

    mov_obj = Movie(movie_name)
    try:
        title, reviews, flag = mov_obj.get_reviews()
        sentiment = mov_obj.get_sentiment(reviews, model)
    except Exception:
        # scraping depends on external pages and can fail in many ways; keep
        # the traceback in the server log and show the user a message instead
        # of an HTTP 500
        app.logger.exception("sentiment lookup failed for %r", movie_name)
        return render_template(
            'index.html',
            result='could not fetch IMDB reviews for the entered movie')

    # flag == 0 means the entered name was found in the scraped title
    if flag == 0:
        prefix = "IMDB most recent viewers' sentiment of "
    else:
        prefix = ('entered string not matched....searched instead: '
                  'IMDB most recent viewers sentiment of ')
    result = prefix + str(title) + ": " + sentiment
    return render_template('index.html', result=result)


app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://2321-34-145-167-200.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [03/Jan/2023 17:30:51] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Jan/2023 17:30:51] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [03/Jan/2023 17:31:02] "[37mPOST / HTTP/1.1[0m" 200 -
