# **Testing our models with form data (sample size 84 response)**

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("../data/Gamender - Samples (Responses) - Form Responses 1.csv")
df.head()

Unnamed: 0,Timestamp,"Write a tweet (short paragraph, less than 280 characters) reviewing a game.",Was it a negative or a positive review ?
0,10/20/2021 16:41:24,"I recommend this game, It is highly engaging a...",Positive
1,10/20/2021 17:05:14,The perfect game !!,Positive
2,10/20/2021 17:57:47,Assassin's Creed Valhalla is the most gorgeous...,Positive
3,10/21/2021 2:14:04,Oh wow,Positive
4,10/21/2021 21:13:14,I love cooking either in real .. or on screen ...,Positive


In [21]:
df

Unnamed: 0,Test,Result,prediction
0,"i recommend this game, it is highly engaging a...",2,2
1,the perfect game !!,2,2
2,assassin's creed valhalla is the most gorgeous...,2,2
3,oh wow,2,0
4,i love cooking either in real .. or on screen ...,2,2
...,...,...,...
79,life is strange started as a great game series...,0,0
80,"finished buried stars last week,i love these g...",2,2
81,"with buried stars on pc, you have no more excu...",2,0
82,lmao i was playing doctor who the edge of real...,0,0


In [3]:
df.drop("Timestamp", axis=1, inplace=True)

In [4]:
df.head()

Unnamed: 0,"Write a tweet (short paragraph, less than 280 characters) reviewing a game.",Was it a negative or a positive review ?
0,"I recommend this game, It is highly engaging a...",Positive
1,The perfect game !!,Positive
2,Assassin's Creed Valhalla is the most gorgeous...,Positive
3,Oh wow,Positive
4,I love cooking either in real .. or on screen ...,Positive


In [5]:
df.rename(columns={"Write a tweet (short paragraph, less than 280 characters) reviewing a game.": "Test",
                   "Was it a negative or a positive review ?": "Result"}, inplace=True)

In [6]:
mappa = {'Positive': 1, 'Negative': 0}
df.replace({'Result': mappa}, inplace=True)
df

Unnamed: 0,Test,Result
0,"I recommend this game, It is highly engaging a...",1
1,The perfect game !!,1
2,Assassin's Creed Valhalla is the most gorgeous...,1
3,Oh wow,1
4,I love cooking either in real .. or on screen ...,1
...,...,...
79,Life is strange started as a great game series...,0
80,"finished buried stars last week,i love these g...",1
81,"With Buried Stars on PC, You Have No More Excu...",1
82,lmao I was playing doctor who the edge of real...,0


## Cleaning Tweets

In [7]:
import re

hashtags = re.compile(r"^#\S+|\s#\S+")
mentions = re.compile(r"^@\S+|\s@\S+")
urls = re.compile(r"https?://\S+")


def process_text(text):
    text = re.sub(r'http\S+', '', text)
    text = hashtags.sub(' hashtag', text)
    text = mentions.sub(' entity', text)
    return text.strip().lower()


df["Test"] = df.Test.apply(process_text)

In [8]:
df.head()

Unnamed: 0,Test,Result
0,"i recommend this game, it is highly engaging a...",1
1,the perfect game !!,1
2,assassin's creed valhalla is the most gorgeous...,1
3,oh wow,1
4,i love cooking either in real .. or on screen ...,1


# Loading Model1 and its tokenizer

In [9]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("../models/models trial 4/bert-custom")

model = AutoModelForSequenceClassification.from_pretrained("../models/models trial 4/bert-custom")

In [10]:
import torch

In [None]:
def predictionPipeline(text):
    #id_tolabel = {0: "Negative", 1: "Positive"}
    modeleval = model.eval()
    tokenized = tokenizer(text, return_tensors='pt').to(modeleval.device)
    with torch.no_grad(): label = torch.argmax(model.forward(**tokenized).logits, dim=1)[0].cpu().item()
    return int(label)


df['prediction'] = df['Test'].apply(predictionPipeline)

Token indices sequence length is longer than the specified maximum sequence length for this model (153 > 128). Running this sequence through the model will result in indexing errors


IndexError: index out of range in self

In [12]:
df

Unnamed: 0,Test,Result,prediction
0,"i recommend this game, it is highly engaging a...",1,1
1,the perfect game !!,1,1
2,assassin's creed valhalla is the most gorgeous...,1,1
3,oh wow,1,0
4,i love cooking either in real .. or on screen ...,1,1
...,...,...,...
79,life is strange started as a great game series...,0,0
80,"finished buried stars last week,i love these g...",1,1
81,"with buried stars on pc, you have no more excu...",1,1
82,lmao i was playing doctor who the edge of real...,0,0


In [13]:
from sklearn.metrics import accuracy_score

In [14]:
accuracy_score(df["Result"], df["prediction"])

0.9285714285714286

# model 1 around 93% accuracy (probably due to our limited number of data)

# **Load Model 2 and its tokenizer**

In [15]:
tokenizer = AutoTokenizer.from_pretrained("../models/trial5")
model = AutoModelForSequenceClassification.from_pretrained("../models/trial5")

In [16]:
mappa = {1: 2}
df.replace({'Result': mappa}, inplace=True)

In [17]:
df

Unnamed: 0,Test,Result,prediction
0,"i recommend this game, it is highly engaging a...",2,1
1,the perfect game !!,2,1
2,assassin's creed valhalla is the most gorgeous...,2,1
3,oh wow,2,0
4,i love cooking either in real .. or on screen ...,2,1
...,...,...,...
79,life is strange started as a great game series...,0,0
80,"finished buried stars last week,i love these g...",2,1
81,"with buried stars on pc, you have no more excu...",2,1
82,lmao i was playing doctor who the edge of real...,0,0


In [18]:
def predictionPipeline(text):
    #id_tolabel = {0: "Negative", 2: "Positive"}
    modeleval = model.eval()
    tokenized = tokenizer(text, return_tensors='pt', truncation=True).to(modeleval.device)
    with torch.no_grad(): label = torch.argmax(model.forward(**tokenized).logits, dim=1)[0].cpu().item()
    return label


df['prediction'] = df['Test'].apply(predictionPipeline)

In [19]:
df

Unnamed: 0,Test,Result,prediction
0,"i recommend this game, it is highly engaging a...",2,2
1,the perfect game !!,2,2
2,assassin's creed valhalla is the most gorgeous...,2,2
3,oh wow,2,0
4,i love cooking either in real .. or on screen ...,2,2
...,...,...,...
79,life is strange started as a great game series...,0,0
80,"finished buried stars last week,i love these g...",2,2
81,"with buried stars on pc, you have no more excu...",2,0
82,lmao i was playing doctor who the edge of real...,0,0


In [20]:
accuracy_score(df["Result"], df["prediction"])

0.9404761904761905

# Around 94% accuracy

###### While this very high accuracy is probably caused by the lack of sufficient test dataseet model 2 will be used