In [2]:
import torch

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Available device: {device}')


Available device: cpu


In [10]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# autopilote-ai/EthicalEye

In [11]:
# autopilot-ai/EthicalEye

model = AutoModelForSequenceClassification.from_pretrained("autopilot-ai/EthicalEye")
tokenizer = AutoTokenizer.from_pretrained("autopilot-ai/EthicalEye")

In [12]:
# Run the model on your input
inputs = tokenizer("I love AutoTrain", return_tensors="pt")
outputs = model(**inputs)

# Get the predicted logits
logits = outputs.logits

# Apply softmax to get probabilities (scores)
probabilities = logits.softmax(dim=-1).squeeze()

# Retrieve the labels
id2label = model.config.id2label
labels = [id2label[idx] for idx in range(len(probabilities))]

# Combine labels and probabilities, then sort
label_prob_pairs = list(zip(labels, probabilities))
label_prob_pairs.sort(key=lambda item: item[1], reverse=True)  

# Print the sorted results
for label, probability in label_prob_pairs:
    print(f"Label: {label} - Probability: {probability:.4f}")

Label: Safe - Probability: 0.9984
Label: Un-Safe - Probability: 0.0016


# KoalaAI/TextClassification

In [13]:
# KoalaAI/TextClassification

# Load the model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained("KoalaAI/Text-Moderation")
tokenizer = AutoTokenizer.from_pretrained("KoalaAI/Text-Moderation")

In [14]:
# Run the model on your input
inputs = tokenizer("I love AutoTrain", return_tensors="pt")
outputs = model(**inputs)

# Get the predicted logits
logits = outputs.logits

# Apply softmax to get probabilities (scores)
probabilities = logits.softmax(dim=-1).squeeze()

# Retrieve the labels
id2label = model.config.id2label
labels = [id2label[idx] for idx in range(len(probabilities))]

# Combine labels and probabilities, then sort
label_prob_pairs = list(zip(labels, probabilities))
label_prob_pairs.sort(key=lambda item: item[1], reverse=True)  

# Print the sorted results
for label, probability in label_prob_pairs:
    print(f"Label: {label} - Probability: {probability:.4f}")

Label: OK - Probability: 0.9840
Label: H - Probability: 0.0043
Label: SH - Probability: 0.0039
Label: V - Probability: 0.0019
Label: S - Probability: 0.0018
Label: HR - Probability: 0.0015
Label: V2 - Probability: 0.0011
Label: S3 - Probability: 0.0010
Label: H2 - Probability: 0.0006


In [15]:
# Run the model on your input
inputs = tokenizer("go back to your country", return_tensors="pt")
outputs = model(**inputs)

# Get the predicted logits
logits = outputs.logits

# Apply softmax to get probabilities (scores)
probabilities = logits.softmax(dim=-1).squeeze()

# Retrieve the labels
id2label = model.config.id2label
labels = [id2label[idx] for idx in range(len(probabilities))]

# Combine labels and probabilities, then sort
label_prob_pairs = list(zip(labels, probabilities))
label_prob_pairs.sort(key=lambda item: item[1], reverse=True)  

# Print the sorted results
for label, probability in label_prob_pairs:
    print(f"Label: {label} - Probability: {probability:.4f}")

Label: OK - Probability: 0.4326
Label: H - Probability: 0.3793
Label: HR - Probability: 0.0623
Label: V - Probability: 0.0536
Label: H2 - Probability: 0.0227
Label: SH - Probability: 0.0169
Label: S - Probability: 0.0127
Label: V2 - Probability: 0.0123
Label: S3 - Probability: 0.0076


# citizenlab/distilbert-base-multilingual-cased-toxicity 

In [16]:
# citizenlab/distilbert-base-multilingual-cased-toxicity 
model_path = "citizenlab/distilbert-base-multilingual-cased-toxicity"

toxicity_classifier = pipeline("text-classification", model=model_path, tokenizer=model_path)

print(toxicity_classifier("this is a lovely message"))
print(toxicity_classifier("you are an idiot and you and your family should go back to your country"))

[{'label': 'not_toxic', 'score': 0.9954179525375366}]
[{'label': 'toxic', 'score': 0.9948776960372925}]


# Other models

In [17]:
# unknown model
pipe = pipeline("text-classification")

res = pipe("This restaurant is awesome")
print(res)
print(res[0]['score'])

res = pipe("you are an idiot and you and your family should go back to your country")
print(res)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9998743534088135}]
0.9998743534088135
[{'label': 'NEGATIVE', 'score': 0.9996622800827026}]


In [18]:
# FacebookAI/roberta-large-mnli
pipe2 = pipeline(model="FacebookAI/roberta-large-mnli")
pipe2("This restaurant is awesome")

Some weights of the model checkpoint at FacebookAI/roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'label': 'NEUTRAL', 'score': 0.7313134074211121}]