### **Import Pipeline from transformers to perform various Tasks**

### **Sentiment Analysis**

In [None]:
from transformers import pipeline
# Name the checkpoint explicitly rather than relying on the task's implicit
# default model, so results stay reproducible and the "no model was supplied"
# warning is not emitted.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

In [None]:
# Run the sentiment classifier over a small batch of sentences and show the raw result.
sample_sentences = [
    "I'm learning Hugging Face course nowadays and this course is very great and i've learned a lot from this course",
    "But Balancing work and learning has been quite exhausting lately",
]
tasks = classifier(sample_sentences)
print(tasks)

### **Check labels and their Scores**

In [None]:
# Each prediction is read by its "label" and "score" keys; print one line per prediction.
for prediction in tasks:
  print("Labels: ", prediction["label"], '---->', "Score: ", prediction["score"])

### **Initializing the Tokenizer**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the tokenizer for the cased BERT checkpoint and print its summary.
bert_cased_checkpoint = "google-bert/bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(bert_cased_checkpoint)
print(tokenizer)

### **Preprocessing with the Tokenizer**

In [None]:
# Encode the same two sentences with the tokenizer; `tasks` now holds the encoding
# (it is indexed by "input_ids" in the following cells).
sentences_to_encode = [
    "I'm learning Hugging Face course nowadays and this course is very great and i've learned a lot from this course",
    "But Balancing work and learning has been quite exhausting lately",
]
tasks = tokenizer(sentences_to_encode)
print(tasks)

In [None]:
# Show only the "input_ids" entry of the encoding produced above.
print(tasks["input_ids"])

In [None]:
# Print every field of the encoding exactly once.
# (Looping over the keys while printing all three fields inside the loop body
# repeated the same output once per key.)
for key, value in tasks.items():
  print(key, "---->", value)

In [None]:
# Three sample reviews used as the running example in the following cells.
raw_text = [
    "I just completed my first NLP model thanks to this course, and it feels amazing!",
    "This course is structured so well, it's the best learning experience I've had online.",
    "The pace is too fast, and I feel completely lost after the first few lessons.",
]
# Encode the batch with padding and truncation enabled, returning PyTorch tensors.
encode_options = dict(padding=True, truncation=True, return_tensors="pt")
# NOTE: `input` shadows Python's builtin of the same name; the name is kept
# because later cells refer to it.
input = tokenizer(raw_text, **encode_options)
print(input)


In [None]:
# Print every field of the batched encoding exactly once.
# (Printing all three fields inside a loop over the keys repeated the same
# output once per key.)
for key, value in input.items():
  print(key, "---->", value)

### **Initializing the Base Model with AutoModel**

In [None]:
from transformers import AutoModel, AutoModelForSequenceClassification
# Load the base model from the same checkpoint family as `tokenizer`
# (bert-base-cased). The ids in `input` come from the cased vocabulary, so an
# uncased model would look up the wrong embeddings for them.
model = AutoModel.from_pretrained("google-bert/bert-base-cased")

In [None]:
# Bare expression: the notebook displays the model's repr.
model

### **Model Inference**

In [None]:
# Forward pass: unpack the tokenizer's output mapping into keyword arguments.
output=model(**input)
# Bare last expression so the notebook displays the returned object.
output

### **Let's Check the Hidden State**

In [None]:
# Display the `last_hidden_state` attribute of the model output from the previous cell.
output.last_hidden_state

### **Loading Tokenizer and Model for Sequence Classification**

In [None]:
# Load a tokenizer and a sequence-classification model from the SAME checkpoint.
uncased_checkpoint = "bert-base-uncased"
new_tokenizer = AutoTokenizer.from_pretrained(uncased_checkpoint)
new_model = AutoModelForSequenceClassification.from_pretrained(uncased_checkpoint)
print(new_tokenizer(raw_text))
# Encode with `new_tokenizer` so the ids match `new_model`'s vocabulary.
# (Reusing `input` would feed ids produced by the other, cased tokenizer.)
uncased_input = new_tokenizer(raw_text, padding=True, truncation=True, return_tensors="pt")
new_output = new_model(**uncased_input)
print(new_output)


In [None]:
# Display the `logits` attribute of the classification output from the previous cell.
new_output.logits

### **Pipeline Step#2: Models**

In [None]:
from transformers import AutoConfig
# Fetch the configuration objects of two checkpoints and print them one after
# the other, separated by a divider line.
bert_config = AutoConfig.from_pretrained("bert-base-cased")
distill_bert_config = AutoConfig.from_pretrained("distilbert-base-cased")
print(
    "Bert Config",
    bert_config,
    "====================================",
    "DistillBert Config",
    distill_bert_config,
    sep="\n",
)

### **Build and Initiate Bert Model**

In [None]:
from transformers import BertConfig, BertModel
# Default BERT configuration (no checkpoint involved).
bert_model_config = BertConfig()
# Build the model straight from that configuration; `from_pretrained` is not
# called here, so no checkpoint weights are loaded.
new_bert_model = BertModel(bert_model_config)
# Run the previously encoded batch through it and show the result.
output = new_bert_model(**input)
print(output)

### **Bert Model Configuration**

In [None]:
# Bare expression: the notebook displays the configuration object built above.
bert_model_config

In [None]:
# Load the pretrained cased BERT weights, overriding only `num_hidden_layers`.
# `pad_token_id` is deliberately NOT overridden: -1 is not a valid token id and
# would be used as the embedding layer's padding index, so the checkpoint's own
# pad token id is kept instead.
new_bert_model = BertModel.from_pretrained("bert-base-cased", num_hidden_layers=12)
output = new_bert_model(**input)
print(output)

In [None]:
# Same checkpoint, this time loaded through the generic AutoModel entry point.
auto_model_checkpoint = "bert-base-cased"
bert_model = AutoModel.from_pretrained(auto_model_checkpoint)
# Forward pass on the encoded batch; keep the result for the next cell.
new_output = bert_model(**input)
print(new_output)

In [None]:
# Display the `last_hidden_state` attribute of the output computed in the previous cell.
new_output.last_hidden_state

In [None]:
# Persist the model to the given directory.
# NOTE(review): this is an absolute path (looks like a Colab working directory) —
# confirm it is writable wherever this notebook is run.
bert_model.save_pretrained("/content/bert_model")

### **Using a Transformer model for inference**

In [None]:
# Example sentences and their pre-computed token ids (one row per sentence).
# The second sentence carries a trailing "." so that it corresponds to its id
# row: that row contains four ids, i.e. an extra token between the word and the
# closing special token, just like the "!" in the other two rows.
sequences = ["Hello!", "Cool.", "Nice!"]

encoded_sequences = [
    [101, 7592, 999, 102],
    [101, 4658, 1012, 102],
    [101, 3835, 999, 102],
]

In [None]:
import torch
# Turn the nested list of ids into a tensor (all rows have the same length).
new_input=torch.tensor(encoded_sequences)
print(new_input)

In [None]:
# Rebinds `model` (previously a base AutoModel) to a sequence-classification
# model loaded from the uncased BERT checkpoint.
model=AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased")

In [None]:
# Forward pass with the id tensor only (no attention mask is passed).
google_bert_output=model(new_input)
print(google_bert_output)

In [None]:
# Display the `logits` attribute of the output from the previous cell.
google_bert_output.logits

In [None]:
# Normalise the logits along the last dimension so each row sums to 1.
raw_logits = google_bert_output.logits
probabilities = torch.nn.functional.softmax(raw_logits, dim=-1)
print(probabilities)

## **Tokenizers**

### **Word Tokenize**

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Naive word-level tokenization: cut the sentence on whitespace only, so
# punctuation stays attached to the neighbouring word.
tokenize_text = "I'm doing Datascience and ai projects, and now a days I'm doing NLP course"
text_tokenize = str.split(tokenize_text)
print(text_tokenize)

In [None]:
# Rebind `tokenizer` to the cased BERT tokenizer and split the sample sentence
# into its tokens.
cased_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(cased_checkpoint)
output_text = tokenizer.tokenize(tokenize_text)
print(output_text)

### **Loading and saving**

In [None]:
# Load the cased BERT checkpoint with a sequence-classification head, then run
# the previously encoded batch (`input`) through it.
classification_checkpoint = "bert-base-cased"
bert_base_model = AutoModelForSequenceClassification.from_pretrained(classification_checkpoint)
output = bert_base_model(**input)
print(output)

In [None]:
# Encode a single sentence and list every field of the encoding,
# leaving a blank line after each value.
outputs = tokenizer("Using a Transformer network is simple")
for field_name, field_value in outputs.items():
  print("key: ", field_name)
  print("value: ", field_value, end="\n\n")

### **Save bert-base-cased Model**

In [None]:
# Persist the classification model to the given directory.
# NOTE(review): this is an absolute path (looks like a Colab working directory) —
# confirm it is writable wherever this notebook is run.
bert_base_model.save_pretrained("/content/bert_base_new_model")

### **Convert tokens to input ids**

In [None]:
# Map the token strings produced by `tokenizer.tokenize` earlier to their vocabulary ids.
ids=tokenizer.convert_tokens_to_ids(output_text)
print(ids)

In [None]:
# Turn a list of ids back into text.
# NOTE(review): the literal id list is presumably a copy of `ids` printed by the
# previous cell — confirm, and consider passing `ids` directly.
decode_ids=tokenizer.decode([146, 112, 182, 1833, 7154, 22274, 1105, 170, 1182, 3203, 117, 1105, 1208, 170, 1552, 146, 112, 182, 1833, 21239, 2101, 1736])
print(decode_ids)

In [None]:
# Compare the manually converted ids with what the tokenizer produces end to end.
print("IDs: ", ids)
# Encode the original sentence, not the token list: a list of strings is treated
# as a batch, so every token would be encoded as its own separate sequence.
print("Input IDs: ", tokenizer(tokenize_text)["input_ids"])

### **Convert ids to tokens**

In [None]:
# Map each id in the literal list back to its token string; the bare expression
# makes the notebook display the resulting list.
tokenizer.convert_ids_to_tokens([146, 112, 182, 1833, 7154, 22274, 1105, 170, 1182, 3203, 117, 1105, 1208, 170, 1552, 146, 112, 182, 1833, 21239, 2101, 1736])

### **Summing Up All Together**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I have learned a lot from this course!",
]
# `tokenize` works on ONE string at a time, so split each sentence separately
# instead of handing it the whole list.
inputs = [tokenizer.tokenize(sentence) for sentence in raw_inputs]
print(inputs)
# convert tokens to ids (one list of ids per sentence)
ids = [tokenizer.convert_tokens_to_ids(tokens) for tokens in inputs]
print(ids)
# The sentences yield different numbers of ids, and a tensor must be
# rectangular: right-pad the shorter rows with the tokenizer's pad token id.
longest = max(len(row) for row in ids)
padded_ids = [row + [tokenizer.pad_token_id] * (longest - len(row)) for row in ids]
input_ids = torch.tensor(padded_ids)
print(input_ids)

### **Performing Inference with the distilbert-base-uncased-finetuned-sst-2-english Pre-trained Model**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and classification model are both loaded from the same checkpoint.
checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I have learned a lot from this course!",
]

# Let the tokenizer build the whole batch in one call: padding and truncation
# enabled, PyTorch tensors returned.
batch_options = {"padding": True, "truncation": True, "return_tensors": "pt"}
inputs = tokenizer(raw_inputs, **batch_options)

# Forward pass, then show the logits.
output = model(**inputs)
print(output.logits)


In [None]:
checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Use the tokenizer's real pad token id rather than a hard-coded number.
padding_id = tokenizer.pad_token_id

# Two sequences of different length, each run through the model on its own ...
sequence_1_ids = [[200, 200, 200]]
sequence_2_ids = [[200, 200]]
# ... and the same two sequences as one batch, the shorter one right-padded.
batched_id = [[200, 200, 200],
              [200, 200, padding_id]]

print(model(torch.tensor(sequence_1_ids)))
print(model(torch.tensor(sequence_2_ids)))
# No attention mask is passed here, so the padded second row is processed as if
# the pad token were ordinary input; compare its logits with the previous print.
print(model(torch.tensor(batched_id)))




In [None]:
text_sequence=["I'm doing Huggig Face course",
               "It's content is very good"]
# Encode the sentences defined in this cell (previously `raw_text` from an
# earlier cell was tokenized here and `text_sequence` went unused).
# NOTE: `input` shadows Python's builtin; the name is kept because the next cell reads it.
input=tokenizer(text_sequence, padding=True, truncation=True, return_tensors="pt")
print(input)

### **Check Attention Mask and Input ids**

In [None]:
# Print each field of the encoding once, name first and value below it.
# (Printing 'attention_mask' and 'input_ids' inside the loop as well repeated
# them on every iteration.)
for key, value in input.items():
  print(key)
  print(value)

In [None]:
# Display the tokenizer's `model_max_length` attribute.
tokenizer.model_max_length

### **Padding**

In [None]:
# 1) "max_length" padding without an explicit max_length argument.
new_model_input = tokenizer(text_sequence, padding="max_length")
print(new_model_input)

# 2) "longest" padding.
new_model_input_1 = tokenizer(text_sequence, padding="longest")
print(new_model_input_1)

# 3) "max_length" padding with max_length set to 8.
fixed_length = 8
new_model_input_2 = tokenizer(text_sequence, padding="max_length", max_length=fixed_length)
print(new_model_input_2)