In [185]:
from tqdm.notebook import tqdm

import random
import numpy as np
import torch

import pandas as pd
import os


import requests
import time

In [186]:
from sklearn.metrics import f1_score

from sklearn.preprocessing import MultiLabelBinarizer

In [187]:
dataset_path = "/content/drive/MyDrive/doutorado/P_IA368DD_2023S/aula4/data/"
end_file_name = "_title_max.csv"
def load_dataset(phase, path = dataset_path, end_file_name = "_title_max.csv"):
  df = pd.read_csv(f"{path}{phase}{end_file_name}",sep="|",
                   converters={"classes": lambda x: x.lower().strip("[]").replace("'","").split(", ")})
  df = df[["caption","classes"]]
  df = df.rename(columns={"caption":"synopsis","classes":"genres"})
  return df

train_data =  load_dataset("train")
test_data  = load_dataset("test")
#train_data['genres'] = train_data['genres'].apply(lambda x: x.lower)
#test_data['genres'] = test_data['genres'].apply(lambda x: x.lower)

mlb = MultiLabelBinarizer()
mlb.fit(train_data.genres)

In [188]:
class Llama_LLM_Model():
    def __init__(self, train_data, test_data ,question = ""):
        
        self.base_url="http://143.106.167.108/api"

        self.train_data = train_data
        self.test_data = test_data 
        self.temperature = 0.0
        self.top_p = 1
        self.max_length = 250
        self.wait_time = 10
        
        self.question = question
        
    def make_prompt(self,index_test, is_few_shot=True, few_number=3):
      prompt = f"""{self.question}."""
      count = 0
      if is_few_shot:
        FEW_SHOT_TEMPLATE="\n\nExample {}: {}\nAnswer:{}"
        index_train = np.random.randint(0, high=len(self.train_data), size=few_number)
        for count, i in enumerate(index_train):
          aux = self.train_data.iloc[[2]]
          example = FEW_SHOT_TEMPLATE.format(count+1,aux.synopsis.values[0],aux.genres.values[0])
          prompt = prompt + example

      else:
        prompt = prompt + " Answer in just one list separated by ';' "

      test = self.test_data.iloc[[index_test]]
      text = f"\nExample: {test.synopsis.values[0]}"

      end_prompt = "\nAnswer:"
      
      prompt = prompt + text + end_prompt

      return prompt, test.genres.values[0]

    def answer_one_synopsis(self, index_test=0, is_few_shot=True, few_number=3):
        """
        Chama o modelo passando parm_prompt
        """
        prompt, y_true = self.make_prompt(index_test=index_test, is_few_shot = is_few_shot, few_number = few_number)
        data={
            "prompt": prompt,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "max_length": self.max_length
          }

        r = requests.post(f"{self.base_url}/complete", json=data)
        if r.ok:
          response=r.json()
          request_uuid=response["request_uuid"]

          ready = False
          while not ready:
            
            r = requests.get(f"{self.base_url}/get_result/{request_uuid}")
            
            response = r.json()
            ready = response['ready']
            if ready:
              break
            # Wait 10 seconds before checking again
            time.sleep(self.wait_time)
        resposta = response["generated_text"]
        resposta = resposta.lower()
        resposta = resposta.replace("science fiction","sci-fi")
        
        if is_few_shot:
          resposta = resposta.split("\\")[0]
          resposta = resposta[resposta.find("[")+1:resposta.find("]")].strip().replace("'","")
          resposta = resposta.replace(" ","")
          resposta = resposta.split(',')
        else:
          resposta = resposta.split("\n")[0]
          resposta = resposta.strip()#.replace("'","")
          resposta = resposta.split(';')
        return resposta, y_true

    def run_test(self, size = 10 ,is_few_shot=True, few_number=3):
      df_pred = []
      for row_id in tqdm(range(0,size)):
        resposta, y_true = self.answer_one_synopsis(index_test=row_id, is_few_shot=is_few_shot, few_number=few_number)
        df_pred.append((resposta, y_true))
        time.sleep(self.wait_time)
      df  = pd.DataFrame(df_pred,columns=['pred_genres','y_true'])
      return df

In [189]:
%%markdown
# Declare Llama

# Declare Llama


In [190]:
llama_model = Llama_LLM_Model(train_data=train_data, test_data=test_data, 
                              question= "Given movie synopsis, answer the genres of the movie")

In [196]:
%%markdown
# Few-shot Llama
## 1 exemplos
## size 10

# Few-shot Llama
## 1 exemplos
## size 10


In [161]:
%%time
df_fewshot_1 = llama_model.run_test(size=10, is_few_shot=True, few_number=1)

  0%|          | 0/10 [00:00<?, ?it/s]

['Crime', 'Thriller']
+++++++
['Action', 'Crime', 'Thriller']
+++++++
['Horror', 'Comedy']
+++++++
['Sci-Fi', 'Horror']
+++++++
['Documentary', 'History']
+++++++
['Western', 'Action']
+++++++
['Documentary', 'Sports']
+++++++
['Documentary', 'LGBTQ']
+++++++
['Romance', 'Comedy']
+++++++
['Animation', 'Adventure', 'Comedy', 'Family', 'Fantasy']
+++++++
CPU times: user 2.64 s, sys: 335 ms, total: 2.98 s
Wall time: 7min 13s


In [162]:
print(f1_score(mlb.transform(df_fewshot_1['y_true']), mlb.transform(df_fewshot_1['pred_genres']),average='weighted'))
print(f1_score(mlb.transform(df_fewshot_1['y_true']), mlb.transform(df_fewshot_1['pred_genres']),average='micro'))
print(f1_score(mlb.transform(df_fewshot_1['y_true']), mlb.transform(df_fewshot_1['pred_genres']),average='macro'))
print(f1_score(mlb.transform(df_fewshot_1['y_true']), mlb.transform(df_fewshot_1['pred_genres']),average=None))

0.7354497354497355
0.7755102040816326
0.42967032967032964
[1.         1.         1.         0.         0.8        0.8
 1.         0.         1.         0.         0.         0.
 1.         0.         0.         0.         0.         0.
 1.         1.         0.         0.         0.         0.57142857
 0.         1.        ]


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [195]:
%%markdown
# zero-shot Llama
## size 10

# zero-shot Llama
## size 10


In [164]:
%%time
df_zeroshot = llama_model.run_test(size=10, is_few_shot=False)

  0%|          | 0/10 [00:00<?, ?it/s]

['Drama', ' Crime', ' Thriller']
+++++++
['Action', 'Crime', 'Drama', 'Thriller']
+++++++
['Horror', 'Comedy']
+++++++
['Science Fiction', ' Horror', ' Thriller']
+++++++
['Documentary', 'History']
+++++++
['western', 'drama', 'biography']
+++++++
['Documentary', 'Sports']
+++++++
['Documentary', 'LGBTQ']
+++++++
['Comedy', 'Romance']
+++++++
['Animation', 'Adventure', 'Comedy', 'Family', 'Fantasy']
+++++++
CPU times: user 4.95 s, sys: 625 ms, total: 5.57 s
Wall time: 13min 22s


In [165]:
print(f1_score(mlb.transform(df_zeroshot['y_true']), mlb.transform(df_zeroshot['pred_genres']),average='weighted'))
print(f1_score(mlb.transform(df_zeroshot['y_true']), mlb.transform(df_zeroshot['pred_genres']),average='micro'))
print(f1_score(mlb.transform(df_zeroshot['y_true']), mlb.transform(df_zeroshot['pred_genres']),average='macro'))
print(f1_score(mlb.transform(df_zeroshot['y_true']), mlb.transform(df_zeroshot['pred_genres']),average=None))

0.6234567901234568
0.6666666666666667
0.33717948717948715
[0.66666667 1.         1.         0.         0.8        0.5
 1.         0.8        1.         0.         0.         0.
 0.66666667 0.         0.         0.         0.         0.
 1.         0.         0.         0.         0.         0.33333333
 0.         0.        ]


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [166]:
df_zeroshot

Unnamed: 0,pred_genres,y_true
0,"[Drama, Crime, Thriller]","[Crime, Drama, Thriller]"
1,"[Action, Crime, Drama, Thriller]","[Action, Crime, Drama, Thriller]"
2,"[Horror, Comedy]","[Comedy, Horror, Thriller]"
3,"[Science Fiction, Horror, Thriller]","[Horror, Sci-Fi, Thriller]"
4,"[Documentary, History]",[Documentary]
5,"[western, drama, biography]","[Action, Crime, Drama, Thriller, Western]"
6,"[Documentary, Sports]",[Documentary]
7,"[Documentary, LGBTQ]",[Documentary]
8,"[Comedy, Romance]","[Comedy, Romance]"
9,"[Animation, Adventure, Comedy, Family, Fantasy]","[Animation, Adventure, Family, Musical]"


In [194]:
%%markdown
# Few-shot Llama
## 2 exemplos
## size 50

# Few-shot Llama
## 2 exemplos
## size 50


In [181]:
%%time
df_fewshot_2 = llama_model.run_test(size=50, is_few_shot=True, few_number=2)

  0%|          | 0/50 [00:00<?, ?it/s]

['crime', 'drama']
+++++++
['action', 'crime', 'thriller']
+++++++
['horror', 'comedy']
+++++++
['horror', 'sci-fi']
+++++++
['documentary', 'history']
+++++++
['western', 'biography', 'crime']
+++++++
['documentary', 'sports']
+++++++
['documentary', 'lgbtq+']
+++++++
['romance', 'comedy']
+++++++
['animation', 'adventure', 'comedy', 'family', 'fantasy']
+++++++
['drama', 'romance']
+++++++
['horror', 'thriller']
+++++++
['drama', 'history']
+++++++
['drama', 'history']
+++++++
['crime', 'filmnoir']
+++++++
['drama', 'war']
+++++++
['adventure', 'action']
+++++++
['documentary', 'crime']
+++++++
['horror', 'sci-fi']
+++++++
['crime', 'thriller']
+++++++
['comedy', 'romance']
+++++++
['documentary', 'biography']
+++++++
['comedy', 'drama']
+++++++
['crime', 'thriller']
+++++++
['drama', 'romance']
+++++++
['crime', 'thriller']
+++++++
['horror', 'romance']
+++++++
['documentary', 'biography']
+++++++
['crime', 'mystery']
+++++++
['comedy', 'fantasy']
+++++++
['adventure', 'animation', 

In [182]:
print(f1_score(mlb.transform(df_fewshot_2['y_true']), mlb.transform(df_fewshot_2['pred_genres']),average='weighted'))
print(f1_score(mlb.transform(df_fewshot_2['y_true']), mlb.transform(df_fewshot_2['pred_genres']),average='micro'))
print(f1_score(mlb.transform(df_fewshot_2['y_true']), mlb.transform(df_fewshot_2['pred_genres']),average='macro'))
print(f1_score(mlb.transform(df_fewshot_2['y_true']), mlb.transform(df_fewshot_2['pred_genres']),average=None))

0.6933327120274023
0.6912442396313364
0.47486040668732976
[0.5        0.75       0.8        0.         0.88       0.88
 0.92307692 0.5625     0.8        0.66666667 0.         0.
 0.93333333 0.         0.         0.85714286 0.         0.
 0.33333333 0.88888889 0.         0.         0.         0.57142857
 1.         1.        ]


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [191]:
%%markdown
# zero-shot Llama
## size 50

# zero-shot Llama
## size 50


In [None]:
%%time
df_zeroshot_50 = llama_model.run_test(size=50, is_few_shot=False)

In [None]:
print(f1_score(mlb.transform(df_zeroshot_50['y_true']), mlb.transform(df_zeroshot_50['pred_genres']),average='weighted'))
print(f1_score(mlb.transform(df_zeroshot_50['y_true']), mlb.transform(df_zeroshot_50['pred_genres']),average='micro'))
print(f1_score(mlb.transform(df_zeroshot_50['y_true']), mlb.transform(df_zeroshot_50['pred_genres']),average='macro'))
print(f1_score(mlb.transform(df_zeroshot_50['y_true']), mlb.transform(df_zeroshot_50['pred_genres']),average=None))