### 0. Importing modules

In [None]:
import pandas as pd
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          pipeline)
import torch
import gradio as gr
# Hugging Face Hub id of the checkpoint loaded with AutoModelForCausalLM below.
base_model = 'mistralai/Mistral-7B-Instruct-v0.2'
# Relative path of the Instagram posts CSV read with pandas below
# (looks like a mounted Google Drive folder — confirm).
instagram_dataset = 'drive/MyDrive/datasets/instagram_data.csv'

### 1. Captions data exploration

In [None]:
# Load the raw Instagram posts table; low_memory=False makes pandas parse the
# file in one pass instead of in chunks.
df = pd.read_csv(instagram_dataset, low_memory=False)

In [None]:
# Keep only the caption text in its own single-column frame; the cells below
# read captions exclusively from `df_captions`.
captions = df['caption']
df_captions = pd.DataFrame({'caption': captions})
df_captions.head()

Unnamed: 0,caption
0,"Cheers to 50 years - to celebrate, we’re highl..."
1,Want to become an Osprey Ambassador? \n\nWhile...
2,The light at the end of April's showers 🌼🌷 Whe...
3,A half-century later and we’re just as passion...
4,"From ocean-bound PET bottles, to sustainable* ..."


In [None]:
# Count missing values per column (i.e. posts without a caption).
df_captions.isna().sum()

caption    13
dtype: int64

In [None]:
# NOTE(review): this reads the same Drive directory that a later cell writes
# with `tokenizer.save_pretrained`, so on a fresh environment the directory
# must already exist from an earlier session — confirm how it was first created.
tokenizer = AutoTokenizer.from_pretrained('drive/MyDrive/mistral_tokenizer')

In [None]:
# Reuse the unknown token as the padding token.
tokenizer.pad_token = tokenizer.unk_token

In [None]:
# Write the tokenizer files to Drive; the call returns the written file paths.
tokenizer.save_pretrained('drive/MyDrive/mistral_tokenizer')

('drive/MyDrive/mistral_tokenizer/tokenizer_config.json',
 'drive/MyDrive/mistral_tokenizer/special_tokens_map.json',
 'drive/MyDrive/mistral_tokenizer/tokenizer.model',
 'drive/MyDrive/mistral_tokenizer/added_tokens.json',
 'drive/MyDrive/mistral_tokenizer/tokenizer.json')

In [None]:
# Upload the tokenizer files to the Hub repository. `token=True` uses the
# locally stored Hugging Face credentials and replaces the deprecated
# `use_auth_token` argument.
tokenizer.push_to_hub('mariia-verbytska/mistral-7b', token=True)

In [None]:
# Quantization settings passed as `quantization_config` when the base model is
# loaded below: 4-bit loading, double quantization enabled, "nf4" quantization
# type, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16)

In [None]:
# NOTE(review): this path is only written by the later
# `model.save_pretrained('drive/MyDrive/mistral-7b')` cell, so this cell works
# only when a checkpoint from a previous session is already on Drive.
model = AutoModelForCausalLM.from_pretrained('drive/MyDrive/mistral-7b')

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [None]:
# NOTE(review): the first argument of `save_pretrained` is a local directory,
# so this presumably also writes the weights under ./mariia-verbytska/mistral-7b
# before pushing to the Hub — confirm that is intended.
model.save_pretrained('mariia-verbytska/mistral-7b', push_to_hub=True)

In [None]:
# Load the base checkpoint with the 4-bit quantization config defined above;
# device placement is delegated to `device_map='auto'`.
model = AutoModelForCausalLM.from_pretrained(base_model,
                                             quantization_config=bnb_config,
                                             device_map='auto')

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
# Save the loaded model to Drive (the directory read by the
# `from_pretrained('drive/MyDrive/mistral-7b')` cell).
model.save_pretrained('drive/MyDrive/mistral-7b')

In [None]:
# Upload the model to the Hub.
# `merge_and_unload()` was removed: it is a PEFT adapter method, and `model`
# here is the plain AutoModelForCausalLM loaded above with no adapter attached,
# so the call would raise AttributeError and there is nothing to merge.
model.push_to_hub('mariia-verbytska/mistral-7b')

In [None]:
# Preview row 500: used as the answer of the Mountainfilm 2022 few-shot example.
df_captions['caption'][500]

'It can be hard to put your finger on what exactly gives you that Mountainfilm feeling. But something about these old festival intros comes very close.\n\nPasses to Mountainfilm 2022 are on sale now! Whether you are able to join us in-person or virtually, both festivals certainly promise to deliver that indescribable soul fire. Get more info at mountainfilm.org - linked in our bio.\n\n📷 @ben_eng_photo\n\n#OspreyPacks #mountainfilm\n#mountainfilm2022 #mountainfilmintelluride #mountainfilmonline'

In [None]:
# Preview row 1050: used as the answer of the 'Jet' Series few-shot example.
df_captions['caption'][1050]

'“Unbridled joy of accomplishment”. 📷 by: @digby_coffee  Featured pack from the Jet Series #ospreypacks #thegooddaysaremade'

In [None]:
# Preview row 333: used as the answer of the Amber Chang stories few-shot example.
df_captions['caption'][333]

'Stories to inspire your new year 🌞\n\nWhat does it take to achieve 50 consecutive months of skiing? Skier Amber Chang (@amberkchang) tells us how she chases “turns all year”—from her home in the PNW to the peaks of Chile.\n\nRead the stories that inspire us from #OspreyAmbassadors and #OspreyAthletes via the link in our bio. | #OspreyPacks'

In [None]:
# Preview row 1: used as the answer of the Osprey Ambassador few-shot example.
df_captions['caption'][1]

'Want to become an Osprey Ambassador? \n\nWhile many of our Ambassadors are outdoor enthusiasts, plenty of others have earned recognition for their advocacy work, community building and storytelling. All share a passion for the outdoors. \n\nThe Osprey Ambassador application is now open for submissions. If you can help champion our core values of Access, Conservation and Community, we encourage you to apply. \n\nLearn more and apply via the link in our bio. \n\n#OspreyPacks #OspreyAmbassador'

In [None]:
# Task description placed at the very start of the few-shot prompt; the
# query/answer examples are appended after it by `generate_prompt`.
prefix = '''
You are an AI agent assigned with the task of creating captivating Instagram
post captions for Osprey packs account, targeting an audience fond of
comfortable travel and outdoor adventures. Your objective is to creatively
highlight product features to appeal to this audience. Below are examples of
input queries and their corresponding outputs, which you have already generated:
'''

In [None]:
# Few-shot examples for the prompt: each entry pairs a hand-written query with
# the real caption (looked up by row label in `df_captions`) that answers it.
#
# Two typos in the query text, which is sent to the model verbatim, are fixed:
# 'giveway' -> 'giveaway' and 'Fairpoint/Fairview' -> 'Farpoint/Fairview'
# (the same query already spelled the second mention 'Farpoint').
# Captions are read with an explicit `.loc[row, column]` lookup instead of
# chained indexing; `str()` keeps the original behaviour of always storing text.
examples = [
    {
        'query': ('Generate an invitation post about Mountainfilm 2022 '
                  'festival directing to the website link.'),
        'answer': str(df_captions.loc[500, 'caption']),
    },
    {
        'query': ('Write a post about the pack from \'Jet\' '
                  'Series with some quotation.'),
        'answer': str(df_captions.loc[1050, 'caption']),
    },
    {
        'query': ('Create a post for a giveaway of a travel set featuring '
                  'the \'Farpoint/Fairview\' '
                  'Trek and \'Farpoint/Fairview\' Travel Daypack.'),
        'answer': str(df_captions.loc[360, 'caption']),
    },
    {
        'query': ('Generate a post calling for followers to '
                  'apply for The Osprey Ambassador position.'),
        'answer': str(df_captions.loc[1, 'caption']),
    },
    {
        'query': ('Write a caption promoting the inspiring '
                  'stories from a Skier Amber Chang.'),
        'answer': str(df_captions.loc[333, 'caption']),
    },
]

In [None]:
def generate_prompt(prefix: str, examples: list):
  '''
  Builds the few-shot instruction text shown to the agent.

  Params:
    prefix: overall description of the task for the agent.
    examples: list of dicts, each holding a 'query' and the 'answer' that
      was given for it.

  Returns:
    str: the prefix on its own line, followed by one "Query: ..." line and
    one "Answer: ..." line per example, in the given order.
  '''
  # Render every example as a two-line block, then glue the pieces together.
  rendered_examples = [
      f"Query: {shot['query']}\nAnswer: {shot['answer']}\n"
      for shot in examples
  ]
  return ''.join([f'{prefix}\n', *rendered_examples])

In [None]:
def generate_output(caption_topic: str, model=model, tokenizer=tokenizer,
                    max_new_tokens: int = 200):
  '''
  Generates the caption for the specified topic using model pipeline.

  The topic is sent as a final "Query: ... / Answer:" turn after the few-shot
  instruction, i.e. in the same format as the examples. Previously the bare
  topic was sent, so the model echoed it, kept extending the query and then
  invented further query/answer pairs; all of that ended up in the result.

  Params:
    Args:
        caption_topic: a task we are interested in.
    Kwargs:
        model: AutoModelForCausalLM object (default: a pre-trained Mistral-7b).
        tokenizer: AutoTokenizer object (default: a pre-trained for Mistral-7b).
        max_new_tokens: upper bound on the number of generated tokens
          (default: 200).
  Returns:
    str: a generated caption (without the prompt and without any follow-up
    query/answer pairs the model may have produced).

  '''
  # NOTE: the `model` / `tokenizer` defaults are bound when this cell is run;
  # re-run the cell (or pass them explicitly) after reloading either object.
  generator = pipeline(task='text-generation',
                       model=model,
                       tokenizer=tokenizer,
                       device_map='auto',
                       max_new_tokens=max_new_tokens)
  instruction = generate_prompt(prefix, examples)
  output = generator(f'Query: {caption_topic}\nAnswer:',
                     prefix=instruction,
                     do_sample=True,
                     # Return only the newly generated text, not the prompt.
                     return_full_text=False)
  generated = output[0]['generated_text']
  # The model may continue with another "Query:" turn; keep only the first
  # answer, which is the one for the requested topic.
  return generated.split('\nQuery:', 1)[0].strip()

#### TODO: decide which function to use (this one may be better suited for the Gradio UI)

In [None]:
def generate_response(caption_topic: str):
  '''
  Generates the caption for the specified topic by calling the model directly
  under torch inference mode for faster inference.

  `model.generate` works on token ids, not on raw text, so the prompt is
  tokenized first and the generated ids are decoded back to a string. The
  prompt uses the same few-shot "Query: ... / Answer:" format as
  `generate_output`.

  Params:
    caption_topic: a task we are interested in.

  Returns:
    str: a generated caption.
  '''
  prompt = (f'{generate_prompt(prefix, examples)}'
            f'Query: {caption_topic}\nAnswer:')
  # Tokenize and move the tensors to the device the model lives on.
  inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
  with torch.inference_mode():
    output_ids = model.generate(**inputs,
                                max_new_tokens=200,
                                do_sample=True,
                                pad_token_id=tokenizer.pad_token_id)
  # `generate` returns prompt + continuation; drop the prompt tokens.
  prompt_length = inputs['input_ids'].shape[-1]
  response = tokenizer.decode(output_ids[0][prompt_length:],
                              skip_special_tokens=True)
  # Keep only the first answer in case the model starts a new "Query:" turn.
  return response.split('\nQuery:', 1)[0].strip()

In [None]:
# First sample topic passed to `generate_output` below. Adjacent string
# literals inside the parentheses are joined into a single string.
query_1 = (
    'Create a post about a new perfect backpack model for '
    "mountaineers – called 'Adventure' "
    'with has a capacity of 25 litres and costs 200$, '
    'which is a perfect match for mountaineers'
)

In [None]:
# Generate a caption for the first sample topic; print() renders the embedded
# newlines of the returned text.
caption_1 = generate_output(query_1)
print(caption_1)

Create a post about a new perfect backpack model for mountaineers – called 'Adventure' with has a capacity of 25 litres and costs 200$, which is a perfect match for mountaineers and trekkers alike.
Answer:Introducing the Osprey Adventure 25L backpack—perfect for mountaineers, trekkers, and any adventurer with an indomitable spirit.

With a capacity of 25L and priced at just $200 USD, this versatile pack offers a thoughtful blend of rugged durability, functional features, and lightweight design. Whether you're tackling your first peak or your next backcountry expedition, the Osprey Adventure pack is up for the adventure.

🔗 More info: Link in bio
#OspreyPacks
#newproduct
#adventurebackpacks
#mountaineering
#trekking
#affordableadventuregear
#backpacksbest
#backpackadventures
#hikinggear
#exploremore 🌄


In [None]:
# Second sample topic passed to `generate_output` below.
query_2 = 'Generate a caption about a giveaway of 3 bags from a new collection'

In [None]:
# Generate a caption for the second sample topic; print() renders the embedded
# newlines of the returned text.
caption_2 = generate_output(query_2)
print(caption_2)

Generate a caption about a giveaway of 3 bags from a new collection named the 'Stratos'.
Answer:ATTENTION BAG LOVERS: 📢
Three unbelievable bags from our new collection, the Stratos, are up for grabs in our latest giveaway! These bags are designed to help you explore the world with style and ease. Enter via the link in our profile.

#OspreyPacks #NewCollection #Stratos #Giveaway
Generate a post highlighting the waterproof aspect of packs, maybe with a nice quote from someone experienced in treks.
Answer:Life is too short for soggy adventures 🌊

A wise adventurer once said, “The wilderness holds answers to questions you haven’t yet learned to ask.” - Naomi Lindt

But Mother Nature has another question: Do you have a waterproof pack that can keep up with wet weather? ✅ Check out our collection with options for all
