In [7]:
import contextlib
import os

import torch
import torch.nn as nn
import transformers
from peft import PeftModel, PeftConfig
from torch import cuda, bfloat16
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel
from transformers import BitsAndBytesConfig
from transformers.generation.utils import GenerationConfig

# Hub identifiers: the gated Llama-2 chat base model and the LoRA adapter applied on top of it.
base_model_id = 'meta-llama/Llama-2-7b-chat-hf'
adapter_id = "Ashishkr/llama2-call-summarization"

# Only used for the status message below; weight placement is decided by device_map='auto'.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Load the base weights 4-bit NF4-quantized (with double quantization), computing in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# SECURITY: never hard-code an access token in a notebook. The token that was previously
# committed here must be considered leaked and should be revoked in the Hugging Face settings.
# The token is read from the environment instead; when it is unset (None) the hub client is
# expected to fall back to credentials stored by `huggingface-cli login`.
hf_auth = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

model_config = transformers.AutoConfig.from_pretrained(
    base_model_id,
    token=hf_auth  # `use_auth_token` is deprecated in favour of `token`
)

# trust_remote_code is intentionally not enabled: Llama is implemented inside transformers,
# so there is no need to allow execution of code shipped with the repository.
model = transformers.AutoModelForCausalLM.from_pretrained(
    base_model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

config = PeftConfig.from_pretrained(adapter_id)
# No `.to(device)` here: device_map='auto' has already placed the quantized weights, and
# moving a bitsandbytes-quantized model afterwards is unnecessary and not supported.
model = PeftModel.from_pretrained(model, adapter_id)

model.eval()
print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    base_model_id,
    token=hf_auth
)


OSError: ignored

In [1]:
!pip install bitsandbytes


Collecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.1


In [3]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m55.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m73.8 MB/s[0m eta [36m0:00:0

In [4]:
!pip install peft

Collecting peft
  Downloading peft-0.5.0-py3-none-any.whl (85 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.6/85.6 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate (from peft)
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/258.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate, peft
Successfully installed accelerate-0.23.0 peft-0.5.0


In [None]:


def llama_generate(
    model: "AutoModelForCausalLM",
    tokenizer: "AutoTokenizer",
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92) -> str:
    """Generate a continuation of ``prompt`` and return only the newly generated text.

    Args:
        model: Causal language model; must provide ``parameters()`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; it is called to encode the prompt and
            its ``decode`` method turns the generated token ids back into text.
        prompt: Text the model should continue.
        max_new_tokens: Upper bound on the number of tokens generated after the prompt.
        temperature: Sampling temperature. A positive value enables sampling with that
            temperature; ``0`` (or ``None``) selects greedy decoding.

    Returns:
        The decoded text of the generated tokens only (prompt tokens excluded, special
        tokens skipped).
    """
    # Annotations are strings so that defining the function does not depend on those
    # names being resolvable at definition time.

    # Place the inputs on the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(run_device)
    # Number of prompt tokens; used below to cut the prompt off the generated sequence.
    prompt_len = inputs["input_ids"].shape[-1]

    # Mixed precision only makes sense on CUDA; the CUDA capability query is skipped elsewhere.
    if run_device.type == "cuda":
        # Use bfloat16 when the GPU supports it, otherwise fall back to float16.
        amp_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        autocast_ctx = torch.autocast("cuda", dtype=amp_dtype)
    else:
        autocast_ctx = contextlib.nullcontext()

    # Make the decoding strategy explicit so `temperature` is honoured regardless of the
    # checkpoint's generation defaults, and so that temperature 0 means greedy decoding.
    do_sample = temperature is not None and temperature > 0
    generate_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        return_dict_in_generate=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    if do_sample:
        generate_kwargs["temperature"] = temperature

    # Some tokenizers define no pad token; fall back to EOS instead of passing None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id
    generate_kwargs["pad_token_id"] = pad_token_id

    with autocast_ctx:
        response = model.generate(**inputs, **generate_kwargs)

    # Slice by token count rather than by len(prompt) characters: decoding the prompt
    # tokens is not guaranteed to reproduce the prompt string character for character.
    new_token_ids = response["sequences"][0][prompt_len:]

    return tokenizer.decode(
        new_token_ids,
        skip_special_tokens=True,
    )

# Prompt layout: an instruction line, the quoted conversation as input, and an open
# "response:" slot that the model is expected to complete.
prompt = """
 instruction: "summarize this conversation :" \n

input: "Oli: I've talked to some people from the third year
Jacob: About the statistics exam?
Marcia: What did they say?
Oli: Yeah, about the exam Oli: We need to prepare for a battle
Jacob: So it will be difficult
Oli: They said it was the hardest exam ever
Marcia: 😱
Oli: The questions were displayed on the screen
Oli: One minute per question and it disappears
Oli: They won't come back so if you didn't get your answer you're fucked
Marcia: So we need to make the calculations really fast
Jacob: That's insane
Oli: I know
Oli: Very stressful
Marcia: How are we even supposed to study for it?
Marcia: With a timer?
Oli: I guess
Marcia: Did anybody pass it last year
Oli: Some people did, but the majority had to take the second or even the third chance"\n

response:  """

# Generate the continuation for the prompt above.
response = llama_generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    max_new_tokens=100,
    temperature=0.9,
)
# Keep only the text before the first literal "<eos>" marker and trim whitespace.
# NOTE(review): presumably the adapter emits "<eos>" as plain text - confirm.
response = response.partition("<eos>")[0].strip()

print(response)



In [None]:
Jobs held since I graduated from Aegis:
First placed at DYPATIL as a data analyst / scientist - left that f..king place.
First placed at a small working startup in Pune for about a week (approx. 4 to 5 days).
Came to Bangalore via a job I got on my own merit ... worked for 5 to 6 days and left for Delhi.
Worked as a marketing data analyst intern at Paisabazaar - earned 15k for 3 months, Dec to March 2019-20.
After a small break of three and a half months, joined Uniphore as a business analyst in Koramangala, Bangalore; worked there for 11 months and came back to Pune.
I was not getting a job for 7 to 8 months after landing in Pune,
and in between started teaching to BITS (basic Python - single student). Associated with a couple of online teaching institutes.
Then worked with Thinkbridge for roughly 2 months, just because I wasn't getting a job; the role didn't match my skills.
Landed another job via Research Fox Consultancy with Gartner as the client. Worked for roughly 5 months.
And then my journey continued ... Joined Cignex Datamatics and worked for Alight as the client for exactly 1 year.
Worked with Tekgile for 3 months and Infovision for 3 months.
Total corporate experience = 30 months.
A total of 2.5 years, and saying 4 is different.
If you land a package anywhere near what you are earning currently: stop, re-evaluate and settle down around 20 LPA.





