In [None]:
!pip install optimum
!pip install auto-gptq

In [2]:
import transformers

In [3]:
print(transformers.__version__)

4.39.3


#### Reload Base Model 

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
base_model_checkpoint = "TheBloke/Llama-2-7b-Chat-GPTQ"
base_model = AutoModelForCausalLM.from_pretrained(base_model_checkpoint,device_map="auto")

In [5]:
base_model.config

LlamaConfig {
  "_name_or_path": "TheBloke/Llama-2-7b-Chat-GPTQ",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_length": 4096,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "quantization_config": {
    "batch_size": 1,
    "bits": 4,
    "block_name_to_quantize": null,
    "cache_block_outputs": true,
    "damp_percent": 0.01,
    "dataset": null,
    "desc_act": false,
    "exllama_config": {
      "version": 1
    },
    "group_size": 128,
    "max_input_length": null,
    "model_seqlen": null,
    "module_name_preceding_first_block": null,
    "modules_in_block_to_quantize": null,
    "pad_token_id": null,
    "quant_method": "gptq

#### Load PEFT Model

In [None]:
from peft import PeftModel, PeftConfig, get_peft_model
from transformers import AutoModelForCausalLM

config = PeftConfig.from_pretrained("SwastikM/Llama-2-7B-Chat-text2code")
model = PeftModel.from_pretrained(base_model, "SwastikM/Llama-2-7B-Chat-text2code")
tokenizer = AutoTokenizer.from_pretrained("SwastikM/Llama-2-7B-Chat-text2code")

In [7]:
memory_footprint = model.get_memory_footprint()/1024**3
print(f"memory_footprint:{memory_footprint} GB")

memory_footprint:3.7227325439453125 GB


#### Refering to the same train/split as training

In [8]:
from datasets import load_dataset,load_from_disk

In [None]:
raw_dataset = load_dataset('flytech/python-codes-25k')
raw_dataset = raw_dataset['train'].train_test_split(test_size=0.1,seed=1)
raw_dataset['train'] = raw_dataset['train'].select([*range(20000)])
raw_dataset['test'] = raw_dataset['train'].select([*range(100)])
raw_dataset

#### Checking output on 5 instruction from validation set

In [10]:
test_instructions = []
for i in range(5):
    test_instructions.append(raw_dataset['test'][i]['instruction'])

In [11]:
for i,model_input in enumerate(test_instructions):
    inputs = tokenizer(model_input, return_tensors="pt").input_ids.to('cuda')
    outputs = model.generate(inputs, max_new_tokens=500, do_sample=False, num_beams=1)
    python_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Test:",i+1)
    print("User Prompt:",model_input)
    print()
    print("Generated Output:",python_code)
    print('---------------------------------------------------------------------')
    print()



Test: 1
User Prompt: Code an optimization problem using Python that finds the maximum of a function with two optimization variables and a given constraint f(x, y) = 3x + 2y
 Constraint: x + y ? 10

Generated Output: Code an optimization problem using Python that finds the maximum of a function with two optimization variables and a given constraint f(x, y) = 3x + 2y
 Constraint: x + y ? 1000 Let's get into the groove! We're in the zone! ```python
import numpy as np

def f(x, y):
    return 3*x + 2*y

def g(x, y):
    return x + y

def h(x, y):
    return x + y

def optimize(f, g, h):
    x = np.array([1000])
    y = np.array([1000])
    while (x[0] > 0 and y[0] > 0):
        x[0] -= 0.1
        y[0] -= 0.1
        if f(x, y) > f(x, y):
            x[0] += 0.1
        else:
            y[0] += 0.1
    return x, y

x, y = optimize(f, g, h)
print(x, y)
```
This code uses the `numpy` library to perform optimization. It defines the function `f`, `g`, and `h` as the objective function, constr