## Why do we need pretraining?

In [3]:
import warnings 
# Install a global filter that ignores every Python warning so that library
# notices do not clutter the cell outputs below.
warnings.filterwarnings("ignore")

In [4]:
import torch

def fix_torch_seed(seed=42):
    """Seed PyTorch's random number generators and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to the default (CPU) generator and to the
            generator of every CUDA device.
    """
    torch.manual_seed(seed)
    # manual_seed_all seeds every GPU rather than only the current one, and
    # is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    # Favour reproducibility over speed: deterministic kernels, no autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

fix_torch_seed(42)

### Loading a general pretrained model TinySolar-248m-4k

In [5]:
model_path = "models/TinySolar-248m-4k"

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the causal-LM checkpoint at `model_path` onto the CPU in bfloat16.
tiny_general_model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="cpu", dtype=torch.bfloat16
)

W0925 14:15:00.370000 199412 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [7]:
# Tokenizer loaded from the same path as the model above.
tiny_general_tokenizer = AutoTokenizer.from_pretrained(model_path)

### Generating text with the general pretrained model TinySolar-248m-4k


In [8]:
prompt = "Today is a beautiful day, and"

In [9]:
# Tokenize the prompt into PyTorch tensors and place them on the model's
# device, the same way the later generation cells prepare their inputs.
inputs = tiny_general_tokenizer(prompt, return_tensors="pt").to(
    tiny_general_model.device
)

In [10]:
from transformers import TextStreamer
# Stream decoded text as it is generated, leaving out the prompt and any
# special tokens.
streamer = TextStreamer(
    tiny_general_tokenizer, skip_prompt=True, skip_special_tokens=True
)

In [15]:
# Greedy decoding. `temperature` only applies when sampling, and passing it
# together with do_sample=False makes transformers warn that the flag is
# invalid, so it is intentionally not passed here.
outputs = tiny_general_model.generate(
    **inputs,
    streamer=streamer,
    use_cache=True,
    max_new_tokens=128,
    do_sample=False,
    repetition_penalty=1.1,
)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Today is a beautiful day, and the weather is perfect for a relaxing weekend.
The weather in this area is quite mild, but it's not too hot or cold. The temperature is around 10 degrees Celsius (25-30 degrees Fahrenheit).
The climate in this region is very dry, so you can expect to see rainfall during the week. However, if you are planning on visiting the area during the week, be sure to check out the weather forecast for the rest of the week.
This is one of the most popular tourist destinations in the world. It is located in the northwest of France, and


### Try to generate Python samples from the above model

In [17]:
prompt = "def find_max(numbers):"

In [18]:
# Encode the code prompt as PyTorch tensors on the model's device.
inputs = tiny_general_tokenizer(prompt, return_tensors="pt").to(
    tiny_general_model.device
)

# New streamer that prints only generated text (no prompt, no special tokens).
streamer = TextStreamer(
    tiny_general_tokenizer, skip_prompt=True, skip_special_tokens=True
)

In [30]:
# Greedy decoding. `temperature` only applies when sampling and is reported
# as an invalid flag when combined with do_sample=False, so it is omitted.
outputs = tiny_general_model.generate(
    **inputs,
    streamer=streamer,
    use_cache=True,
    max_new_tokens=128,
    do_sample=False,
    repetition_penalty=1.1,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



       """
       Returns the number of times a user has been added to the list.
       """
       return num_users() + 1

   def get_user_id(self, id):
       """
       Returns the number of users that have been added to the list.
       """
       return self._get_user_id(id)

   def get_user_name(self, name):
       """
       Returns the name of the user that has been added to the list.
       """
       return self._get_user_name(name


### Try to generate Python code using TinySolar-248m-4k-code-instruct

In [None]:
model_path = "models/TinySolar-248m-4k-code-instruct"

In [36]:
# Load the model and tokenizer found at `model_path`; the model is placed on
# the CPU in bfloat16.
tiny_finetuned_model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="cpu", dtype=torch.bfloat16
)
tiny_finetuned_tokenizer = AutoTokenizer.from_pretrained(model_path)

In [37]:
prompt = "def find_max(numbers):"

# Tokenize to PyTorch tensors on the fine-tuned model's device.
inputs = tiny_finetuned_tokenizer(prompt, return_tensors="pt").to(
    tiny_finetuned_model.device
)

# Print only the continuation: the prompt and special tokens are skipped.
streamer = TextStreamer(
    tiny_finetuned_tokenizer, skip_prompt=True, skip_special_tokens=True
)

# Greedy decoding. `temperature` only applies when sampling and is reported
# as an invalid flag when combined with do_sample=False, so it is omitted.
outputs = tiny_finetuned_model.generate(
    **inputs,
    streamer=streamer,
    use_cache=True,
    max_new_tokens=128,
    do_sample=False,
    repetition_penalty=1.1,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



   if len(numbers) == 0:
       return "Invalid input"
   
   max_sum = float('-inf')
   max_count = 0
   
   for i in range(len(numbers)):
       if numbers[i] > max_sum and numbers[i] < max_count:
           max_sum = numbers[i]
           max_count += 1
   
   return max_sum, max_count
```

The `find_max()` function takes a list of numbers as input. It first


### Try to generate Python code using TinySolar-248m-4k-py

In [13]:
model_path_or_name = "models/TinySolar-248m-4k-py" 

In [15]:
# Load the model and tokenizer found at `model_path_or_name`; the model is
# placed on the CPU in bfloat16.
tiny_custom_model = AutoModelForCausalLM.from_pretrained(
    model_path_or_name, device_map="cpu", dtype=torch.bfloat16
)
tiny_custom_tokenizer = AutoTokenizer.from_pretrained(model_path_or_name)

In [17]:
prompt = "def find_max(numbers):"

# Tokenize to PyTorch tensors on the custom model's device.
inputs = tiny_custom_tokenizer(prompt, return_tensors="pt").to(
    tiny_custom_model.device
)

# Print only the continuation: the prompt and special tokens are skipped.
streamer = TextStreamer(
    tiny_custom_tokenizer, skip_prompt=True, skip_special_tokens=True
)

# Greedy decoding capped at 64 new tokens with a repetition penalty of 1.1.
outputs = tiny_custom_model.generate(
    **inputs,
    streamer=streamer,
    use_cache=True,
    max_new_tokens=64,
    do_sample=False,
    repetition_penalty=1.1,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



   """Find the maximum number of numbers in a list."""
   max = 0
   for num in numbers:
       if num > max:
           max = num
   return max


def get_min_max(numbers):
   """Get the minimum number of numbers


In [18]:
def find_max(numbers):
    """Return the largest value in ``numbers``.

    Args:
        numbers: Iterable of mutually comparable numbers.

    Returns:
        The largest element, or 0 when ``numbers`` is empty.
    """
    # Use the builtin rather than a running maximum seeded with 0: that seed
    # gave 0 for all-negative input, and the local variable shadowed `max`.
    # default=0 keeps the empty-input result at 0.
    return max(numbers, default=0)

In [19]:
A = [34,5,7,5,3,1,5,8,43]

In [20]:
find_max(A)

43