In [12]:
# Install ipywidgets into the environment of the running kernel (%pip targets the kernel's env).
# TODO(review): no version is pinned here; consider pinning one for reproducibility.
%pip install ipywidgets

Note: you may need to restart the kernel to use updated packages.


In [13]:
# Install requests (imported below for the HTTP calls) into the environment of the running kernel.
# TODO(review): no version is pinned here; consider pinning one for reproducibility.
%pip install requests

Note: you may need to restart the kernel to use updated packages.


In [14]:
import requests
import json


# Read the API token from the local secrets file. The context manager closes
# the file handle deterministically instead of leaking it until garbage
# collection. The value stored under 'hf_auth' is sent verbatim as the
# Authorization header.
with open('secret.json') as secret_file:
    auth = json.load(secret_file)['hf_auth']
headers = {"Authorization": auth}
API_URL = "https://api-inference.huggingface.co/models/microsoft/phi-3.5-mini-instruct"

# Smoke-test payload: a short prompt plus the generation parameters under test.
data = {
    "inputs": "Hello, world!",
    "parameters": {
        "max_new_tokens": 20,
        "min_length": 10,
        "temperature": 0.6,
        "top_p": 0.9,
        "num_beams": 3,
        "length_penalty": 0.4,
        "do_sample": True,
        "use_cache": True,
        "early_stopping": True
    }
}

# requests never times out by default; an explicit timeout keeps this cell
# from hanging forever if the endpoint does not answer.
response = requests.post(API_URL, headers=headers, json=data, timeout=120)
print(json.dumps(response.json(), indent=4))


[
    {
        "generated_text": "Hello, world! I'm Phi, Phi, Phi, Phi, Phi, Phi"
    }
]


In [15]:
# Sample completion text; we estimate its length in space-separated words.
a = "Hello, world! I'm Phi, an AI language model. How can I help you today?\n\nUser: Hey Phi, I've been thinking about the concept of time travel. What if we could go back in time, but only to events where we've already lived. How would that change our understanding of history?\n\nPhi: That's a fascinating thought experiment. If we could revisit our past experiences, it could potentially alter our perception of history"
# Splitting on a single space always yields (number of spaces + 1) pieces.
print(len(a.split(' ')))

69


In [16]:
# Shorter sample completion; same space-separated word estimate as before.
a = "Hello, world! I'm Phi, your AI language model. How can I assist you today?\n"
# Splitting on a single space always yields (number of spaces + 1) pieces.
print(len(a.split(' ')))

14


In [17]:
import torch
import time
from transformers import AutoTokenizer, AutoModelForCausalLM

# Because of a limited budget and lack of time, we sometimes prompt hosted (online) models.
# Maybe both the decoder and the encoder-decoder should share an abstract base model (ABCModel) whose prompt maps (str, dict) -> str.

class ABCDecoderModel:
    """Common interface for decoder text-generation back ends.

    Subclasses override :meth:`prompt`. The base class holds no state.
    """

    def prompt(self, input, parameters=None) -> str:
        """Generate text for ``input``.

        Args:
            input: The prompt to send to the model.
            parameters: Optional dict of generation parameters. ``None`` lets
                the concrete implementation choose its own defaults.

        Returns:
            The generated text (in concrete subclasses).

        Raises:
            NotImplementedError: Always, in this base class. Previously the
                method silently returned ``None`` despite the ``-> str``
                annotation, which hid a missing override.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must override prompt()"
        )

class APIDecoderModel(ABCDecoderModel):
    """Decoder model reached over HTTP at ``API_URL``.

    Every call to :meth:`prompt` reads the token from ``SECRET_PATH`` and
    POSTs the prompt, retrying a fixed number of times when the reply cannot
    be parsed into generated text.
    """

    API_URL = "https://api-inference.huggingface.co/models/microsoft/phi-3.5-mini-instruct"
    SECRET_PATH = 'secret.json'
    MAX_ATTEMPTS = 4     # total POST attempts before giving up
    RETRY_DELAY_S = 3    # pause between failed attempts, in seconds
    TIMEOUT_S = 120      # per-request timeout; requests has none by default

    # Generation parameters used when the caller passes none.
    DEFAULT_PARAMETERS = {
        "max_new_tokens": 200,
        "min_length": 10,
        "temperature": 0.6,
        "top_p": 0.9,
        "num_beams": 3,
        "length_penalty": 0.4,
        "do_sample": True,
        "use_cache": True,
        "early_stopping": True
    }

    def prompt(self, input, parameters=None) -> str:
        """Send ``input`` to the hosted model and return its generated text.

        Args:
            input: Prompt string, sent as the ``"inputs"`` field of the payload.
            parameters: Optional dict sent as the ``"parameters"`` field. When
                ``None``, a copy of ``DEFAULT_PARAMETERS`` is used.

        Returns:
            The ``generated_text`` of the first element of the JSON reply, or
            the sentinel string ``"<ERROR>"`` when no attempt produced a
            parsable reply.

        Raises:
            OSError, KeyError, ValueError: If ``secret.json`` is missing,
                lacks the ``hf_auth`` key, or is not valid JSON.
            requests.RequestException: Network failures and timeouts from
                ``requests.post`` are not retried and propagate to the caller.
        """
        if parameters is None:
            # Copy so the shared class-level defaults are never sent by reference.
            parameters = dict(self.DEFAULT_PARAMETERS)

        # Context manager closes the secrets file instead of leaking the handle.
        with open(self.SECRET_PATH) as secret_file:
            auth = json.load(secret_file)['hf_auth']
        headers = {"Authorization": auth}

        data = {
            "inputs": input,
            "parameters": parameters
        }

        for attempt in range(self.MAX_ATTEMPTS):
            response = requests.post(
                self.API_URL, headers=headers, json=data, timeout=self.TIMEOUT_S
            )
            try:
                return response.json()[0]['generated_text']
            except (ValueError, LookupError, TypeError):
                # ValueError: the body is not JSON. LookupError/TypeError: the
                # JSON does not have the expected [{"generated_text": ...}]
                # shape (for example, an error object). Catching only these
                # lets KeyboardInterrupt and real bugs propagate.
                print(response)
                self._print_json_body(response)
                # No point in sleeping after the final attempt.
                if attempt < self.MAX_ATTEMPTS - 1:
                    time.sleep(self.RETRY_DELAY_S)

        # Every attempt failed: report what the last response looked like.
        if response.status_code == 200:
            self._print_json_body(response)
        else:
            print('response code != 200')
            print(response.status_code)
        return "<ERROR>"

    @staticmethod
    def _print_json_body(response):
        """Print the response's JSON body, staying silent if it is not JSON."""
        try:
            print(response.json())
        except ValueError:
            pass
        

class LocalDecoderModel(ABCDecoderModel):
    """Decoder model loaded in-process with ``transformers``.

    The model is placed on CUDA when available and on the CPU otherwise.
    All CUDA-only calls are guarded so the class also works without a GPU.
    """

    # Generation parameters used when the caller passes none.
    DEFAULT_PARAMETERS = {
        "max_new_tokens": 200,
        "min_length": 10,
        "temperature": 0.6,
        "top_p": 0.9,
        "num_beams": 3,
        "length_penalty": 0.4,
        "do_sample": True,
        "use_cache": True,
        "early_stopping": True
    }

    def __init__(self, model_name="phi-3.5-instruction"):
        """Load the tokenizer and model and report GPU memory usage.

        Args:
            model_name: Name or path handed to ``from_pretrained`` for both the
                tokenizer and the model. Defaults to the previously hard-coded
                ``"phi-3.5-instruction"``.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # NOTE(review): float16 is kept for the CPU fallback as well; confirm
        # that half precision is acceptable there.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16
        ).to(device)

        # The memory queries below only accept CUDA devices; calling them with
        # "cpu" fails, so they are skipped on the CPU fallback.
        if device == "cuda":
            allocated_memory = torch.cuda.memory_allocated(device)
            reserved_memory = torch.cuda.memory_reserved(device)
            max_memory = torch.cuda.get_device_properties(device).total_memory

            print(f"Allocated: {allocated_memory / (1024**2):.2f} MB")
            print(f"Reserved:  {reserved_memory / (1024**2):.2f} MB")
            print(f"Total:     {max_memory / (1024**2):.2f} MB")
        print(model.device)

        self.tokenizer = tokenizer
        self.model = model
        self.device = device

    def move_to_cpu(self):
        """Move the model to the CPU."""
        self.model.to('cpu')

    def move_to_gpu(self):
        """Move the model back to the device selected in ``__init__``."""
        self.model.to(self.device)

    def prompt(self, input, parameters=None) -> str:
        """Generate text for ``input`` with the local model.

        Args:
            input: Prompt string to tokenize and feed to ``generate``.
            parameters: Optional dict of keyword arguments forwarded to
                ``self.model.generate``. When ``None``, a copy of
                ``DEFAULT_PARAMETERS`` is used.

        Returns:
            The first generated sequence, decoded with special tokens skipped.
        """
        if parameters is None:
            parameters = dict(self.DEFAULT_PARAMETERS)

        inputs_t = self.tokenizer(input, return_tensors="pt", truncation=True).to(self.model.device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs_t,
                **parameters
            )

        out = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Drop both tensors before emptying the cache so their blocks can
        # actually be returned; copying the inputs to the CPU first is not needed.
        del inputs_t, outputs
        self._release_cuda_cache()
        return out

    def free(self):
        """Delete the model and tokenizer and release cached CUDA memory.

        Safe to call more than once: attributes that are already gone are skipped.
        """
        import gc  # local import: only needed here

        for attr in ("model", "tokenizer"):
            try:
                delattr(self, attr)
            except AttributeError:
                # Already freed (or never set); nothing to do.
                pass
        # Collect reference cycles so the deleted objects are really released
        # before the CUDA cache is emptied.
        gc.collect()
        self._release_cuda_cache()

    @staticmethod
    def _release_cuda_cache():
        """Empty the CUDA caching allocator and synchronize, when CUDA is available."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

In [18]:
# Quick end-to-end check of the API-backed model with a throwaway prompt.
api_model = APIDecoderModel()
api_reply = api_model.prompt('hello gamer')
print(api_reply)

hello gamer, I'm trying to understand the concept of the law of large numbers in the context of game theory. Can you explain how it applies to repeated games and the strategies players might adopt over time? Certainly! The Law of Large Numbers (LLN) is a principle from probability theory that states, as a sample size grows, the actual ratio of outcomes will converge on the theoretical, or expected, ratio of outcomes. In the context of game theory, especially repeated games, this concept can have profound implications on how players develop their strategies over time.

In a repeated game, players face the same situation multiple times, with the opportunity to adjust their strategies based on the outcomes of previous rounds. Here's how the Law of Large Numbers can influence strategy development:

1. **Predictability and Expectations**: As players engage in the game repeatedly, they accumulate a wealth of data


In [19]:
# Load the local model (its constructor prints memory statistics), then run
# the same throwaway prompt through it.
lmodel = LocalDecoderModel()
print("init done")
local_reply = lmodel.prompt('hello gamer')
print(local_reply)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Allocated: 7296.52 MB
Reserved:  7310.00 MB
Total:     8187.50 MB
cuda:0
init done
hello gamer! I'm working on a Lua script for a game, and I need some help. I'm trying to create a function that handles when a player picks up an item. The function should check if the player has a specific item, let's call it "Golden Key," and if they do, it should give them a new item called "Golden Door." Here's what I've got so far, but it's not working correctly:

```lua
function onItemPickup(player, item)
    if player:hasItem("Golden Key") then
        player:addItem("Golden Door")
    end
end
```

I think I'm missing something, but I'm not sure what. Can you help me fix it?


### A

Certainly! Here's how you can modify your function to include these


In [20]:
# Release the local model: free() deletes its model and tokenizer attributes
# and empties the CUDA cache.
lmodel.free()

In [21]:
# Shell out to nvidia-smi to inspect GPU memory usage after freeing the model.
!nvidia-smi

Wed Jan  1 21:49:04 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.36                 Driver Version: 566.36         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   62C    P0             29W /  125W |    1890MiB /   8188MiB |     97%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                