In [1]:
!pip install "transformers" "accelerate"


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import os
import torch
from unittest.mock import patch

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.dynamic_module_utils import get_imports


def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    """Work around for https://huggingface.co/microsoft/phi-1_5/discussions/72."""
    imports = get_imports(filename)
    if not torch.cuda.is_available() and "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports

with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-3-mini-4k-instruct",
        device_map="auto",
        torch_dtype="auto",
        attn_implementation="eager",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


model.safetensors.index.json:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Some parameters are on the meta device because they were offloaded to the disk.


tokenizer_config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.94M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

In [5]:
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=500,
    do_sample=False
)

Device set to use mps


In [7]:
messages = [
    {"role": "user", "content": "Create a funny joke about Squid Games."}
]

output = generator(messages)
print(output[0]["generated_text"])

 Why don't squids play hide and seek?

Because good luck hiding when you're the star of Squid Games!


In [8]:
prompt = "Write an email apologizing to Mathias for eating his lunch. Explain how it happened.<|assistant|>"

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("mps")

generation_output = model.generate(
    input_ids=input_ids,
    max_new_tokens=20
)
print(tokenizer.decode(generation_output[0]))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Write an email apologizing to Mathias for eating his lunch. Explain how it happened.<|assistant|> Subject: Sincere Apologies for Eating Your Lunch

Dear Mathias


In [9]:
for id in input_ids[0]:
    print(tokenizer.decode(id))

Write
an
email
apolog
izing
to
Math
ias
for
e
ating
his
l
unch
.
Exp
lain
how
it
happened
.
<|assistant|>


In [12]:
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-base')

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [13]:
from transformers import AutoModel
model = AutoModel.from_pretrained('microsoft/deberta-v3-xsmall')
tokens = tokenizer('The answer to everything is 42', return_tensors='pt')
output = model(**tokens)[0]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/241M [00:00<?, ?B/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [14]:
output

tensor([[[-3.3034e+00, -1.3611e-01,  7.7370e-03,  ..., -8.9477e-02,
          -4.0026e-01,  2.0351e-01],
         [ 1.4900e-01,  1.9346e-01,  1.3302e-01,  ..., -5.9063e-02,
          -1.5275e-04, -9.9055e-01],
         [-3.1421e-01,  3.9509e-01,  1.2259e-01,  ..., -3.9398e-02,
          -5.1547e-01, -7.0252e-01],
         ...,
         [-1.2463e+00,  3.4301e-01,  1.5588e-01,  ...,  3.2109e-01,
          -1.8537e-02, -5.5037e-03],
         [-3.4291e-01, -8.9625e-02, -2.6710e-01,  ..., -6.7299e-01,
          -4.4655e-01, -4.8536e-01],
         [-3.0789e+00,  2.6972e-01,  4.3562e-02,  ..., -2.6193e-01,
          -5.3166e-01, -3.1264e-01]]], grad_fn=<NativeLayerNormBackward0>)

In [15]:
output.shape

torch.Size([1, 8, 384])

In [16]:
for token in tokens['input_ids'][0]:
    print(tokenizer.decode(token))

[CLS]
The
 answer
 to
 everything
 is
 42
[SEP]


In [17]:
!pip install "sentence_transformers"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sentence_transformers
  Using cached sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting Pillow (from sentence_transformers)
  Downloading pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.1 kB)
Downloading sentence_transformers-3.3.1-py3-none-any.whl (268 kB)
Downloading pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Pillow, sentence_transformers
Successfully installed Pillow-11.1.0 sentence_transformers-3.3.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [18]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
vector = model.encode("You can run fast!")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [19]:
vector

array([-3.75788175e-02,  4.18258496e-02, -3.12561952e-02,  5.04651479e-02,
        7.14919418e-02,  3.05077676e-02, -2.70160791e-02,  4.89523783e-02,
       -1.02532497e-02, -1.01752169e-02, -3.73260374e-03,  6.42043306e-03,
       -3.22167464e-02,  5.52268475e-02,  3.02441306e-02, -6.22370541e-02,
        1.22366389e-02, -5.97827248e-02,  6.02886602e-02,  4.04265244e-03,
        5.83045706e-02, -1.32668959e-02,  1.25319771e-02, -9.10135079e-03,
       -2.42285784e-02, -3.40190642e-02,  3.43343839e-02,  3.66259762e-03,
       -5.92255825e-03, -4.56817038e-02, -4.17692214e-02, -2.32958477e-02,
        2.23593581e-02,  3.04565728e-02,  2.00352883e-06, -9.76666436e-03,
        3.65819335e-02,  2.36225948e-02, -5.48911579e-02,  3.21304165e-02,
        3.08987368e-02, -3.30057251e-03, -5.75894900e-02,  4.97739809e-03,
       -2.50325967e-02,  3.94218117e-02,  2.20866427e-02, -6.96355626e-02,
       -1.84363667e-02,  6.47622943e-02, -3.19238529e-02, -3.34458717e-04,
       -9.44131315e-02, -

In [20]:
vector.shape

(768,)

In [24]:
!pip install "gensim" "numpy"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
import gensim.downloader as api
model = api.load("glove-wiki-gigaword-50")
model.most_similar([model['king']], topn=11)

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [30]:
!pip install "pandas"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [34]:
import pandas as pd
from urllib import request

data = request.urlopen('https://storage.googleapis.com/maps-premium/dataset/yes_complete/train.txt')
lines = data.read().decode("utf-8").split("\n")[2:]

playlists = [s.rstrip().split() for s in lines if len(s.split()) > 1]
songs_file = request.urlopen('https://storage.googleapis.com/maps-premium/dataset/yes_complete/song_hash.txt')
songs_file = songs_file.read().decode("utf-8").split('\n')
songs = [s.rstrip().split('\t') for s in songs_file]
songs_df = pd.DataFrame(data=songs, columns=['id', 'title', 'artist'])
songs_df = songs_df.set_index('id')

In [35]:
print("Playlist #1:\n", playlists[0], '\n')
print("Playlist #2:\n", playlists[1])

Playlist #1:
 ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '2', '42', '43', '44', '45', '46', '47', '48', '20', '49', '8', '50', '51', '52', '53', '54', '55', '56', '57', '25', '58', '59', '60', '61', '62', '3', '63', '64', '65', '66', '46', '47', '67', '2', '48', '68', '69', '70', '57', '50', '71', '72', '53', '73', '25', '74', '59', '20', '46', '75', '76', '77', '59', '20', '43'] 

Playlist #2:
 ['78', '79', '80', '3', '62', '81', '14', '82', '48', '83', '84', '17', '85', '86', '87', '88', '74', '89', '90', '91', '4', '73', '62', '92', '17', '53', '59', '93', '94', '51', '50', '27', '95', '48', '96', '97', '98', '99', '100', '57', '101', '102', '25', '103', '3', '104', '105', '106', '107', '47', '108', '109', '110', '111', '112', '113', '25', '63', '62', '114', '115', '84', '116', '117', '