In [2]:
import fitz
from tqdm.auto import tqdm

def text_formatter(text):
    clean_text = text.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    return clean_text

def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    page_and_text = []
    for page_number, page in tqdm(enumerate(doc)):
        text = page.get_text()
        text = text_formatter(text)
        page_and_text.append({"page_number": page_number, 
                              "page_char_count": len(text),
                              "page_word_count": len(text.split()),
                              "page_sentence_count": len(text.split('.')),
                              "page_token_count": len(text) / 4,
                              "text": text})
    return page_and_text
       

page_and_text = extract_text_from_pdf('meta.pdf')
page_and_text[:5]

  from .autonotebook import tqdm as notebook_tqdm
Python(64276) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
19it [00:00, 259.78it/s]


[{'page_number': 0,
  'page_char_count': 2958,
  'page_word_count': 479,
  'page_sentence_count': 31,
  'page_token_count': 739.5,
  'text': "1    Meta Platforms, Inc. (META)  Third Quarter 2024 Results Conference Call  October 30th, 2024    Kenneth Dorell, Director, Investor Relations      Thank you. Good afternoon and welcome to Meta Platforms third quarter 2024 earnings  conference call. Joining me today to discuss our results are Mark Zuckerberg, CEO and Susan Li,  CFO.    Before we get started, I would like to take this opportunity to remind you that our remarks today  will include forward‐looking statements. Actual results may differ materially from those  contemplated by these forward‐looking statements.    Factors that could cause these results to differ materially are set forth in today’s earnings press  release, and in our quarterly report on form 10-Q filed with the SEC. Any forward‐looking  statements that we make on this call are based on assumptions as of today and we und

In [1]:
page_and_text = extract_text_from_pdf('meta.pdf')
page_and_text[:5]

NameError: name 'extract_text_from_pdf' is not defined

In [2]:
import random
random.sample(page_and_text, 5)

[{'page_number': 11,
  'page_char_count': 2853,
  'page_word_count': 497,
  'page_sentence_count': 20,
  'page_token_count': 713.25,
  'text': '12      There are also places where we hope over time that we’ll be able to deploy  these tools against a lot of our content moderation efforts to help make the big  body of content moderation work that we undertake, to help make it more  efficient and effective for us to do so.       And there are lots of other places around the company where I would say we’re  relatively early in exploring the way that we can use LLM based tools to make  different types of work streams more efficient. So all that is to say, it’s  something we’re pretty excited about.      We have lots of teams focused on it. There are sort of small opportunities and  G&A functions to what we hope will be big opportunities in areas like content  moderation and coding productivity over time.      On your second question about headcount, we’re really -- again, we’re still mid- b

In [3]:
import pandas as pd

df = pd.DataFrame(page_and_text)
df.head()

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count,page_token_count,text
0,0,2958,479,31,739.5,"1 Meta Platforms, Inc. (META) Third Quarte..."
1,1,3900,669,29,975.0,2 Meta AI now has more than 500 monthly act...
2,2,2393,380,32,598.25,3 digital worlds so you can feel present wi...
3,3,2831,444,39,707.75,4 impacted in part by the timing of third q...
4,4,3473,533,26,868.25,5 Turning now to the business outlook. Ther...


In [4]:
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count,page_token_count
count,19.0,19.0,19.0,19.0,19.0
mean,9.0,2818.79,471.26,24.74,704.7
std,5.63,652.97,107.13,6.76,163.24
min,0.0,593.0,97.0,8.0,148.25
25%,4.5,2680.5,463.0,22.0,670.12
50%,9.0,2837.0,478.0,25.0,709.25
75%,13.5,3027.5,505.0,28.5,756.88
max,18.0,3900.0,669.0,39.0,975.0


In [5]:
from spacy.lang.en import English

nlp = English()

nlp.add_pipe('sentencizer')

doc = nlp("Hello it is Tri. Nice to meet you")

print(list(doc.sents))



[Hello it is Tri., Nice to meet you]


In [6]:
for item in tqdm(page_and_text):

    item["sentences"] = list(nlp(item["text"]).sents)
    
    # Make sure all sentences are strings
    item["sentences"] = [str(sentence) for sentence in item["sentences"]]
    
    # Count the sentences 
    item["page_sentence_count_spacy"] = len(item["sentences"])

100%|██████████| 19/19 [00:00<00:00, 271.16it/s]


In [7]:
# Inspect an example
random.sample(page_and_text, k=1)

[{'page_number': 0,
  'page_char_count': 2958,
  'page_word_count': 479,
  'page_sentence_count': 31,
  'page_token_count': 739.5,
  'text': "1    Meta Platforms, Inc. (META)  Third Quarter 2024 Results Conference Call  October 30th, 2024    Kenneth Dorell, Director, Investor Relations      Thank you. Good afternoon and welcome to Meta Platforms third quarter 2024 earnings  conference call. Joining me today to discuss our results are Mark Zuckerberg, CEO and Susan Li,  CFO.    Before we get started, I would like to take this opportunity to remind you that our remarks today  will include forward‐looking statements. Actual results may differ materially from those  contemplated by these forward‐looking statements.    Factors that could cause these results to differ materially are set forth in today’s earnings press  release, and in our quarterly report on form 10-Q filed with the SEC. Any forward‐looking  statements that we make on this call are based on assumptions as of today and we und

In [8]:
df = pd.DataFrame(page_and_text)
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count,page_token_count,page_sentence_count_spacy
count,19.0,19.0,19.0,19.0,19.0,19.0
mean,9.0,2818.79,471.26,24.74,704.7,24.21
std,5.63,652.97,107.13,6.76,163.24,5.29
min,0.0,593.0,97.0,8.0,148.25,8.0
25%,4.5,2680.5,463.0,22.0,670.12,22.5
50%,9.0,2837.0,478.0,25.0,709.25,25.0
75%,13.5,3027.5,505.0,28.5,756.88,27.0
max,18.0,3900.0,669.0,39.0,975.0,33.0


Mean of sentences are 25 sentences with roughly 704 tokens
Check out which model to use for Token input dimensions

In [9]:
num_senteces_chunk = 8
def split_list(input_list: list, 
               slice_size: int) -> list[list[str]]:
    """
    Splits the input_list into sublists of size slice_size (or as close as possible).

    For example, a list of 17 sentences would be split into two lists of [[10], [7]]
    """
    return [input_list[i:i + slice_size] for i in range(0, len(input_list), slice_size)]

for item in tqdm(page_and_text):
    item["sentence_chunks"] = split_list(input_list=item["sentences"],
                                         slice_size=num_senteces_chunk)
    item["num_chunks"] = len(item["sentence_chunks"])

100%|██████████| 19/19 [00:00<00:00, 334839.39it/s]


In [10]:
sample = random.sample(page_and_text, k=1)
sample[0]["sentence_chunks"]

[['6    recent months to make Meta AI more helpful and engaging.',
  'Last month, we began introducing  Voice, so you can speak with Meta AI more naturally, and it’s now fully available in English to  people in the US, Australia, Canada and New Zealand.',
  'In the US, people can now also upload  photos to Meta AI to learn more about them, write captions for posts, and add, remove, or change  things about their images with a simple text prompt.',
  'These are all built with our first multi-modal  foundation model, Llama 3.2.',
  '   Threads remains another area where we see exciting potential.',
  'We are bringing on an increasing  number of new users each quarter while depth of engagement also continues to grow.',
  'Looking  ahead, we plan to introduce more features to make it even easier for people to stay up to date on  topics they care about.',
  '   Now to the second driver of our revenue performance: increasing monetization efficiency.'],
 ['There  are two parts to this work.',


In [11]:
df = pd.DataFrame(page_and_text)
df.describe().round(2)

Unnamed: 0,page_number,page_char_count,page_word_count,page_sentence_count,page_token_count,page_sentence_count_spacy,num_chunks
count,19.0,19.0,19.0,19.0,19.0,19.0,19.0
mean,9.0,2818.79,471.26,24.74,704.7,24.21,3.47
std,5.63,652.97,107.13,6.76,163.24,5.29,0.84
min,0.0,593.0,97.0,8.0,148.25,8.0,1.0
25%,4.5,2680.5,463.0,22.0,670.12,22.5,3.0
50%,9.0,2837.0,478.0,25.0,709.25,25.0,4.0
75%,13.5,3027.5,505.0,28.5,756.88,27.0,4.0
max,18.0,3900.0,669.0,39.0,975.0,33.0,5.0


In [12]:
import re

# Split each chunk into its own item
pages_and_chunks = []
for item in tqdm(page_and_text):
    for sentence_chunk in item["sentence_chunks"]:
        chunk_dict = {}
        chunk_dict["page_number"] = item["page_number"]
        
        # Join the sentences together into a paragraph-like structure, aka a chunk (so they are a single string)
        joined_sentence_chunk = "".join(sentence_chunk).replace("  ", " ").strip()
        joined_sentence_chunk = re.sub(r'\.([A-Z])', r'. \1', joined_sentence_chunk) # ".A" -> ". A" for any full-stop/capital letter combo 
        chunk_dict["sentence_chunk"] = joined_sentence_chunk

        # Get stats about the chunk
        chunk_dict["chunk_char_count"] = len(joined_sentence_chunk)
        chunk_dict["chunk_word_count"] = len([word for word in joined_sentence_chunk.split(" ")])
        chunk_dict["chunk_token_count"] = len(joined_sentence_chunk) / 4 # 1 token = ~4 characters
        
        pages_and_chunks.append(chunk_dict)

# How many chunks do we have?
len(pages_and_chunks)

100%|██████████| 19/19 [00:00<00:00, 13350.94it/s]


66

In [13]:
random.sample(pages_and_chunks, k=1)

[{'page_number': 6,
  'sentence_chunk': '7  video generation features - video expansion and image animation. We expect to make them more broadly available by early next year.  Next, I would like to discuss our approach to capital allocation. We continue to take a long-term view in running the business, which involves investing in a portfolio of opportunities that we expect will generate returns over different time periods. We are very optimistic about the set of opportunities in front of us, and believe that investing now in both infrastructure and talent will not only accelerate our progress, but increase the likelihood of maximizing returns within each area. This includes investing in both near-term initiatives to deliver continued healthy revenue growth within our core business, as well as longer-term opportunities that have the scale to deliver compelling returns over time. Given the lead time of our longer-term investments, we also continue to maximize our flexibility so that we c

In [14]:
# Get stats about our chunks
df = pd.DataFrame(pages_and_chunks)
df.describe().round(2)

Unnamed: 0,page_number,chunk_char_count,chunk_word_count,chunk_token_count
count,66.0,66.0,66.0,66.0
mean,8.27,793.56,139.35,198.39
std,5.3,342.67,60.17,85.67
min,0.0,0.0,1.0,0.0
25%,4.0,578.5,104.5,144.62
50%,8.0,830.5,145.5,207.62
75%,13.0,1037.25,180.0,259.31
max,18.0,1441.0,264.0,360.25


In [15]:
# Show random chunks with under 30 tokens in length
min_token_length = 30
for row in df[df["chunk_token_count"] <= min_token_length].iterrows():
    print(f'Chunk token count: {row[1]["chunk_token_count"]} | Text: {row[1]["sentence_chunk"]}')

Chunk token count: 7.25 | Text: Our capital expenditures were
Chunk token count: 11.25 | Text: Reality Labs operating loss was $4.4 billion.
Chunk token count: 14.0 | Text: With that, Krista, let’s open up the call for questions.
Chunk token count: 0.0 | Text: 


In [16]:
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2", 
                                      device="cpu") # choose the device to load the model to (note: GPU will often be *much* faster than CPU)


ImportError: dlopen(/Users/tringuyen1803/Documents/RAG/venv/lib/python3.9/site-packages/torch/_C.cpython-39-darwin.so, 0x0002): Library not loaded: @rpath/libtorch_cpu.dylib
  Referenced from: <9AC90911-7AE8-3789-886C-7C0C9EAC562D> /Users/tringuyen1803/Documents/RAG/venv/lib/python3.9/site-packages/torch/lib/libtorch_python.dylib
  Reason: tried: '/Users/tringuyen1803/Documents/RAG/venv/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib' (no such file), '/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/Users/tringuyen1803/Documents/RAG/venv/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib' (no such file), '/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/runner/work/_temp/anaconda/envs/wheel_py39/lib/libtorch_cpu.dylib' (no such file), '/Users/tringuyen1803/Documents/RAG/venv/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib' (no such file), '/usr/local/lib/libtorch_cpu.dylib' (no such file), '/usr/lib/libtorch_cpu.dylib' (no such file, not in dyld cache)

In [None]:
sentences = [
    "The Sentences Transformers library provides an easy and open-source way to create embeddings.",
    "Sentences can be embedded one by one or as a list of strings.",
    "Embeddings are one of the most powerful concepts in machine learning!",
    "Learn to use embeddings well and you'll be well on your way to being an AI engineer."
]

# Sentences are encoded/embedded by calling model.encode()
embeddings = embedding_model.encode(sentences)
embeddings_dict = dict(zip(sentences, embeddings))

# See the embeddings
for sentence, embedding in embeddings_dict.items():
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("embedding shape:", embedding.shape)

Sentence: The Sentences Transformers library provides an easy and open-source way to create embeddings.
Embedding: [-2.07983628e-02  3.03164423e-02 -2.01217756e-02  6.86484501e-02
 -2.55256519e-02 -8.47686734e-03 -2.07240504e-04 -6.32378086e-02
  2.81606764e-02 -3.33354436e-02  3.02633736e-02  5.30721694e-02
 -5.03527522e-02  2.62288712e-02  3.33314091e-02 -4.51577157e-02
  3.63044851e-02 -1.37120357e-03 -1.20171094e-02  1.14947008e-02
  5.04511073e-02  4.70857024e-02  2.11914033e-02  5.14606386e-02
 -2.03747042e-02 -3.58889587e-02 -6.67795015e-04 -2.94394121e-02
  4.95859347e-02 -1.05639417e-02 -1.52014401e-02 -1.31755823e-03
  4.48197462e-02  1.56023670e-02  8.60379600e-07 -1.21394766e-03
 -2.37979088e-02 -9.09360184e-04  7.34486990e-03 -2.53936765e-03
  5.23370765e-02 -4.68043759e-02  1.66214891e-02  4.71579544e-02
 -4.15599309e-02  9.01927124e-04  3.60277966e-02  3.42214070e-02
  9.68227163e-02  5.94829395e-02 -1.64984781e-02 -3.51249427e-02
  5.92521578e-03 -7.07922154e-04 -2.4103

In [None]:
pages_and_chunks_df = pd.DataFrame(pages_and_chunks).to_dict(orient="records")


In [None]:
embedding_model.to("cpu") # requires a GPU installed, for reference on my local machine, I'm using a NVIDIA RTX 4090

# Create embeddings one by one on the GPU
for item in tqdm(pages_and_chunks_df):
    item["embedding"] = embedding_model.encode(item["sentence_chunk"])

  0%|          | 0/66 [00:00<?, ?it/s]

100%|██████████| 66/66 [00:07<00:00,  9.01it/s]


In [None]:
pages_and_chunks_df

[{'page_number': 0,
  'sentence_chunk': '1  Meta Platforms, Inc. (META) Third Quarter 2024 Results Conference Call October 30th, 2024  Kenneth Dorell, Director, Investor Relations   Thank you. Good afternoon and welcome to Meta Platforms third quarter 2024 earnings conference call. Joining me today to discuss our results are Mark Zuckerberg, CEO and Susan Li, CFO.  Before we get started, I would like to take this opportunity to remind you that our remarks today will include forward‐looking statements. Actual results may differ materially from those contemplated by these forward‐looking statements.  Factors that could cause these results to differ materially are set forth in today’s earnings press release, and in our quarterly report on form 10-Q filed with the SEC. Any forward‐looking statements that we make on this call are based on assumptions as of today and we undertake no obligation to update these statements as a result of new information or future events.  During this call we wi

In [None]:
# Save embeddings to file
text_chunks_and_embeddings_df = pd.DataFrame(pages_and_chunks_df)
embeddings_df_save_path = "text_chunks_and_embeddings_df.csv"
text_chunks_and_embeddings_df.to_csv(embeddings_df_save_path, index=False)

In [None]:
text_chunks_and_embedding_df_load = pd.read_csv(embeddings_df_save_path)
text_chunks_and_embedding_df_load.head()

Unnamed: 0,page_number,sentence_chunk,chunk_char_count,chunk_word_count,chunk_token_count,embedding
0,0,"1 Meta Platforms, Inc. (META) Third Quarter 2...",1020,166,255.0,[ 5.08212931e-02 -1.46360518e-02 -4.16572727e-...
1,0,A reconciliation of GAAP to non‐GAAP measures ...,801,139,200.25,[ 4.08924222e-02 2.55432371e-02 -2.79905945e-...
2,0,And we just passed a milestone of 2 billion ca...,583,99,145.75,[ 4.42170240e-02 3.99474846e-03 -1.42558841e-...
3,0,We are making a lot of progress with our AI ef...,495,86,123.75,[ 1.11740353e-02 1.07218064e-01 -5.26394956e-...
4,1,2 Meta AI now has more than 500 monthly activ...,1183,208,295.75,[ 3.73920053e-02 5.87407537e-02 -2.37385724e-...


In [None]:
import random

import torch
import numpy as np 
import pandas as pd

device = "cuda" if torch.cuda.is_available() else "cpu"

# Import texts and embedding df
text_chunks_and_embedding_df = pd.read_csv("text_chunks_and_embeddings_df.csv")

# Convert embedding column back to np.array (it got converted to string when it got saved to CSV)
text_chunks_and_embedding_df["embedding"] = text_chunks_and_embedding_df["embedding"].apply(lambda x: np.fromstring(x.strip("[]"), sep=" "))

# Convert texts and embedding df to list of dicts
pages_and_chunks = text_chunks_and_embedding_df.to_dict(orient="records")

# Convert embeddings to torch tensor and send to device (note: NumPy arrays are float64, torch tensors are float32 by default)
embeddings = torch.tensor(np.array(text_chunks_and_embedding_df["embedding"].tolist()), dtype=torch.float32).to(device)
embeddings.shape

torch.Size([66, 768])

In [None]:
from sentence_transformers import util, SentenceTransformer
text_chunks_and_embedding_df.head()

Unnamed: 0,page_number,sentence_chunk,chunk_char_count,chunk_word_count,chunk_token_count,embedding
0,0,"1 Meta Platforms, Inc. (META) Third Quarter 2...",1020,166,255.0,"[0.0508212931, -0.0146360518, -0.0416572727, -..."
1,0,A reconciliation of GAAP to non‐GAAP measures ...,801,139,200.25,"[0.0408924222, 0.0255432371, -0.0279905945, -0..."
2,0,And we just passed a milestone of 2 billion ca...,583,99,145.75,"[0.044217024, 0.00399474846, -0.0142558841, -0..."
3,0,We are making a lot of progress with our AI ef...,495,86,123.75,"[0.0111740353, 0.107218064, -0.0526394956, -0...."
4,1,2 Meta AI now has more than 500 monthly activ...,1183,208,295.75,"[0.0373920053, 0.0587407537, -0.0237385724, -0..."


In [None]:
query = "Q3 total revenue"
print(f"Query: {query}")

query_embedding = embedding_model.encode(query, convert_to_tensor=True)

# 3. Get similarity scores with the dot product (we'll time this for fun)
from time import perf_counter as timer
start_time = timer()
dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]
end_time = timer()

print("dotscore", dot_scores[25])
print(f"Time take to get scores on {len(embeddings)} embeddings: {end_time-start_time:.5f} seconds.")
top_results_dot_product = torch.topk(dot_scores, k=5)
top_results_dot_product

Query: Q3 total revenue
dotscore tensor(0.3891)
Time take to get scores on 66 embeddings: 0.00013 seconds.


torch.return_types.topk(
values=tensor([0.6300, 0.6010, 0.5042, 0.5016, 0.4866]),
indices=tensor([10, 12,  9, 13, 14]))

In [None]:
import textwrap

def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)

In [None]:
print(f"Query: {query}")
print("Top 5 results using dot product:")

for score, idx in zip(top_results_dot_product.values, top_results_dot_product.indices):
    print(f"Score: {score:.4f}")
    print_wrapped(pages_and_chunks[idx]["sentence_chunk"])
    print()

Query: Q3 total revenue
Top 5 results using dot product:
Score: 0.6300
R&D increased 21%, mostly driven by higher headcount-related expenses and
infrastructure costs.  Marketing & Sales decreased 2% driven primarily by lower
restructuring costs.  G&A decreased 10% driven primarily by lower legal-related
expenses.  We ended the third quarter with over 72,400 employees, up 9% year-
over-year, with growth primarily driven by hiring in our priority areas of
monetization, infrastructure, Reality Labs, generative AI, as well as regulation
and compliance.  Third quarter operating income was $17.4 billion, representing
a 43% operating margin.  Our tax rate for the quarter was 12%.  Net income was
$15.7 billion or $6.03 per share.  Capital expenditures, including principal
payments on finance leases, were $9.2 billion, driven by investments in servers,
data centers and network infrastructure.

Score: 0.6010
4  impacted in part by the timing of third quarter server deliveries, which will
be paid

In [17]:
!huggingface-cli whoami


trihnguy


In [None]:
def retrieve_relevant_resources(query: str,
                                embeddings: torch.tensor,
                                model: SentenceTransformer=embedding_model,
                                n_resources_to_return: int=5,
                                print_time: bool=True):
    """
    Embeds a query with model and returns top k scores and indices from embeddings.
    """

    # Embed the query
    query_embedding = model.encode(query, 
                                   convert_to_tensor=True) 

    # Get dot product scores on embeddings
    start_time = timer()
    dot_scores = util.dot_score(query_embedding, embeddings)[0]
    end_time = timer()

    if print_time:
        print(f"[INFO] Time taken to get scores on {len(embeddings)} embeddings: {end_time-start_time:.5f} seconds.")

    scores, indices = torch.topk(input=dot_scores, 
                                 k=n_resources_to_return)

    return scores, indices

In [24]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)


# 2. Pick a model we'd like to use (this will depend on how much GPU memory you have available)
#model_id = "google/gemma-7b-it"
model_id = "google/gemma-2b-it" # (we already set this above)
print(f"[INFO] Using model_id: {model_id}")

# 3. Instantiate tokenizer (tokenizer turns text into numbers ready for the model) 
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# 4. Instantiate the model
llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id, 
                                                 torch_dtype=torch.float16, # datatype to use, we want float16
                                                 low_cpu_mem_usage=False, # use full memory 
                                                 )

AttributeError: partially initialized module 'torch' has no attribute 'version' (most likely due to a circular import)

In [35]:
!pip uninstall torch transformers -y


Python(84093) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Found existing installation: torch 2.5.1
Uninstalling torch-2.5.1:
  Successfully uninstalled torch-2.5.1
Found existing installation: transformers 4.47.1
Uninstalling transformers-4.47.1:
  Successfully uninstalled transformers-4.47.1


In [36]:
!pip install torch --no-cache-dir

Python(84164) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Collecting torch
  Downloading torch-2.5.1-cp39-none-macosx_11_0_arm64.whl.metadata (28 kB)
Downloading torch-2.5.1-cp39-none-macosx_11_0_arm64.whl (63.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: torch
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 3.3.1 requires transformers<5.0.0,>=4.41.0, which is not installed.[0m[31m
[0mSuccessfully installed torch-2.5.1
