In [17]:
import os

from groq import Groq

# Build the Groq client. The key is looked up in the GROQ_API_KEY environment
# variable; os.environ.get yields None when that variable is not set.
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# The conversation consists of a single user message.
user_question = {
    "role": "user",
    "content": "how to calculate the number of tokens of given text?",
}

chat_completion = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[user_question],
)

# Print the text of the first returned choice.
first_choice = chat_completion.choices[0]
print(first_choice.message.content)

A token is a unit of text, such as a word, character, or subword, depending on the context and the task at hand. Calculating the number of tokens in a given text involves breaking down the text into its constituent parts. Here are some common ways to calculate the number of tokens:

**Method 1: Word-level tokenization**

Split the text into individual words, separated by spaces, punctuation, or special characters. This is the most common approach.

Example: "Hello, how are you?" → ["Hello,", "how", "are", "you?"]

Tokens: 4

**Method 2: Character-level tokenization**

Split the text into individual characters.

Example: "Hello" → ["H", "e", "l", "l", "o"]

Tokens: 5

**Method 3: Subword-level tokenization**

Split the text into subwords, which are smaller units of text, such as word pieces or morphemes.

Example: "unbreakable" → ["un-", "break-", "able"]

Tokens: 3

**Method 4: Tokenization using NLTK (Natural Language Toolkit)**

Use a library like NLTK, which provides a tokenization 

In [None]:
import pygame
import sys
import random
import time

# Direction constants
UP = 1
RIGHT = 2
DOWN = 3
LEFT = 4

class SnakeGame:
    """Minimal Snake game drawn with pygame on a fixed 800x600 window.

    State kept on the instance:
      * ``snake``     - list of (x, y) pixel positions, head first (index 0).
      * ``direction`` - one of the module-level UP / RIGHT / DOWN / LEFT constants.
      * ``apple``     - (x, y) pixel position of the current apple.
      * ``score``     - number of apples eaten since the last reset.
    """

    # Side length in pixels of one grid cell: a snake segment, the apple and
    # the distance travelled per step all use this size.
    CELL_SIZE = 10

    # Movement per step, in grid cells, for each direction (screen y grows downwards).
    _STEPS = {UP: (0, -1), RIGHT: (1, 0), DOWN: (0, 1), LEFT: (-1, 0)}

    # Direction that would reverse the snake onto its own neck.
    _OPPOSITE = {UP: DOWN, DOWN: UP, LEFT: RIGHT, RIGHT: LEFT}

    def __init__(self):
        """Initialise pygame, open the window and set up a fresh game."""
        pygame.init()
        self.width, self.height = 800, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Snake Game")
        self.clock = pygame.time.Clock()
        self.reset()

    def reset(self):
        """Restore the starting state: 3-segment snake heading RIGHT, new apple, score 0."""
        cell = self.CELL_SIZE
        self.direction = RIGHT
        # Head first, body trailing to the LEFT in contiguous cells, so the
        # snake is one connected piece and does not start by moving through
        # its own body.
        self.snake = [(200, 200), (200 - cell, 200), (200 - 2 * cell, 200)]
        self.apple = self.get_random_apple()
        self.score = 0

    def get_random_apple(self):
        """Return a random grid-aligned (x, y) position that is not on the snake.

        If every cell is occupied by the snake, any grid cell may be returned.
        """
        cell = self.CELL_SIZE
        # getattr keeps this usable even if it is called before `snake` exists.
        occupied = set(getattr(self, "snake", ()))
        free_cells = [
            (x, y)
            for x in range(0, self.width - cell + 1, cell)
            for y in range(0, self.height - cell + 1, cell)
            if (x, y) not in occupied
        ]
        if free_cells:
            return random.choice(free_cells)
        # Board completely full: fall back to an unrestricted pick.
        x = random.randint(0, (self.width - cell) // cell) * cell
        y = random.randint(0, (self.height - cell) // cell) * cell
        return (x, y)

    def play(self):
        """Run the game loop forever; the process exits via sys.exit() on a QUIT event."""
        key_to_direction = {
            pygame.K_UP: UP,
            pygame.K_DOWN: DOWN,
            pygame.K_LEFT: LEFT,
            pygame.K_RIGHT: RIGHT,
        }
        while True:
            # Direction of the most recent move. Key presses are validated
            # against this, not against self.direction, so two quick presses
            # within one frame cannot add up to a 180-degree reversal.
            heading = self.direction
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
                elif event.type == pygame.KEYDOWN:
                    wanted = key_to_direction.get(event.key)
                    if wanted is not None and wanted != self._OPPOSITE[heading]:
                        self.direction = wanted

            self.move_snake()
            self.check_collision()
            self.draw_game()  # draw_game already pushes the frame to the display

            self.clock.tick(10)  # limit the loop to 10 iterations per second

    def move_snake(self):
        """Advance the head one cell in the current direction.

        Eating the apple increments the score, spawns a new apple and lets
        the snake grow by one segment; otherwise the tail segment is dropped.

        Raises:
            KeyError: if ``self.direction`` is not one of the four direction constants.
        """
        cell = self.CELL_SIZE
        dx, dy = self._STEPS[self.direction]
        head_x, head_y = self.snake[0]
        new_head = (head_x + dx * cell, head_y + dy * cell)

        self.snake.insert(0, new_head)

        if new_head == self.apple:
            self.score += 1
            self.apple = self.get_random_apple()
        else:
            self.snake.pop()

    def check_collision(self):
        """End the game if the head left the window or overlaps a body segment."""
        cell = self.CELL_SIZE
        head = self.snake[0]
        head_x, head_y = head
        inside = (0 <= head_x <= self.width - cell and
                  0 <= head_y <= self.height - cell)
        # game_over() replaces self.snake, so it is called at most once and
        # nothing indexes into the old snake afterwards.
        if not inside or head in self.snake[1:]:
            self.game_over()

    def game_over(self):
        """Print the final score and start a new game."""
        print("Game Over! Your score is", self.score)
        self.reset()

    def draw_game(self):
        """Draw the frame (white background, green snake, red apple) and update the display."""
        cell = self.CELL_SIZE
        self.screen.fill((255, 255, 255))
        for pos in self.snake:
            pygame.draw.rect(self.screen, (0, 255, 0), pygame.Rect(pos[0], pos[1], cell, cell))
        pygame.draw.rect(self.screen, (255, 0, 0), pygame.Rect(self.apple[0], self.apple[1], cell, cell))
        pygame.display.update()

# Start the game only when this code runs as the main program, not when it
# is imported from another module.
if __name__ == "__main__":
    game = SnakeGame()  # SnakeGame.__init__ initialises pygame and opens the window
    game.play()  # enters the `while True` game loop; it ends via sys.exit() on a QUIT event

In [10]:
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """Open the PDF at ``pdf_path`` and return the ``PyPDF2.PdfReader`` for it.

    Despite the name, no text is extracted here: the reader object itself is
    returned, and the caller pulls text out of individual pages afterwards
    (for example ``reader.pages[i].extract_text()``).

    Args:
        pdf_path: location of the PDF, passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        The ``PyPDF2.PdfReader`` instance created for ``pdf_path``.
    """
    return PyPDF2.PdfReader(pdf_path)

# NOTE(review): absolute, machine-specific path - this only resolves on the
# original author's machine; consider a path relative to a configurable data dir.
PDF_PATH = '/Users/matansharon/python/Data_science/papers/attention is all you need.pdf'

# `file` is the PyPDF2.PdfReader returned by extract_text_from_pdf, not text.
file = extract_text_from_pdf(PDF_PATH)

In [16]:
# Display the text extracted from the page at index 10 of the reader held in
# `file` (presumably the 11th page if indexing is 0-based - confirm).
file.pages[10].extract_text()

'[5]Kyunghyun Cho, Bart van Merrienboer, Caglar Gulcehre, Fethi Bougares, Holger Schwenk,\nand Yoshua Bengio. Learning phrase representations using rnn encoder-decoder for statistical\nmachine translation. CoRR , abs/1406.1078, 2014.\n[6]Francois Chollet. Xception: Deep learning with depthwise separable convolutions. arXiv\npreprint arXiv:1610.02357 , 2016.\n[7]Junyoung Chung, Çaglar Gülçehre, Kyunghyun Cho, and Yoshua Bengio. Empirical evaluation\nof gated recurrent neural networks on sequence modeling. CoRR , abs/1412.3555, 2014.\n[8]Chris Dyer, Adhiguna Kuncoro, Miguel Ballesteros, and Noah A. Smith. Recurrent neural\nnetwork grammars. In Proc. of NAACL , 2016.\n[9]Jonas Gehring, Michael Auli, David Grangier, Denis Yarats, and Yann N. Dauphin. Convolu-\ntional sequence to sequence learning. arXiv preprint arXiv:1705.03122v2 , 2017.\n[10] Alex Graves. Generating sequences with recurrent neural networks. arXiv preprint\narXiv:1308.0850 , 2013.\n[11] Kaiming He, Xiangyu Zhang, Shaoqing