In [1]:
import torch

# Pick the compute device once for the whole notebook: use the MPS backend
# when torch reports it as available, otherwise fall back to the CPU.
# (To force CPU, replace the branch below with `device = torch.device("cpu")`.)
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Extract the details available on page 29 of the specification

In [2]:
import fitz  # PyMuPDF

# Path to the PDF file
pdf_path = "../specifications/AUTOSAR_SWS_CRCLibrary.pdf"


# Keywords to search for CRC-32-related content.
# NOTE(review): "polynomial" is a broad keyword; the recorded output shows it
# already matching lines of the document's revision history, so collection
# starts before the actual CRC-32 section. Consider narrowing the list.
keywords = ["32-bit CRC", "CRC32", "IEEE-802.3", "0xF4ACFB13", "polynomial"]

# Accumulates every line collected while `inside_crc_section` is True.
extracted_text = []
inside_crc_section = False

# Open the PDF. The try/finally guarantees the document handle is released
# even if reading a page raises part-way through the scan.
doc = fitz.open(pdf_path)
try:
    for page in doc:
        text = page.get_text("text")
        lines = text.split("\n")

        for line in lines:
            # If we find a relevant keyword, start extracting
            if any(keyword in line for keyword in keywords):
                inside_crc_section = True

            # Collect content if we are inside the CRC-32 section
            if inside_crc_section:
                extracted_text.append(line)

            # Stop extracting when another major section starts
            if "7.2.4" in line:  # Next section after CRC-32
                inside_crc_section = False
                # NOTE(review): this `break` only leaves the per-line loop, so
                # the rest of the current page is skipped but later pages are
                # still scanned and any keyword hit re-enables collection.
                # Confirm whether the whole scan was meant to end here.
                break
finally:
    # Close the document
    doc.close()

# Join extracted lines into a single text
crc32_content = "\n".join(extracted_text)

# Print extracted content
print(crc32_content)

# Save extracted content to a text file (optional)
with open("CRC32_Extracted.txt", "w", encoding="utf-8") as f:
    f.write(crc32_content)


the polynomial 0x8005
• Editorial changes
2017-12-08
4.3.1
AUTOSAR
Release
Management
• Editorial changes
2016-11-30
4.3.0
AUTOSAR
Release
Management
• Introduction of a new CRC-64 for
E2E Profile 7
• Editorial changes
2015-07-31
4.2.2
AUTOSAR
Release
Management
• Corrected the magic check for the
CRC32 and CRC32P4
1 of 59
Document ID 16: AUTOSAR_SWS_CRCLibrary

Specification of CRC Routines
AUTOSAR CP R22-11
2014-10-31
4.2.1
AUTOSAR
Release
Management
• Introduction of a new CRC-32 with
the polynomial 0xF4ACFB13
• Editorial changes
2014-03-31
4.1.3
AUTOSAR
Release
Management
• CRC32 IEEE 802.3 check values
corrected
• Editorial changes
2013-10-31
4.1.2
AUTOSAR
Release
Management
• Editorial changes
• Removed chapter(s) on change
documentation
2013-03-15
4.1.1
AUTOSAR
Administration
• New examples on how to use CRC
routines and clarifications concerning
CCITT standard
• Removal of debugging concept
2011-12-22
4.0.3
AUTOSAR
Administration
• The GetVersionInfo API is always
available
201

# The following block uses a pretrained LLM.
## TODO: Fine-tune the LLM to generate code using imitation learning (IL)

In [3]:
from transformers import LlamaTokenizer, LlamaForCausalLM

# Checkpoint to load; the 7B variant is kept below as an alternative.
# model_path = 'openlm-research/open_llama_7b'
model_path = 'openlm-research/open_llama_3b'

# Options forwarded to `from_pretrained` for the model weights: half-precision
# dtype, placement on the notebook's selected `device`, and an offload folder.
model_load_options = {
    "torch_dtype": torch.float16,
    "device_map": device,
    "offload_folder": "./offload",
}

tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(model_path, **model_load_options)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


In [14]:
# TODO: Accept the prompt from a UI or the command line instead of hard-coding it.
prompt = 'Write a Verilog module that computes a 32-bit CRC32 checksum using the polynomial 0x04C11DB7 for an 8-bit input stream.'

# Tokenize into PyTorch tensors, then move the id tensor to the selected device.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
input_ids = encoded_prompt.input_ids.to(device)

# Leave the tensor as the cell's last expression so the notebook displays it.
input_ids

tensor([[    1, 14734,   260,  5281,   302,   458,  7996,   342,   520,  2229,
           260, 31822, 31878, 31855, 31854,  3295,   314,  7416, 31878, 31855,
         11607,   390,  1340,   266, 28713, 31822, 31852, 31859, 31852, 31882,
         31851, 31853, 31853, 11722, 31888,   329,   363, 31822, 31886, 31854,
          3295,  5025,  6659, 31843]], device='mps:0')

# TODO: Switch to a different model later to achieve the intended result, then fine-tune it.

In [16]:
# Continue the prompt, capped at 100 newly generated tokens.
generation_output = model.generate(input_ids=input_ids, max_new_tokens=100)

# Decode the first (only) returned sequence back to text and show it.
decoded_text = tokenizer.decode(generation_output[0])
print(decoded_text)

<s>Write a Verilog module that computes a 32-bit CRC32 checksum using the polynomial 0x04C11DB7 for an 8-bit input stream.
Write a Verilog module that computes a 32-bit CRC32 checksum using the polynomial 0x04C11DB7 for an 8-bit input stream. The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream. The module should be able to compute the checks


In [None]:
# Same generation as before, but allowing up to 1000 newly generated tokens.
generation_output = model.generate(input_ids=input_ids, max_new_tokens=1000)

# Decode the first returned sequence back to text and show it.
decoded_text = tokenizer.decode(generation_output[0])
print(decoded_text)

<s>Write a Verilog module that computes a 32-bit CRC32 checksum using the polynomial 0x04C11DB7 for an 8-bit input stream.
Write a Verilog module that computes a 32-bit CRC32 checksum using the polynomial 0x04C11DB7 for an 8-bit input stream. The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream. The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream. The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream. The module should be able to compute the checksum for any input stream. The module should be able to compute the checksum for any input stream.
The module should be able to compute the checksum for any input stream. The module should be able to compute 

In [13]:
# Generate up to 1000 new tokens from whatever `input_ids` currently holds.
# NOTE(review): the output recorded for this cell starts from a different
# prompt than the one defined earlier in the notebook, so `input_ids` was
# presumably reassigned before this ran; re-run top to bottom to confirm.
generation_output = model.generate(input_ids=input_ids, max_new_tokens=1000)

# Decode the first returned sequence back to text and show it.
decoded_text = tokenizer.decode(generation_output[0])
print(decoded_text)

<s>32-bit CRC32 calculator verilog code

### 1. CRC32

```
module crc32(
 input clk,
 input rst,
 input r1,
 input r2,
 output reg c1,
 output reg c2
);

always @(posedge clk) begin
 if (rst) begin
 c1 <= 0;
 c2 <= 0;
 end else begin
 c1 <= c2;
 c2 <= c1;
 end
end
```

### 2. CRC32

```
module crc32(
 input clk,
 input rst,
 input r1,
 input r2,
 output reg c1,
 output reg c2
);

always @(posedge clk) begin
 if (rst) begin
 c1 <= 0;
 c2 <= 0;
 end else begin
 c1 <= c2;
 c2 <= c1;
 end
end
```

### 3. CRC32

```
module crc32(
 input clk,
 input rst,
 input r1,
 input r2,
 output reg c1,
 output reg c2
);

always @(posedge clk) begin
 if (rst) begin
 c1 <= 0;
 c2 <= 0;
 end else begin
 c1 <= c2;
 c2 <= c1;
 end
end
```

### 4. CRC32

```
module crc32(
 input clk,
 input rst,
 input r1,
 input r2,
 output reg c1,
 output reg c2
);

always @(posedge clk) begin
 if (rst) begin
 c1 <= 0;
 c2 <= 0;
 end else begin
 c1 <= c2;
 c2 <= c1;
 end
end
```

### 5. CRC32

```
module crc32(
 input clk,

Test Prompt

In [None]:
# Run a short test prompt and display both the decoded text and the
# individual token strings the model produced.
prompt = 'Calculate the CRC32 checksum for the input data 0x12345678.'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
generation_output = model.generate(
    input_ids=input_ids, max_new_tokens=150
)
print("Generated Text:")
print(tokenizer.decode(generation_output[0]))

# `generation_output` is iterated one sequence at a time; each item is a whole
# sequence of token ids, which is converted to its token strings and printed.
for sequence_ids in generation_output:
    print(tokenizer.convert_ids_to_tokens(sequence_ids))

Generated Text:
<s>Calculate the CRC32 checksum for the input data 0x12345678.

```
crc32 0x12345678
```

### CRC32 checks
['<s>', '▁Calc', 'ulate', '▁the', '▁C', 'RC', '3', '2', '▁checks', 'um', '▁for', '▁the', '▁input', '▁data', '▁', '0', 'x', '1', '2', '3', '4', '5', '6', '7', '8', '.', '<0x0A>', '<0x0A>', '``', '`', '<0x0A>', 'c', 'rc', '3', '2', '▁', '0', 'x', '1', '2', '3', '4', '5', '6', '7', '8', '<0x0A>', '``', '`', '<0x0A>', '<0x0A>', '##', '#', '▁C', 'RC', '3', '2', '▁checks']
