In [None]:
!pip install transformers


In [None]:
!pip install psutil
#this is for the RAM
!pip install --upgrade pandas
import psutil
import torch
import subprocess
import pandas as pd
import threading
import time
import warnings
import os
import matplotlib.pyplot as plt

In [None]:
def print_memory_usage():
    """Print total, used and available system RAM, each in GB with two decimals.

    One GB is taken as 1024 * 1024 * 1024 bytes. All three figures are read
    from a single ``psutil.virtual_memory()`` snapshot so they describe the
    same instant and are consistent with one another.
    """
    bytes_per_gb = 1024 * 1024 * 1024
    # Query once: separate calls could observe different memory states.
    memory = psutil.virtual_memory()
    print(f"Total RAM: {memory.total / bytes_per_gb:.2f} GB")
    print(f"Used RAM: {memory.used / bytes_per_gb:.2f} GB")
    print(f"Available RAM: {memory.available / bytes_per_gb:.2f} GB")

# Report host RAM once here; the model is loaded in a later cell.
print_memory_usage()

In [None]:
# Show the installed PyTorch version, then the CUDA version this build reports,
# one value per line.
print(torch.__version__, torch.version.cuda, sep="\n")

In [None]:
import os

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Meta-Llama-3-8B"
modelpath = "model"
model_cache_dir = "modelCheckpoint"  # Specify your desired directory here

# SECURITY: never embed an access token in the notebook. The token is read from
# the HF_TOKEN environment variable instead. A token that was previously
# committed in this cell must be treated as leaked and revoked.
# When HF_TOKEN is unset, None is passed and the library falls back to whatever
# credentials are already configured locally.
hf_token = os.environ.get("HF_TOKEN")

# Release cached, unused GPU memory before loading the weights.
torch.cuda.empty_cache()

# Load the weights in half precision; downloaded files are cached under `modelpath`.
my_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=modelpath,
    torch_dtype=torch.float16,
    device_map="auto",
    token=hf_token,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)


In [None]:
import json
# Prompt template with two placeholders filled in through str.format:
#   {instruction} - the task statement placed under "### Instruction"
#   {response}    - text placed under "### Response" that seeds the model's answer
SC2_INSTRUCT_PROMPT = """You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.

### Instruction
{instruction}

### Response
{response}"""
def extract_prompts(file_path):
    """Read a JSONL task file and build the instruction/response-prefix pair per task.

    Each non-blank line is parsed as a JSON object. Its ``prompt`` value is
    stripped of surrounding whitespace and wrapped in two ways:

    * ``instruction``: a fixed header line followed by the prompt inside a
      fenced ``python`` code block.
    * ``response_prefix``: an opening ``python`` fence followed by the prompt,
      preceded by a newline unless ``SC2_INSTRUCT_PROMPT`` already ends with one.

    Blank lines are skipped instead of crashing the JSON parser, and the file
    is decoded as UTF-8 regardless of the platform's default encoding.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        dict mapping each entry's ``task_id`` (``''`` when the key is absent) to
        ``{"instruction": str, "response_prefix": str}``. Entries sharing a
        ``task_id`` overwrite one another; the last one wins.
    """
    # Loop-invariant pieces are built once instead of once per line.
    prompt_header = "Write a Python function to solve the given task:"
    leading_newline = "" if SC2_INSTRUCT_PROMPT.endswith("\n") else "\n"

    prompts = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                # Tolerate empty / whitespace-only lines in the JSONL file.
                continue
            entry = json.loads(line)
            task_id = entry.get('task_id', '')
            prompt = entry.get('prompt', '').strip()

            # Instruction: header plus the prompt in a fenced code block.
            instruction = f"{prompt_header}\n```python\n{prompt}\n```"
            # Response prefix: open a code block and repeat the prompt so the
            # model continues the function it was given.
            response_prefix = f"{leading_newline}```python\n{prompt}"

            prompts[task_id] = {
                "instruction": instruction,
                "response_prefix": response_prefix
            }
    return prompts

# Usage example: build the prompt dictionary from the HumanEval JSONL file and
# print the whole mapping (task_id -> instruction / response prefix).
prompts = extract_prompts("humaneval/human-eval-v2-20210705.jsonl")
print(prompts)

In [None]:
def extract_function_body(completion: str) -> str:
    """Return the text following the first ``def`` line of the response's code block.

    The search starts at the first "### Response" marker, takes the text between
    the next triple-backtick fence and the fence after it (or the end of the
    string when no closing fence exists), and keeps everything after the line
    that contains the first ``def ``. Surrounding whitespace is stripped and
    triple-quoted spans are then removed one after another.

    An empty string is returned when the marker, the opening fence, a ``def ``
    or a newline after that ``def `` cannot be found.
    """
    fence = "```"

    _, marker, after_marker = completion.partition("### Response")
    if not marker:
        return ""

    _, opening_fence, after_fence = after_marker.partition(fence)
    if not opening_fence:
        return ""

    # partition() yields the whole remainder when there is no closing fence.
    code = after_fence.partition(fence)[0].strip()

    def_at = code.find('def ')
    if def_at == -1:
        return ""

    signature_end = code.find('\n', def_at)
    if signature_end == -1:
        return ""

    body = code[signature_end:].strip()

    # Drop triple-quoted spans; double quotes take precedence over single quotes
    # for as long as any remain in the text.
    while True:
        if '"""' in body:
            quote = '"""'
        elif "'''" in body:
            quote = "'''"
        else:
            break
        opening = body.find(quote)
        closing = body.find(quote, opening + 3)
        if closing == -1:
            # Unterminated span: leave the rest untouched.
            break
        body = body[:opening] + body[closing + 3:]

    return body

In [None]:
#NEW APPROACH WHERE WRITING TO A FILE OCCURS OUTSIDE OF THE MEASURING
import os
import warnings
import json
import subprocess
import time
import threading
import pandas as pd
import matplotlib.pyplot as plt

# Set TOKENIZERS_PARALLELISM to "false" for this process and suppress all
# Python warnings for the rest of the session.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings('ignore')

# Module-level flag read by the monitoring loop on every iteration; setting it
# to False makes the background sampler finish.
keep_monitoring = True

def fetch_gpu_power():
    """Fetch the current power draw of every GPU reported by nvidia-smi.

    Returns:
        list[float]: One value per output line, in the order nvidia-smi prints
        them. The list is empty when nvidia-smi prints nothing.

    Raises:
        FileNotFoundError: nvidia-smi is not on PATH.
        subprocess.CalledProcessError: nvidia-smi exits with a non-zero status.
        ValueError: a non-empty output line is not a number.
    """
    # Argument list instead of a shell string: no shell is needed for a fixed command.
    cmd = ["nvidia-smi", "--query-gpu=power.draw", "--format=csv,noheader,nounits"]
    raw_output = subprocess.check_output(cmd).decode('utf-8')
    # Skip blank lines so empty output gives [] rather than failing on float('').
    return [float(line) for line in raw_output.splitlines() if line.strip()]

def monitor_gpu_energy_usage(output_csv="gpu_energy_usage.csv"):
    """Sample GPU power until ``keep_monitoring`` is cleared, then save and plot it.

    Intended to run in a background thread. While the module-level
    ``keep_monitoring`` flag is truthy, the power draw of up to two GPUs is read
    through ``fetch_gpu_power`` and the loop then sleeps for the nominal
    sampling period. A GPU that is not reported is recorded as 0 W.

    After the loop ends, per-sample total and average power and per-GPU energy
    (with running totals) are derived, the table is written to ``output_csv``
    and two power-over-time figures are shown.

    The CSV is written before any plotting, and its parent directory is created
    when missing, so neither a plotting error nor an absent output folder can
    discard the collected samples.

    Args:
        output_csv: Destination path of the CSV file.
    """
    sample_period_s = 0.5  # nominal pause between two readings, in seconds
    readings = []

    while keep_monitoring:
        gpu_power = fetch_gpu_power()
        gpu_0_power = gpu_power[0] if len(gpu_power) > 0 else 0
        gpu_1_power = gpu_power[1] if len(gpu_power) > 1 else 0

        # Wall-clock timestamp with one-second resolution.
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        readings.append([now, gpu_0_power, gpu_1_power])

        time.sleep(sample_period_s)

    df = pd.DataFrame(readings, columns=["Timestamp", "GPU_0_Power_W", "GPU_1_Power_W"])
    df['Total_Power_W'] = df['GPU_0_Power_W'] + df['GPU_1_Power_W']
    # The average always divides by two, even when only one GPU reported a value.
    df['Average_Power_W'] = (df['GPU_0_Power_W'] + df['GPU_1_Power_W']) / 2

    # Energy per sample assumes every reading covers exactly one nominal period.
    # The real spacing is that period plus the time spent in fetch_gpu_power,
    # so these figures are an approximation.
    time_interval_hours = sample_period_s / 3600
    df['GPU_0_Energy_Wh'] = df['GPU_0_Power_W'] * time_interval_hours
    df['GPU_1_Energy_Wh'] = df['GPU_1_Power_W'] * time_interval_hours
    # Running totals of the per-sample energy.
    df['GPU_0_Energy_Wh_Cumulative'] = df['GPU_0_Energy_Wh'].cumsum()
    df['GPU_1_Energy_Wh_Cumulative'] = df['GPU_1_Energy_Wh'].cumsum()

    # Persist the measurements first so nothing below can lose them.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_csv, index=False)

    # Figure 1: power of each GPU over time.
    plt.figure(figsize=(12, 6))
    plt.plot(df['Timestamp'], df['GPU_0_Power_W'], label='GPU 0 Power (W)', marker='o')
    plt.plot(df['Timestamp'], df['GPU_1_Power_W'], label='GPU 1 Power (W)', marker='x')
    plt.title('GPU Power Consumption Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel('Power (W)')
    plt.legend()
    plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
    plt.tight_layout()  # Adjust layout to fit labels
    plt.show()

    # Figure 2: summed power of both GPUs over time (in W, not Wh).
    plt.figure(figsize=(12, 6))
    plt.plot(df['Timestamp'], df['Total_Power_W'], label='Total Power (W)', marker='o', linestyle='-', color='purple')
    plt.title('Total GPU Power Consumption Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel('Total Power (W)')
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def perform_inference(prompts):
    """Generate one greedy completion per prompt and return the decoded texts.

    Args:
        prompts: Mapping of task id to a dict with the keys ``"instruction"``
            and ``"response_prefix"``; both are inserted into
            ``SC2_INSTRUCT_PROMPT``.

    Returns:
        dict mapping each task id to the decoded output sequence (special
        tokens skipped).

    Raises:
        ValueError: the stop-token lookup does not return a one-element list.
    """
    # The stop token does not depend on the prompt, so it is resolved once
    # before the loop instead of once per task inside the measured section.
    # THE CONFIG FOR END TOKENS IS SLIGHTLY DIFFERENT HERE WITH THE REST OF THE MODEL SETUPS
    # NOTE(review): confirm that "\n```" exists as a single vocabulary entry for
    # this tokenizer; if it does not, the id obtained here may be None or an
    # unknown-token id and generation would not stop at the closing fence.
    stop_tokens = ["\n```"]
    eos_token_ids = tokenizer.convert_tokens_to_ids(stop_tokens)
    if isinstance(eos_token_ids, list) and len(eos_token_ids) == 1:
        eos_token_id = eos_token_ids[0]
    else:
        raise ValueError("Failed to convert stop tokens to valid token IDs")

    results = {}
    for task_id, prompt in prompts.items():
        input_text = SC2_INSTRUCT_PROMPT.format(
            instruction=prompt["instruction"],
            response=prompt["response_prefix"]
        )
        input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")

        # do_sample=False selects greedy decoding; at most 150 new tokens.
        output = my_model.generate(
            input_ids, temperature=0.0, top_p=1, num_return_sequences=1,
            do_sample=False, pad_token_id=tokenizer.eos_token_id,
            eos_token_id=eos_token_id, max_new_tokens=150
        )

        results[task_id] = tokenizer.decode(output[0], skip_special_tokens=True)

    return results

def extract_and_write_results(results, output_jsonl="outputLlama3_8b_4.jsonl"):
    """Reduce each completion with extract_function_body and save them as JSONL.

    Args:
        results: Mapping of task id to the raw completion text.
        output_jsonl: File to (over)write; one JSON object per line with the
            keys ``task_id`` and ``completion``.
    """
    # Run the extraction for every task before the file is opened.
    records = [
        {'task_id': task_id, 'completion': extract_function_body(raw_text)}
        for task_id, raw_text in results.items()
    ]

    with open(output_jsonl, 'w') as f:
        f.writelines(json.dumps(record) + '\n' for record in records)

# Start the GPU power sampler in a background thread; it writes its CSV to the
# path given here once the monitoring loop has ended.
monitor_thread = threading.Thread(target=monitor_gpu_energy_usage, args=("Llama3/gpu_energy_usageLlama3_8b_16bitTensors7.csv",))
monitor_thread.start()

# Run inference while the sampler is active.
# NOTE(review): this comment used to say "From HumanEval the first 10 tasks",
# but `prompts` is passed as-is and no slicing is visible here - confirm which
# subset is intended.
try:
    inference_results = perform_inference(prompts)
finally:
    # Clear the flag and wait for the sampler thread to finish, even when
    # inference raised.
    keep_monitoring = False
    monitor_thread.join()

# Post-process and write the completions only after monitoring has stopped, so
# the file writing is not part of the measured interval.
extract_and_write_results(inference_results)

print("Monitoring stopped. CSV file should be generated with real GPU power usage values.")
print("Inference results have been processed and written to JSONL file.")


In [None]:
# Dump the raw completions (task_id -> decoded text) for inspection.
print(inference_results)

In [None]:
## OLD APPROACH WHERE WRITING TO THE FILE IS INSIDE THE MEASURED LOOP
# NOTE(review): this cell defines fetch_gpu_power, monitor_gpu_energy_usage and
# perform_inference again; running it replaces the definitions from the
# previous cell.

# Set TOKENIZERS_PARALLELISM to "false" for this process and suppress all
# Python warnings for the rest of the session.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings('ignore')

# Module-level flag read by the monitoring loop on every iteration; setting it
# to False makes the background sampler finish.
keep_monitoring = True
def write_jsonl(data, path):
    """Append ``data`` as a single JSON line to the file at ``path``."""
    with open(path, 'a') as sink:
        # print() supplies the trailing newline after the serialized record.
        print(json.dumps(data), file=sink)
def fetch_gpu_power():
    """Fetch the current power draw of every GPU reported by nvidia-smi.

    Returns:
        list[float]: One value per output line, in the order nvidia-smi prints
        them. The list is empty when nvidia-smi prints nothing.

    Raises:
        FileNotFoundError: nvidia-smi is not on PATH.
        subprocess.CalledProcessError: nvidia-smi exits with a non-zero status.
        ValueError: a non-empty output line is not a number.
    """
    # Argument list instead of a shell string: no shell is needed for a fixed command.
    cmd = ["nvidia-smi", "--query-gpu=power.draw", "--format=csv,noheader,nounits"]
    raw_output = subprocess.check_output(cmd).decode('utf-8')
    # Skip blank lines so empty output gives [] rather than failing on float('').
    return [float(line) for line in raw_output.splitlines() if line.strip()]

def monitor_gpu_energy_usage(output_csv="gpu_energy_usage.csv"):
    """Sample GPU power until ``keep_monitoring`` is cleared, then save and plot it.

    Intended to run in a background thread. While the module-level
    ``keep_monitoring`` flag is truthy, the power draw of up to two GPUs is read
    through ``fetch_gpu_power`` and the loop then sleeps for the nominal
    sampling period. A GPU that is not reported is recorded as 0 W.

    After the loop ends, per-sample total and average power and per-GPU energy
    (with running totals) are derived, the table is written to ``output_csv``
    and two power-over-time figures are shown.

    The CSV is written before any plotting, and its parent directory is created
    when missing, so neither a plotting error nor an absent output folder can
    discard the collected samples.

    Args:
        output_csv: Destination path of the CSV file.
    """
    sample_period_s = 0.5  # nominal pause between two readings, in seconds
    readings = []

    while keep_monitoring:
        gpu_power = fetch_gpu_power()
        gpu_0_power = gpu_power[0] if len(gpu_power) > 0 else 0
        gpu_1_power = gpu_power[1] if len(gpu_power) > 1 else 0

        # Wall-clock timestamp with one-second resolution.
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        readings.append([now, gpu_0_power, gpu_1_power])

        time.sleep(sample_period_s)

    df = pd.DataFrame(readings, columns=["Timestamp", "GPU_0_Power_W", "GPU_1_Power_W"])
    df['Total_Power_W'] = df['GPU_0_Power_W'] + df['GPU_1_Power_W']
    # The average always divides by two, even when only one GPU reported a value.
    df['Average_Power_W'] = (df['GPU_0_Power_W'] + df['GPU_1_Power_W']) / 2

    # Energy per sample assumes every reading covers exactly one nominal period.
    # The real spacing is that period plus the time spent in fetch_gpu_power,
    # so these figures are an approximation.
    time_interval_hours = sample_period_s / 3600
    df['GPU_0_Energy_Wh'] = df['GPU_0_Power_W'] * time_interval_hours
    df['GPU_1_Energy_Wh'] = df['GPU_1_Power_W'] * time_interval_hours
    # Running totals of the per-sample energy.
    df['GPU_0_Energy_Wh_Cumulative'] = df['GPU_0_Energy_Wh'].cumsum()
    df['GPU_1_Energy_Wh_Cumulative'] = df['GPU_1_Energy_Wh'].cumsum()

    # Persist the measurements first so nothing below can lose them.
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_csv, index=False)

    # Figure 1: power of each GPU over time.
    plt.figure(figsize=(12, 6))
    plt.plot(df['Timestamp'], df['GPU_0_Power_W'], label='GPU 0 Power (W)', marker='o')
    plt.plot(df['Timestamp'], df['GPU_1_Power_W'], label='GPU 1 Power (W)', marker='x')
    plt.title('GPU Power Consumption Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel('Power (W)')
    plt.legend()
    plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
    plt.tight_layout()  # Adjust layout to fit labels
    plt.show()

    # Figure 2: summed power of both GPUs over time (in W, not Wh).
    plt.figure(figsize=(12, 6))
    plt.plot(df['Timestamp'], df['Total_Power_W'], label='Total Power (W)', marker='o', linestyle='-', color='purple')
    plt.title('Total GPU Power Consumption Over Time')
    plt.xlabel('Timestamp')
    plt.ylabel('Total Power (W)')
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()
def perform_inference(prompts):
    """Generate one greedy completion per prompt and append each result to a JSONL file.

    For every task the prompt is formatted with ``SC2_INSTRUCT_PROMPT``, a
    completion is generated, reduced with ``extract_function_body`` and
    appended through ``write_jsonl`` to "Llama3/outputLlama3_8b_3.jsonl"
    immediately, i.e. inside the loop.

    Args:
        prompts: Mapping of task id to a dict with the keys ``"instruction"``
            and ``"response_prefix"``.

    Raises:
        ValueError: the stop-token lookup does not return a one-element list.
    """
    # The stop token does not depend on the prompt, so it is resolved once
    # before the loop instead of once per task inside the measured section.
    # NOTE(review): confirm that "\n```" exists as a single vocabulary entry for
    # this tokenizer; if it does not, the id obtained here may be None or an
    # unknown-token id and generation would not stop at the closing fence.
    stop_tokens = ["\n```"]
    eos_token_ids = tokenizer.convert_tokens_to_ids(stop_tokens)
    if isinstance(eos_token_ids, list) and len(eos_token_ids) == 1:
        eos_token_id = eos_token_ids[0]
    else:
        raise ValueError("Failed to convert stop tokens to valid token IDs")

    for task_id, prompt in prompts.items():
        input_text = SC2_INSTRUCT_PROMPT.format(
            instruction=prompt["instruction"],
            response=prompt["response_prefix"]
        )
        input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")

        # do_sample=False selects greedy decoding; at most 150 new tokens.
        output = my_model.generate(
            input_ids, temperature=0.0, top_p=1, num_return_sequences=1,
            do_sample=False, pad_token_id=tokenizer.eos_token_id,
            eos_token_id=eos_token_id, max_new_tokens=150
        )
        completion = tokenizer.decode(output[0], skip_special_tokens=True)
        truncated_response = extract_function_body(completion)

        write_jsonl({'task_id': task_id, 'completion': truncated_response}, "Llama3/outputLlama3_8b_3.jsonl")
# Start the GPU power sampler in a background thread; it writes its CSV to the
# path given here once the monitoring loop has ended.
monitor_thread = threading.Thread(target=monitor_gpu_energy_usage, args=("Llama3/gpu_energy_usageLlama3_8b_16bitTensors6.csv",))
monitor_thread.start()

# Run inference while the sampler is active; in this variant every result is
# written to disk inside perform_inference.
# NOTE(review): this comment used to say "From HumanEval the first 10 tasks",
# but `prompts` is passed as-is and no slicing is visible here - confirm which
# subset is intended.
try:
    perform_inference(prompts)
finally:
    # Clear the flag and wait for the sampler thread to finish, even when
    # inference raised.
    keep_monitoring = False
    monitor_thread.join()

print("Monitoring stopped. CSV file should be generated with real GPU power usage values.")