In [2]:
!nvidia-smi

Thu Nov  9 20:41:18 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P4            On   | 00000000:00:04.0 Off |                    0 |
| N/A   62C    P0    67W /  75W |   7596MiB /  7680MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [5]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The path to your file (change this to the name of your file)
file_path = 'training_log_finetune_vit_non_tma_big_and_small.txt'

# Read the file
with open(file_path, 'r') as file:
    lines = file.readlines()

# Initialize lists to hold the extracted data
loss_steps = []
loss_values = []
val_steps = []
val_accuracies = []

# Regular expressions to match lines with loss and validation accuracy
loss_regex = re.compile(r"step: (\d+), loss: ([\d.]+)")
val_accuracy_regex = re.compile(r"step: (\d+), validation accuracy: ([\d.]+)%")

# Process the file line by line
for line in lines:
    loss_match = loss_regex.search(line)
    val_accuracy_match = val_accuracy_regex.search(line)
    
    # If the line contains loss information, add it to the list
    if loss_match:
        step, loss = loss_match.groups()
        loss_steps.append(int(step))
        loss_values.append(float(loss))
    
    # If the line contains validation accuracy, add it to the list
    if val_accuracy_match:
        step, accuracy = val_accuracy_match.groups()
        val_steps.append(int(step))
        val_accuracies.append(float(accuracy))

# Calculate the moving average of the loss with a window size of 5
window_size = 1000
moving_averages = np.convolve(loss_values, np.ones(window_size)/window_size, mode='valid')

# Convert the lists to Pandas Series for easier plotting
loss_series = pd.Series(loss_values, index=loss_steps, name="Loss")
moving_average_series = pd.Series(moving_averages, index=loss_steps[window_size - 1:], name="Moving Average Loss")
val_accuracy_series = pd.Series(val_accuracies, index=val_steps, name="Validation Accuracy")

# Set the style for the plot
plt.style.use('ggplot')

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(14, 7))

# Plot loss and moving average on the primary y-axis
loss_line, = ax1.plot(loss_series, label='Loss', color='blue', alpha=0.2)
moving_average_line, = ax1.plot(moving_average_series, label='Moving Average Loss', color='blue', linewidth=2)
ax1.set_xlabel('Step')
ax1.set_ylabel('Loss')
ax1.tick_params(axis='y')

# Create a secondary y-axis for validation accuracy
ax2 = ax1.twinx()
val_accuracy_line, = ax2.plot(val_accuracy_series, label='Validation Accuracy', color='green', marker='o', linestyle='--')
ax2.set_ylabel('Validation Accuracy (%)')
ax2.tick_params(axis='y')

# Set up the legend
lines = [loss_line, moving_average_line, val_accuracy_line]
ax1.legend(lines, [l.get_label() for l in lines])

# Show the plot
plt.title('Training Loss and Validation Accuracy Over Time')
plt.show()


ValueError: v cannot be empty