In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

import math
import numpy as np
import random
from google.colab import drive
import datetime
import sys
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# imports the torch_xla package
# !pip install torch_xla
# import torch_xla
# import torch_xla.core.xla_model as xm

# Load in Data

In [None]:
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd /content/drive/MyDrive/Colab\ Notebooks/dexcom

/content/drive/MyDrive/Colab Notebooks/dexcom


In [None]:
CHECKPOINT_FOLDER = "./saved_transformer"

# Util Functions

In [None]:
random.seed(42)

# function: parses date string into DateTime Object
# input: date
# output: DateTime Object
def dateParser(date):
    mainFormat = '%Y-%m-%d %H:%M:%S.%f'
    altFormat = '%Y-%m-%d %H:%M:%S'
    try:
        return datetime.datetime.strptime(date, mainFormat)
    except ValueError:
        return datetime.datetime.strptime(date, altFormat)

# function: return a DataFrame from directory
# input: file directory dexcom
# output: DataFrame
def getGlucoseData(fileDir):
    df = pd.read_csv(fileDir)
    data = pd.DataFrame()
    data['Time'] = df['Timestamp (YYYY-MM-DDThh:mm:ss)']
    data['Glucose'] = pd.to_numeric(df['Glucose Value (mg/dL)'])
    data.drop(data.index[:12], inplace=True)
    data['Time'] = np.array([dateParser(dateStr) for dateStr in data['Time']])
    data['Day'] = np.array([date.day for date in data['Time']])
    data = data.reset_index()
    return data

# function: create samples given glucose data
# input: data, length, numSamples
# output: np array of samples and value after
def sampleTransformer(data, length, numSamples=100):
    SOS_token = np.array([301])
    EOS_token = np.array([302])
    ans = []
    for i in range(numSamples):
        start = random.randint(0,len(data)- 2 * length - 1)
        while True in np.isnan(data[start: start + 2 * length + 1]):
            start = random.randint(0,len(data)-2 * length-1)
        begin = np.concatenate((SOS_token, data[start : start + length], EOS_token))
        end = np.concatenate((SOS_token, data[start + length : start + 2 * length], EOS_token))
        ans.append([begin, end])
    np.random.shuffle(ans)
    return np.array(ans)

# function: create a matrix of samples
# input: glucoseDict, length, numSamples
# output: train_data, val_data
def createSamplesArray(glucoseDict, length, numSamples):
    shuffled = list(glucoseDict.keys())
    np.random.shuffle(shuffled)
    train_choice, val_choice = (shuffled[:12], shuffled[12:])
    train_data = []
    val_data = []
    for i in train_choice:
        data = glucoseDict[i]
        train = sampleTransformer(data, length, numSamples)
        for trainVal in train:
            train_data.append(trainVal)
    for i in val_choice:
        data = glucoseDict[i]
        val = sampleTransformer(data, length, numSamples)
        for validVal in val:
            val_data.append(validVal)
    train_data = np.array(train_data)
    val_data = np.array(val_data)
    return train_data, val_data

# function: create samples of glucose data regardless of which sample it is from
# input: glucoseDict, length, numSamples
# output: X, y
def createGlucoseSamples(glucoseDict, length, numSamples):
    data = sampleTransformer(glucoseDict[random.choice(list(glucoseDict.keys()))], length, numSamples)
    random.shuffle(data)
    return np.array([data[i][0] for i in range(len(data))]), np.array([data[i][1] for i in range(len(data))])

# Model

In [None]:
class HyperParams:
    def __init__(self):
        # Constance hyperparameters. They have been tested and don't need to be tuned.
        self.NUM_TOKENS = 303
        self.NUM_HEADS = 16
        self.EMBEDDING_DIM = 16
        self.NUM_ENCODER_LAYERS = 4
        self.NUM_DECODER_LAYERS = 4
        self.DROPOUT_P = 0.5
        self.LEARNING_RATE = 0.001
        self.WEIGHT_DECAY = 0
        self.NORM_FIRST = True

In [None]:
class Transformer(nn.Module):
    # Constructor
    def __init__(
        self,
        num_tokens,
        embedding_dim,
        num_heads,
        num_encoder_layers,
        num_decoder_layers,
        dropout_p,
        norm_first,
        device):
        super().__init__()

        # INFO
        self.model_type = "Transformer"
        self.num_tokens = num_tokens
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.num_encoder_layers = num_encoder_layers
        self.num_decoder_layers = num_decoder_layers
        self.dropout_p = dropout_p
        self.norm_first = norm_first
        self.device = device


        # LAYERS
        self.positional_encoder = PositionalEncoding(
            embedding_dim=embedding_dim, dropout_p=dropout_p, max_len=5000, device=self.device
        )
        self.embedding = nn.Embedding(
            num_tokens,
            embedding_dim
        )
        self.transformer = nn.Transformer(
            d_model=self.embedding_dim,
            nhead=self.num_heads,
            num_encoder_layers=self.num_encoder_layers,
            num_decoder_layers=self.num_decoder_layers,
            dropout=self.dropout_p,
            norm_first=self.norm_first
        )
        self.layer_norm = nn.LayerNorm(embedding_dim)
        self.out = nn.Linear(embedding_dim, num_tokens)

    # function: forward of model
    # input: src, tgt, tgt_mask
    # output: output after forward run through model
    def forward(self, src, tgt, tgt_mask=None):
        # Src size must be (batch_size, src, sequence_length)
        # Tgt size must be (batch_size, tgt, sequence_length)

        src = self.embedding(src) * math.sqrt(self.embedding_dim)
        tgt = self.embedding(tgt) * math.sqrt(self.embedding_dim)
        src = self.positional_encoder(src)
        tgt = self.positional_encoder(tgt)

        # permute to have batch_size come first
        src = src.permute(1,0,2)
        tgt = tgt.permute(1,0,2)

        # layer normalization for quicker training of transformer model
        # transformer model
        transformer_out = self.transformer(src, tgt, tgt_mask=tgt_mask)

        out = self.out(transformer_out)

        return out

    # function: creates a mask with 0's in bottom left of matrix
    # input: size
    # output: mask
    def get_tgt_mask(self, size) -> torch.tensor:
        mask = torch.tril(torch.ones(size,size) * float('-inf')).T
        for i in range(size):
            mask[i, i] = 0
        return mask

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, embedding_dim, dropout_p, max_len, device):
        super().__init__()

        # Layers
        self.dropout = nn.Dropout(dropout_p)
        self.positional_encoding = self.get_positional_encoding(max_len, embedding_dim).to(device)

    def get_positional_encoding(self, max_len, embedding_dim):
        positional_encoding = torch.zeros(max_len, embedding_dim)
        # column of positions
        position = torch.arange(0, max_len, dtype=torch.float32).view(-1,1)
        # division term
        div_term = torch.exp(torch.arange(0, embedding_dim, 2, dtype=torch.float32) * (-math.log(10000.0) / embedding_dim))
        # even numbered positional encoding
        positional_encoding[:, 0::2] = torch.sin(position * div_term)
        # odd numbered positional encoding
        positional_encoding[:, 1::2] = torch.cos(position * div_term)
        positional_encoding = positional_encoding.unsqueeze(0).transpose(0,1)
        return positional_encoding

    def forward(self, token_embedding: torch.tensor) -> torch.tensor:
        # Residual connection + pos encoding
        return self.dropout(token_embedding + self.positional_encoding[:token_embedding.size(0), :])

# Get glucose data from Dexcom files

In [None]:
dexcomFiles = [f"Dexcom_{str(i).zfill(3)}.csv" for i in range(1, 17)]
glucoseDict = {}
idx = 0
for file in dexcomFiles:
  idx += 1
  df = getGlucoseData(file)
  glucoseDict[idx] = df['Glucose'].to_numpy()

In [None]:
# train_data, val_data = createSamplesArray(glucoseDict, 12, 1000)
X,y = createGlucoseSamples(glucoseDict, 12, 10000)
# get X_train, X_test, X_val (7:1:2), and y_train, y_test, y_val (7:1:2)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.125, random_state=42)

In [None]:
def createBatches(data, batch_size=16, padding=False, padding_token=-1):
    batches = []
    idx = 0
    while idx + batch_size < len(data):
        batches.append(np.array(data[idx : idx + batch_size]).astype(int))
        idx += batch_size
    print(f"{len(batches)} batches of size {batch_size}")
    return batches

train_data = [[X_train[i], y_train[i]] for i in range(len(X_train))]
val_data = [[X_val[i], y_val[i]] for i in range(len(X_val))]
test_data = [[X_test[i], y_test[i]] for i in range(len(X_test))]
train_dataloader = createBatches(train_data)
val_dataloader = createBatches(val_data)
test_dataloader = createBatches(test_data)

546 batches of size 16
124 batches of size 16
78 batches of size 16


# Training

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hyperparams = HyperParams()
model = Transformer(
    num_tokens=hyperparams.NUM_TOKENS,
    embedding_dim=hyperparams.EMBEDDING_DIM,
    num_heads=hyperparams.NUM_HEADS,
    num_encoder_layers=hyperparams.NUM_ENCODER_LAYERS,
    num_decoder_layers=hyperparams.NUM_DECODER_LAYERS,
    dropout_p=hyperparams.DROPOUT_P,
    norm_first=hyperparams.NORM_FIRST,
    device = device
).to(device)
opt = optim.Adam(model.parameters(), lr=hyperparams.LEARNING_RATE, weight_decay=hyperparams.WEIGHT_DECAY, eps=1e-6)
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)



In [None]:
def train(model, opt, loss_fn, dataloader):

    model.train()
    total_loss = 0

    accuracy = []

    for batch in dataloader:
        # X --> first half
        # y --> second half
        X, y = batch[:, 0], batch[:, 1]
        X, y = torch.tensor(X).to(device), torch.tensor(y).to(device)
        # Now we shift the tgt by one so with the <SOS> we predict the token at pos 1
        # y_input [SOS_token, ...]
        y_input = y[:,:-1]
        # y_expected [..., EOS]
        y_expected = y[:,1:]

        # Get mask to mask out the next words
        sequence_length = len(y_input[0])
        tgt_mask = model.get_tgt_mask(sequence_length).to(device)

        # Standard training except we pass in y_input and tgt_mask
        pred = model(X, y_input, tgt_mask)

        # Permute pred to have batch size first again
        pred = pred.permute(1, 2, 0)
        y_pred = torch.argmax(pred.detach(), axis=1)
        mape = mean_absolute_percentage_error(y_expected, y_pred)
        accuracy.append(100 - mape.cpu().detach().item())
        loss = loss_fn(pred, y_expected)

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.detach().item()
    epoch_loss = total_loss / len(dataloader)
    epoch_acc = np.mean(accuracy)

    return epoch_loss, epoch_acc


def evaluate(model, loss_fn, dataloader):

    model.eval()
    total_loss = 0

    accuracy = []

    with torch.no_grad():
        for batch in dataloader:
            X, y = batch[:, 0], batch[:, 1]
            X, y = torch.tensor(X, dtype=torch.long, device=device), torch.tensor(y, dtype=torch.long, device=device)

            # Now we shift the tgt by one so with the <SOS> we predict the token at pos 1
            # y_input [SOS_token, ...]
            y_input = y[:,:-1]
            # y_expected [..., EOS]
            y_expected = y[:,1:]

            # Get mask to mask out the next words
            sequence_length = len(y_input[0])
            tgt_mask = model.get_tgt_mask(sequence_length).to(device)
            pred = model(X, y_input, tgt_mask)

            # Permute pred to have batch size first again
            pred = pred.permute(1, 2, 0)
            y_pred = torch.argmax(pred, axis=1)
            mape = mean_absolute_percentage_error(y_expected, y_pred)
            accuracy.append(100 - mape.cpu().detach().item())
            loss = loss_fn(pred, y_expected)
            total_loss += loss.detach().item()

        # sanity check
        # print(y_pred[0][1:])
        # print(y_expected[0][:-1])

    epoch_loss = total_loss / len(dataloader)
    epoch_acc = np.mean(accuracy)

    return epoch_loss, epoch_acc

def mean_absolute_percentage_error(y_true, y_pred):
    return torch.mean(torch.abs((y_true[:, :-1] - y_pred[:, 1:])) / y_true[:, :-1]) * 100

In [None]:
def fit(model, opt, loss_fn, train_dataloader, val_dataloader, test_dataloader, epochs):

    # Used for plotting later on
    train_loss_list, validation_loss_list = [], []

    best_test_acc = float('-inf')

    best_val_loss = float('inf')

    print("Training and validating model")
    for epoch in range(epochs):
        print("-"*25, f"Epoch {epoch + 1}","-"*25)

        train_loss, train_accuracy = train(model, opt, loss_fn, train_dataloader)
        train_loss_list += [train_loss]

        validation_loss, validation_accuracy = evaluate(model, loss_fn, val_dataloader)
        validation_loss_list += [validation_loss]



        print(f"Training loss: {train_loss:.4f}")
        print(f"Training accuracy: {train_accuracy:.4f}")
        print(f"Validation loss: {validation_loss:.4f}")
        print(f"Validation accuracy: {validation_accuracy:.4f}")
        if validation_loss < best_val_loss:
            best_val_loss = validation_loss
            if not os.path.exists(CHECKPOINT_FOLDER):
                os.makedirs(CHECKPOINT_FOLDER)
            print("Saving ...")
            state = {'state_dict': model.state_dict(),
                    'epoch': epoch,
                    'lr': 0.001}
            torch.save(state['state_dict'], os.path.join(CHECKPOINT_FOLDER, 'ermTransformer.pth'))

        test_loss, test_accuracy = evaluate(model, loss_fn, test_dataloader)
        print(f"Test loss: {test_loss:.4f}")
        print(f"Test accuracy: {test_accuracy:.4f}")

        if test_accuracy > best_test_acc:
            best_test_acc = test_accuracy

    print(f"Test accuracy after {epochs} epochs: {best_test_acc}")
    return train_loss_list, validation_loss_list

train_loss_list, validation_loss_list = fit(model, opt, loss_fn, train_dataloader, val_dataloader, test_dataloader, 20)

Training and validating model
------------------------- Epoch 1 -------------------------
Training loss: 4.1770
Training accuracy: -30.8233
Validation loss: 3.3716
Validation accuracy: 73.8758
Saving ...
Test loss: 3.3609
Test accuracy: 73.6626
------------------------- Epoch 2 -------------------------
Training loss: 3.3731
Training accuracy: 56.6853
Validation loss: 2.8706
Validation accuracy: 93.9534
Saving ...
Test loss: 2.8591
Test accuracy: 93.9754
------------------------- Epoch 3 -------------------------
Training loss: 3.0866
Training accuracy: 69.7497
Validation loss: 2.7049
Validation accuracy: 97.1657
Saving ...
Test loss: 2.6940
Test accuracy: 97.1518
------------------------- Epoch 4 -------------------------
Training loss: 2.9471
Training accuracy: 72.4043
Validation loss: 2.6169
Validation accuracy: 98.2234
Saving ...
Test loss: 2.6054
Test accuracy: 98.2474
------------------------- Epoch 5 -------------------------
Training loss: 2.8470
Training accuracy: 74.0237
Vali

# Leave One Person Out Cross Validation (LOPOCV)

In [None]:
def fit(model, opt, loss_fn, train_dataloader, val_dataloader, test_dataloader, epochs):

    # Used for plotting later on
    train_loss_list, validation_loss_list = [], []

    best_test_acc = float('-inf')

    best_val_loss = float('inf')

    print("Training and validating model")
    for epoch in tqdm(range(epochs), total=epochs):
        # print("-"*25, f"Epoch {epoch + 1}","-"*25)

        train_loss, train_accuracy = train(model, opt, loss_fn, train_dataloader)
        train_loss_list += [train_loss]

        validation_loss, validation_accuracy = evaluate(model, loss_fn, val_dataloader)
        validation_loss_list += [validation_loss]

        # print(f"Training loss: {train_loss:.4f}")
        # print(f"Training accuracy: {train_accuracy:.4f}")
        # print(f"Validation loss: {validation_loss:.4f}")
        # print(f"Validation accuracy: {validation_accuracy:.4f}")
        """if validation_loss < best_val_loss:
            best_val_loss = validation_loss
            if not os.path.exists(CHECKPOINT_FOLDER):
                os.makedirs(CHECKPOINT_FOLDER)
            print("Saving ...")
            state = {'state_dict': model.state_dict(),
                    'epoch': epoch,
                    'lr': 0.001}
            torch.save(state['state_dict'], os.path.join(CHECKPOINT_FOLDER, 'ermTransformer.pth'))"""

        test_loss, test_accuracy = evaluate(model, loss_fn, test_dataloader)
        # print(f"Test loss: {test_loss:.4f}")
        # print(f"Test accuracy: {test_accuracy:.4f}")

        if test_accuracy > best_test_acc:
            best_test_acc = test_accuracy

    print(f"Test accuracy after {epochs} epochs: {best_test_acc}")
    return train_loss_list, validation_loss_list, best_test_acc

In [None]:
best_test_accs = []
for lopo in range(16):
  print(f"Leaving out subject {lopo+1}")
  dexcomFiles = [f"Dexcom_{str(i).zfill(3)}.csv" for i in range(1, 17) if i != lopo+1]
  glucoseDict = {}
  idx = 0
  for file in dexcomFiles:
    idx += 1
    df = getGlucoseData(file)
    glucoseDict[idx] = df['Glucose'].to_numpy()
  # train_data, val_data = createSamplesArray(glucoseDict, 12, 1000)
  X,y = createGlucoseSamples(glucoseDict, 12, 9000)
  X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=float(2/9), random_state=42)
  # get test data
  glucoseDictTest = {}
  glucoseDictTest[f"Dexcom_{str(lopo+1).zfill(3)}.csv"] = getGlucoseData(f"Dexcom_{str(lopo+1).zfill(3)}.csv")['Glucose'].to_numpy()
  X_test, y_test = createGlucoseSamples(glucoseDictTest, 12, 1000)
  # get X_train, X_test, X_val (7:1:2), and y_train, y_test, y_val (7:1:2)
  train_data = [[X_train[i], y_train[i]] for i in range(len(X_train))]
  val_data = [[X_val[i], y_val[i]] for i in range(len(X_val))]
  test_data = [[X_test[i], y_test[i]] for i in range(len(X_test))]
  train_dataloader = createBatches(train_data)
  val_dataloader = createBatches(val_data)
  test_dataloader = createBatches(test_data)

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  hyperparams = HyperParams()
  model = Transformer(
      num_tokens=hyperparams.NUM_TOKENS,
      embedding_dim=hyperparams.EMBEDDING_DIM,
      num_heads=hyperparams.NUM_HEADS,
      num_encoder_layers=hyperparams.NUM_ENCODER_LAYERS,
      num_decoder_layers=hyperparams.NUM_DECODER_LAYERS,
      dropout_p=hyperparams.DROPOUT_P,
      norm_first=hyperparams.NORM_FIRST,
      device = device
  ).to(device)
  opt = optim.Adam(model.parameters(), lr=hyperparams.LEARNING_RATE, weight_decay=hyperparams.WEIGHT_DECAY, eps=1e-6)
  loss_fn = nn.CrossEntropyLoss()
  loss_fn = loss_fn.to(device)

  train_loss_list, validation_loss_list, best_test_acc = fit(model, opt, loss_fn, train_dataloader, val_dataloader, test_dataloader, 10)
  best_test_accs.append(best_test_acc)

Leaving out subject 1




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:15<00:00, 61.52s/it]


Test accuracy after 10 epochs: 96.5121211736433
Leaving out subject 2




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:14<00:00, 61.44s/it]


Test accuracy after 10 epochs: 90.44678805720422
Leaving out subject 3
437 batches of size 16
124 batches of size 16
62 batches of size 16




Training and validating model


100%|██████████| 10/10 [10:18<00:00, 61.84s/it]


Test accuracy after 10 epochs: 88.76579225063324
Leaving out subject 4




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:16<00:00, 61.68s/it]


Test accuracy after 10 epochs: 95.0692483571268
Leaving out subject 5




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:17<00:00, 61.72s/it]


Test accuracy after 10 epochs: 88.12829586959654
Leaving out subject 6




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:52<00:00, 65.30s/it]


Test accuracy after 10 epochs: 88.97202157974243
Leaving out subject 7




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:48<00:00, 64.81s/it]


Test accuracy after 10 epochs: 72.713529757915
Leaving out subject 8




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:53<00:00, 65.30s/it]


Test accuracy after 10 epochs: 92.67791716898641
Leaving out subject 9




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:22<00:00, 62.20s/it]


Test accuracy after 10 epochs: 93.87542084532399
Leaving out subject 10




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:18<00:00, 61.89s/it]


Test accuracy after 10 epochs: 86.92322143047086
Leaving out subject 11




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:14<00:00, 61.49s/it]


Test accuracy after 10 epochs: 92.9126719563238
Leaving out subject 12




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:06<00:00, 60.67s/it]


Test accuracy after 10 epochs: 93.96207946731198
Leaving out subject 13




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:05<00:00, 60.55s/it]


Test accuracy after 10 epochs: 93.91763395263303
Leaving out subject 14




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:06<00:00, 60.63s/it]


Test accuracy after 10 epochs: 95.28384989307773
Leaving out subject 15




437 batches of size 16
124 batches of size 16
62 batches of size 16
Training and validating model


100%|██████████| 10/10 [10:10<00:00, 61.03s/it]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Test accuracy after 10 epochs: 97.0989654160315
Leaving out subject 16
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-42-1bf920425f09>", line 9, in <cell line: 2>
    df = getGlucoseData(file)
  File "<ipython-input-5-cbcdc4bd728a>", line 18, in getGlucoseData
    df = pd.read_csv(fileDir)
  File "/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py", line 331, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 950, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 605, in _read
    parser = TextFileReader(filepa