In [2]:
!pip3 install fastapi pandas torch transformers uvicorn google-cloud-storage scikit-learn


Collecting fastapi
  Obtaining dependency information for fastapi from https://files.pythonhosted.org/packages/4d/d2/3ad038a2365fefbac19d9a046cab7ce45f4c7bfa81d877cbece9707de9ce/fastapi-0.103.2-py3-none-any.whl.metadata
  Downloading fastapi-0.103.2-py3-none-any.whl.metadata (24 kB)
Collecting transformers
  Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/1a/d1/3bba59606141ae808017f6fde91453882f931957f125009417b87a281067/transformers-4.34.0-py3-none-any.whl.metadata
  Downloading transformers-4.34.0-py3-none-any.whl.metadata (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.5/121.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn
  Obtaining dependency information for uvicorn from https://files.pythonhosted.org/packages/79/96/b0882a1c3f7ef3dd86879e041212ae5b62b4bd352320889231cc735a8e8f/uvicorn-0.23.2-py3-none-any.whl.metadata
  Using cached uvicorn-0.23.2-py3-none-any.whl.metadata (6.2 kB)
Co

In [3]:
from fastapi import FastAPI, UploadFile, HTTPException
import pandas as pd
import torch
import json
import io
from transformers import BartTokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig

class BrainTranslator(nn.Module):
    def __init__(self, pretrained_layers, in_feature=840, decoder_embedding_size=1024, additional_encoder_nhead=8, additional_encoder_dim_feedforward=2048):
        super(BrainTranslator, self).__init__()
        
        self.pretrained = pretrained_layers
        self.additional_encoder_layer = nn.TransformerEncoderLayer(d_model=in_feature, nhead=additional_encoder_nhead, dim_feedforward=additional_encoder_dim_feedforward, batch_first=True)
        self.additional_encoder = nn.TransformerEncoder(self.additional_encoder_layer, num_layers=6)
        self.fc1 = nn.Linear(in_feature, decoder_embedding_size)

    def forward(self, input_embeddings_batch, input_masks_batch, input_masks_invert):
        encoded_embedding = self.additional_encoder(input_embeddings_batch, src_key_padding_mask=input_masks_invert)
        encoded_embedding = F.relu(self.fc1(encoded_embedding))
        out = self.pretrained(inputs_embeds=encoded_embedding, attention_mask=input_masks_batch, return_dict=True)
        
        return out


In [7]:
from transformers import BartForConditionalGeneration

_MODEL = None

def get_model():
    global _MODEL

    if _MODEL is None:
        pretrained_bart = BartForConditionalGeneration.from_pretrained('facebook/bart-large')
        checkpoint_path = '/Users/michaelholborn/Documents/SoftwareLocal/monotropism/thoughtx/task1_task2_taskNRv2_finetune_BrainTranslator_skipstep1_b1_20_30_5e-05_5e-07_unique_sent.pt'  # Change to the path of your model
        _MODEL = BrainTranslator(pretrained_bart)
        model_weights = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        _MODEL.load_state_dict(model_weights)
        _MODEL.eval()

    return _MODEL


In [22]:
import numpy as np
from sklearn.preprocessing import StandardScaler

def preprocess_eeg_data_for_inference(raw_eeg_data: np.ndarray, segment: bool = False, segment_length: int = 128) -> np.ndarray:
    eeg_data_df = pd.DataFrame(raw_eeg_data)
    eeg_data_filled = eeg_data_df.fillna(eeg_data_df.mean())
    scaler = StandardScaler()
    normalized_data = scaler.fit_transform(eeg_data_filled)
    if segment:
        segmented_data = segment_eeg_data(normalized_data, segment_length)
        return segmented_data
    print("Segment Data: ",segment)
    print("Normalised Data: ",normalized_data)

    return normalized_data

def segment_eeg_data(eeg_data: np.ndarray, segment_length: int = 128) -> np.ndarray:
    num_segments = eeg_data.shape[1] // segment_length
    segmented_data = []
    for i in range(num_segments):
        start_idx = i * segment_length
        end_idx = (i + 1) * segment_length
        segment = eeg_data[:, start_idx:end_idx]
        segmented_data.append(segment)
    return np.array(segmented_data)




In [17]:
# Load data
with open("datasets/results.json", 'r') as file:
    data = json.load(file)

raw_eeg_data = pd.DataFrame(data)

In [20]:

eeg_tensor = preprocess_eeg_data_for_inference(raw_eeg_data)



Segment Data:  False
Normalised Data:  [[-0.12980619 -0.25275585 -0.15944155 ...  0.92220916  0.12201108
  -0.43434176]
 [ 0.02935555 -0.49875623 -0.50513756 ...  1.62593771  0.4048283
  -0.4977017 ]
 [-0.68281424 -1.04184985 -0.89878684 ...  1.64638032 -0.21750464
  -0.85330254]
 ...
 [-0.5933969  -0.83465343 -0.66745245 ...  1.21945275 -0.01502839
  -0.20512466]
 [ 0.20163897 -0.38502061 -0.3968381  ...  1.36628486 -0.14704242
  -0.5807963 ]
 [-0.55645603 -0.13538309  0.14307366 ...  0.37879432  0.09646755
  -0.3338418 ]]


In [24]:
model =get_model()
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')



In [26]:

def generate_text_from_eeg(input_sample: dict, model, tokenizer, device="cpu") -> str:
    """
    Generate text from preprocessed EEG data using a trained model.
    
    Parameters:
    - input_sample: The prepared input sample.
    - model: The trained EEG-to-text model.
    - tokenizer: The BART tokenizer.
    - device: The device to run the model on (e.g., "cpu", "cuda").
    
    Returns:
    - Generated text.
    """
    
    # Move the sample and model to the specified device
    input_sample["sent_level_EEG"] = input_sample["sent_level_EEG"].to(device)
    input_sample["target_ids"] = input_sample["target_ids"].to(device)
    model = model.to(device)
    
    # Set the model to evaluation mode
    model.eval()
    
    print("input_sample: ",input_sample)
    
    # Perform inference
    with torch.no_grad():
        outputs = model(input_ids=None, encoder_outputs=(input_sample["sent_level_EEG"], None), decoder_input_ids=input_sample["target_ids"])
    
    # Extract the generated token IDs from the model's outputs
    generated_ids = outputs.logits.argmax(dim=-1)
    
    # Decode the token IDs to text
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    
    return generated_text

### Step 1: Ensure input_sample is structured correctly
Before calling generate_text_from_eeg, the input_sample should be structured as a dictionary with keys sent_level_EEG and target_ids. We need to check if this is the case.

In [27]:

# Preprocess the data
placeholder_token = tokenizer("<s>", return_tensors="pt")
input_sample = {
    "sent_level_EEG": torch.tensor(eeg_tensor),
    "target_ids": placeholder_token["input_ids"]
}

results_json = generate_text_from_eeg(input_sample,model,tokenizer)


TypeError: BrainTranslator.forward() got an unexpected keyword argument 'input_ids'