# Live Inference

In [187]:
from neural_decoder.neural_decoder_trainer import loadModel
# Load the model stored under this path with the project's loadModel helper.
# NOTE(review): absolute Colab/Drive path - confirm it exists in the target runtime.
model = loadModel('/content/drive/MyDrive/TreeHacks/BOO2')


In [6]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore


import os

# Single source of truth for the service-account key; it is used both for the
# firebase_admin app and for libraries that resolve Google default credentials.
SERVICE_ACCOUNT_KEY_PATH = '/content/treehacks-c0d12-firebase-adminsdk-fbsvc-02119ca686.json'

cred = credentials.Certificate(SERVICE_ACCOUNT_KEY_PATH)

# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in the same kernel. Reuse the existing
# app in that case so the cell stays idempotent.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(credential=cred)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_PATH


In [63]:
# Firestore client shared by the cells below (reads "stream", writes "commands").
# NOTE(review): presumably authenticates through GOOGLE_APPLICATION_CREDENTIALS
# rather than the firebase_admin app - confirm.
db = firestore.Client()

In [32]:
from io import StringIO

In [188]:
import pandas as pd
import numpy as np

# Fetch one document from the "stream" collection, ordered by its "timestamp" field.
# NOTE(review): no sort direction is given - presumably ascending, i.e. the
# OLDEST sample. Confirm whether live inference should read the newest one.
stream_docs = db.collection("stream").order_by("timestamp").limit(1).get()
if not stream_docs:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise RuntimeError('No documents found in the "stream" collection; nothing to decode.')

# The document's 'brain' field holds the recording as CSV text.
csv_str = stream_docs[0].to_dict()['brain']
inp_stream = StringIO(csv_str)
inp = pd.read_csv(inp_stream)

# Keep only the first 64 columns of the parsed frame.
inp = inp[inp.columns[:64]]

## PREP DATA
def inflate_dims(arr, group_size=16):
    """Widen a 2-D array by repeating every column ``group_size`` times.

    Column ``j`` of the input occupies columns ``j * group_size`` through
    ``(j + 1) * group_size - 1`` of the output; the row count is unchanged.

    Args:
        arr: 2-D ``numpy.ndarray`` of shape ``(rows, cols)``.
        group_size: Number of consecutive copies made of each column.

    Returns:
        A new array of shape ``(rows, cols * group_size)`` with the dtype of
        ``arr``.

    Raises:
        ValueError: If ``arr`` is not 2-D.
    """
    if arr.ndim != 2:
        raise ValueError(
            "inflate_dims expects a 2-D array, got %d dimension(s)" % arr.ndim
        )

    # Repeating the left and right column halves separately and concatenating
    # them in order yields exactly the same layout as one repeat over all
    # columns, so a single call is enough.
    return np.repeat(arr, group_size, axis=1)

# Package the live sample under the keys that loadFeaturesAndNormalize reads
# when called with a type other than 'train' ('data', 'sentenceText', 'blockIdx').
dataset = {
        # Transposed CSV frame with every column repeated 16x by inflate_dims.
        'data': inflate_dims(inp.T.values),
        # NOTE(review): looks like a placeholder transcript (no ground truth
        # exists for a live sample) - confirm.
        'sentenceText': np.array(['hello there',]),
        # NOTE(review): np.arange(0) is an empty array, so repeating it still
        # gives an empty (0, 1) array whatever the repeat count is. Presumably
        # this leaves the block-wise normalization with no blocks to process,
        # i.e. the features stay unnormalized - confirm this is intended.
        'blockIdx': np.arange(0).repeat(len(inp.T)/20).reshape(-1,1)
    }

import scipy

# Write the sample to a temporary .mat file so it can be read back through the
# same loader path used for recorded sessions.
scipy.io.savemat('/content/tmp.mat', dataset)

from g2p_en import G2p

import nltk
# Fetch the NLTK tagger resource (presumably required by g2p_en - confirm).
nltk.download('averaged_perceptron_tagger_eng')

# Grapheme-to-phoneme converter; getDataset calls it on each transcription.
g2p = G2p()
# Phoneme label inventory. The position of a label in this list is its id, so
# the order must not change.
PHONE_DEF = [
    'AA', 'AE', 'AH', 'AO', 'AW', 'AY',
    'B', 'CH', 'D', 'DH',
    'EH', 'ER', 'EY',
    'F', 'G', 'HH',
    'IH', 'IY',
    'JH', 'K', 'L', 'M', 'N', 'NG',
    'OW', 'OY',
    'P', 'R', 'S', 'SH', 'T', 'TH',
    'UH', 'UW',
    'V', 'W', 'Y', 'Z', 'ZH',
]

# Same inventory with the 'SIL' label appended as the last entry.
PHONE_DEF_SIL = [*PHONE_DEF, 'SIL']


def phoneToId(p):
    """Return the zero-based position of label ``p`` in ``PHONE_DEF_SIL``.

    Raises:
        ValueError: If ``p`` is not one of the known labels (from ``list.index``).
    """
    position = PHONE_DEF_SIL.index(p)
    return position

import scipy

def loadFeaturesAndNormalize(sessionPath,type='train'):
    """Load one session ``.mat`` file and z-score its features block by block.

    Args:
        sessionPath: Path to a MATLAB file readable by ``scipy.io.loadmat``.
            It must contain ``sentenceText`` and ``blockIdx``, plus ``tx1`` and
            ``spikePow`` when ``type == 'train'``, or ``data`` otherwise.
        type: ``'train'`` builds each trial's features from the first 128
            columns of ``tx1`` and of ``spikePow``; any other value uses the
            whole ``data`` array as the features of every trial.

    Returns:
        dict with keys ``inputFeatures`` (list with one 2-D array per trial),
        ``transcriptions`` (list of stripped sentences) and ``frameLens``
        (list with the row count of each trial's features).
    """
    # A bare ``import scipy`` does not load the ``io`` submodule on every SciPy
    # version, so import it explicitly where it is used.
    import scipy.io

    dat = scipy.io.loadmat(sessionPath)

    input_features = []
    transcriptions = []
    frame_lens = []
    n_trials = dat['sentenceText'].shape[0]

    #collect area 6v tx1 and spikePow features
    for i in range(n_trials):
        #get time series of TX and spike power for this trial
        #first 128 columns = area 6v only
        if type == 'train':
            features = np.concatenate(
                [dat['tx1'][0, i][:, 0:128], dat['spikePow'][0, i][:, 0:128]],
                axis=1,
            )
        else:
            features = dat['data']

        input_features.append(features)
        transcriptions.append(dat['sentenceText'][i].strip())
        frame_lens.append(features.shape[0])

    #block-wise feature normalization
    # Flatten rather than np.squeeze: squeeze collapses a single-trial (1, 1)
    # blockIdx to a 0-d array, for which the per-block index lookup has no
    # column to select and raises IndexError.
    blockNums = np.asarray(dat['blockIdx']).reshape(-1)

    for blockId in np.unique(blockNums):
        sentIdx = np.flatnonzero(blockNums == blockId)

        # Statistics are pooled over the span from the block's first to its
        # last trial, so the trials of a block are assumed to be contiguous.
        feats = np.concatenate(input_features[sentIdx[0]:sentIdx[-1] + 1], axis=0)
        feats_mean = np.mean(feats, axis=0, keepdims=True)
        feats_std = np.std(feats, axis=0, keepdims=True)

        for i in sentIdx:
            # The epsilon avoids division by zero for constant features.
            input_features[i] = (input_features[i] - feats_mean) / (feats_std + 1e-8)

    # Collect the per-trial results in the dict consumed by getDataset.
    session_data = {
        'inputFeatures': input_features,
        'transcriptions': transcriptions,
        'frameLens': frame_lens
    }

    return session_data


def getDataset(fileName,type='train'):
    """Turn one session file into the arrays and lengths used for decoding.

    Features come from ``loadFeaturesAndNormalize``. Each transcription is
    cleaned, converted to phoneme labels with the module-level ``g2p``, and
    encoded as ``phoneToId(label) + 1`` in a zero-padded ``int32`` vector of
    length 500.

    Args:
        fileName: Path of the session ``.mat`` file.
        type: Forwarded unchanged to ``loadFeaturesAndNormalize``.

    Returns:
        dict with keys ``sentenceDat`` (list of feature arrays),
        ``transcriptions`` (list of sentences as loaded), ``phonemes`` (list of
        padded id vectors), ``timeSeriesLens`` (rows per feature array),
        ``phoneLens`` (phoneme count per sentence) and ``phonePerTime``
        (``phoneLens / timeSeriesLens`` as float32).

    Raises:
        ValueError: If a transcription yields more than 500 phoneme labels.
    """
    session_data = loadFeaturesAndNormalize(fileName,type)

    maxSeqLen = 500
    addInterWordSymbol = True

    allDat = []
    trueSentences = []
    seqElements = []
    phoneLens = []

    for features, sentence in zip(session_data['inputFeatures'],
                                  session_data['transcriptions']):
        allDat.append(features)
        trueSentences.append(sentence)

        # Keep only letters, hyphens, spaces and apostrophes; drop '--'; lowercase.
        thisTranscription = str(sentence).strip()
        thisTranscription = re.sub(r'[^a-zA-Z\- \']', '', thisTranscription)
        thisTranscription = thisTranscription.replace('--', '').lower()

        phonemes = []
        for p in g2p(thisTranscription):
            if addInterWordSymbol and p==' ':
                phonemes.append('SIL')
            p = re.sub(r'[0-9]', '', p)  # Remove stress
            if re.match(r'[A-Z]+', p):  # Only keep phonemes
                phonemes.append(p)

        #add one SIL symbol at the end so there's one at the end of each word
        if addInterWordSymbol:
            phonemes.append('SIL')

        seqLen = len(phonemes)
        if seqLen > maxSeqLen:
            raise ValueError(
                "transcription yields %d phoneme labels, more than the maximum of %d"
                % (seqLen, maxSeqLen)
            )

        # Ids are shifted by +1 so that 0 is reserved for padding.
        seqClassIDs = np.zeros([maxSeqLen]).astype(np.int32)
        seqClassIDs[0:seqLen] = [phoneToId(p) + 1 for p in phonemes]
        seqElements.append(seqClassIDs)

        # Every stored id is >= 1, so the first padding zero sits at index
        # seqLen. Recording seqLen directly gives the same value as searching
        # for that zero and also works when the sequence fills all slots.
        phoneLens.append(seqLen)

    newDataset = {}
    newDataset['sentenceDat'] = allDat
    newDataset['transcriptions'] = trueSentences
    newDataset['phonemes'] = seqElements

    timeSeriesLens = [features.shape[0] for features in allDat]

    newDataset['timeSeriesLens'] = np.array(timeSeriesLens)
    newDataset['phoneLens'] = np.array(phoneLens, dtype=np.intp)
    newDataset['phonePerTime'] = newDataset['phoneLens'].astype(np.float32) / newDataset['timeSeriesLens'].astype(np.float32)
    return newDataset


# Build the dataset dict from the temporary .mat file. Any type other than
# 'train' makes the loader read the file's 'data' field as the features.
inp_dataset=getDataset('/content/tmp.mat',type='val')


[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [193]:
# Run the decoder over the live sample and collect, per sequence, the logits,
# the usable logit length, the reference id sequence and the cleaned transcript.
inp_loader = torch.utils.data.DataLoader(
    SpeechDataset([inp_dataset]), batch_size=1, shuffle=False, num_workers=0
)

rnn_outputs = {
    "logits": [],
    "logitLengths": [],
    "trueSeqs": [],
    "transcriptions": [],
}

# Inference only: switch layers such as dropout to evaluation behaviour.
model.eval()

# `dayIdx` must already exist in the kernel (it is not defined in this cell).
# NOTE(review): confirm which session/day index the live sample should use.
# It is wrapped in a separately named tensor because rebinding `dayIdx` itself
# to a tensor made this cell behave differently when run a second time.
day_tensor = torch.tensor([int(dayIdx)], dtype=torch.int64).to(device)

# No gradients are needed for prediction, so skip building the autograd graph.
with torch.no_grad():
    for j, (X, y, X_len, y_len, _) in enumerate(inp_loader):
        X, y, X_len, y_len = (
            X.to(device),
            y.to(device),
            X_len.to(device),
            y_len.to(device),
        )
        pred = model.forward(X, day_tensor)
        # Output length derived from the input length and the model's
        # kernelLen / strideLen attributes.
        adjustedLens = ((X_len - model.kernelLen) / model.strideLen).to(torch.int32)

        for iterIdx in range(pred.shape[0]):
            trueSeq = np.array(y[iterIdx][0 : y_len[iterIdx]].cpu().detach())

            rnn_outputs["logits"].append(pred[iterIdx].cpu().detach().numpy())
            rnn_outputs["logitLengths"].append(
                adjustedLens[iterIdx].cpu().detach().item()
            )
            rnn_outputs["trueSeqs"].append(trueSeq)

        # Same cleaning as in getDataset: keep letters, hyphens, spaces and
        # apostrophes, drop '--', lowercase. Indexing by j relies on batch_size=1.
        transcript = inp_dataset["transcriptions"][j].strip()
        transcript = re.sub(r"[^a-zA-Z\- \']", "", transcript)
        transcript = transcript.replace("--", "").lower()
        rnn_outputs["transcriptions"].append(transcript)

In [194]:
# Language model and tokenizer handed to decode_rnn_outputs below.
llm, tokenizer = build_gpt2()

# Decoding settings, forwarded unchanged as keyword arguments.
decode_options = dict(
    acoustic_scale=1.5,
    length_penalty=0.3,
    alpha=0.8,
    n_best=5,
)

decoded_transcriptions = decode_rnn_outputs(
    rnn_outputs, model=llm, tokenizer=tokenizer, **decode_options
)

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
100%|██████████| 1/1 [00:00<00:00,  1.50it/s]


In [None]:
# First decoded transcription is the text handed to the chat model.
text = decoded_transcriptions[0]

import os
import openai

# Read the key from the environment so no secret is stored in the notebook.
# A missing OPENAI_API_KEY raises KeyError here, before any request is made.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Ask the chat model to extract the web-application prompt from the decoded text.
completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": "From the natural language thought stream, isolate the web-application prompt: " + text
            },
        ],
    )

In [None]:
# Write the chat model's reply as the "command" field of document "new" in the
# "commands" collection.
# NOTE(review): message.content may be None for some responses - confirm the
# consumer of this document tolerates that.
db.collection("commands").document("new").set({"command": completion.choices[0].message.content})