In [2]:
# Install dependencies.
# !pip install asyncio==3.4.3 asyncpg==0.27.0 cloud-sql-python-connector["asyncpg"]==1.2.3
# !pip install numpy==1.22.4 pandas==1.5.3
# !pip install pgvector==0.1.8
# !pip install langchain==0.0.196 transformers
# !pip install google-cloud-aiplatform==1.26.0

In [3]:
# Automatically restart kernel after installs so that your environment
# can access the new packages.
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

In [4]:
import os
# Please fill in these values.
project_id = "engaged-plasma-439804-k0"  # @param {type:"string"}
database_password = os.getenv('DATABASE_PASSWORD')  # @param {type:"string"}
region = "us-central1"  # @param {type:"string"}
instance_name = "research-paper-db"  # @param {type:"string"}
database_name = "research-papers"  # @param {type:"string"}
database_user = "superuser"  # @param {type:"string"}


# Quick input validations.
assert project_id, "⚠️ Please provide a Google Cloud project ID"
assert region, "⚠️ Please provide a Google Cloud region"
assert instance_name, "⚠️ Please provide the name of your instance"
assert database_name, "⚠️ Please provide a database name"
assert database_user, "⚠️ Please provide a database user"
assert database_password, "⚠️ Please provide a database password"

In [5]:
# @markdown Verify that you are able to connect to the database. Executing this block should print the current PostgreSQL server version.

import asyncio
import asyncpg
from google.cloud.sql.connector import Connector


async def test_connection():
    # get current running event loop to be used with Connector
    print(1)
    loop = asyncio.get_running_loop()
    print(1)
    # initialize Connector object as async context manager
    async with Connector(loop=loop) as connector:
        print(1)
        # create connection to Cloud SQL database
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}"
            # ... additional database driver args
        )

        # query Cloud SQL database
        results = await conn.fetch("SELECT version()")
        print(results[0]["version"])

        # close asyncpg connection
        await conn.close()


# Test connection with `asyncio`
await test_connection()  # type: ignore

1
1
1


  expiration = x509.not_valid_after


PostgreSQL 16.4 on x86_64-pc-linux-gnu, compiled by Debian clang version 12.0.1, 64-bit


In [64]:
# Save the Pandas dataframe in a PostgreSQL table.

import asyncio
import asyncpg
from google.cloud.sql.connector import Connector


async def reset_database():
    loop = asyncio.get_running_loop()
    async with Connector(loop=loop) as connector:
        # Create connection to Cloud SQL database
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}",
        )

        await conn.execute("DROP TABLE IF EXISTS papers CASCADE")
        # Create the `products` table.
        await conn.execute(
            """CREATE TABLE papers(
                                doi TEXT PRIMARY KEY,
                                title TEXT,
                                abstract TEXT,
                                content TEXT,
                                citation_text TEXT,
                                embedding vector(1024))"""
        )

        # Copy the dataframe to the `products` table.
        # tuples = list(df.itertuples(index=False))
        # await conn.copy_records_to_table(
        #     "products", records=tuples, columns=list(df), timeout=10
        # )
        await conn.close()


# Run the SQL commands now.
# await reset_database()  # type: ignore

In [156]:
# Store the generated vector embeddings in a PostgreSQL table.
# This code may run for a few minutes.

import asyncio
import asyncpg
from google.cloud.sql.connector import Connector
import numpy as np
from pgvector.asyncpg import register_vector


async def insert_into_papers(papers_df = None):
    loop = asyncio.get_running_loop()
    async with Connector(loop=loop) as connector:
        # Create connection to Cloud SQL database.
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}",
        )

        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
        await register_vector(conn)


        # res = await conn.execute("DROP TABLE IF EXISTS papers CASCADE")
        # print(res)
        # Create the `products` table.
        # await conn.execute(
        #     """CREATE TABLE papers(
        #                         doi TEXT PRIMARY KEY,
        #                         title TEXT,
        #                         abstract TEXT,
        #                         content TEXT,
        #                         citation_text TEXT,
        #                         embedding vector(1024))"""
        # )
        if type(df) == pd.DataFrame:
            # Store all the generated embeddings back into the database.
            insert_values = [
                (
                    str(row["doi"]),
                    row["title"],
                    row["abstract"],
                    row["content"],
                    row["citation_text"],
                    np.array(row["embedding"]),
                    row['id'],
                    np.array(row['title_embedding']),

                )
                for index, row in papers_df.iterrows()
            ]

            # Use executemany for bulk insert
            await conn.executemany(
                "INSERT INTO papers (doi, title, abstract, content, citation_text, embedding, id, title_embedding) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)",
                insert_values
            )

        await conn.close()

# import pandas as pd
# import numpy as np
# df = pd.read_csv("test.csv")
# df["embedding"] = [np.random.rand(1024) for _ in range(len(df))]
# df
# # Run the SQL commands now.
# await insert_into_papers(df)  # type: ignore

In [147]:
async def update_papers(papers_df = None):
    loop = asyncio.get_running_loop()
    async with Connector(loop=loop) as connector:
        # Create connection to Cloud SQL database.
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}",
        )

        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
        await register_vector(conn)

        if type(df) == pd.DataFrame:
            # Store all the generated embeddings back into the database.
            update_values = [
                (
                    str(row["doi"]),
                    row["title_embedding"],
                    row["id"],
                )
                for index, row in papers_df.iterrows()
            ]

            # Use executemany for bulk insert
            await conn.executemany(
                """
                UPDATE papers 
                SET title_embedding = $2, id = $3 
                WHERE doi = $1
                """,
                update_values
            )

        await conn.close()

In [153]:
async def fetch(embeding, nbr_articles=2, DESC=True):
    loop = asyncio.get_running_loop()
    async with Connector(loop=loop) as connector:
        # Create connection to Cloud SQL database.
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}",
        )

        await register_vector(conn)
        # Find similar products to the query using cosine similarity search
        # over all vector embeddings. This new feature is provided by `pgvector`.
        results = await conn.fetch(
            f"""
                            SELECT title, 1 - (embedding <=> $1) AS similarity, abstract, doi, 1 - (title_embedding <=> $1) AS title_similarity
                            FROM papers
                            ORDER BY similarity {"DESC" if DESC else ""}
                            LIMIT $2
                            """,
            embeding,
            nbr_articles
        )

        if len(results) == 0:
            raise Exception("Did not find any results. Adjust the query parameters.")
        matches = []
        for r in results:
            # Collect the description for all the matched similar toy products.
            matches.append(
                {
                    "title": r["title"],
                    "sim": r["similarity"],
                    "title_similarity": r["title_similarity"],
                    "abstract": r["abstract"],
                }
            )

        await conn.close()
        return matches
if False:
    embeding = np.random.rand(768)

    # Run the SQL commands now.
    res = await fetch(embeding)  # type: ignore
    res

In [46]:
from transformers import AutoTokenizer, AutoModel
model_type = 'jina'
if model_type == "bert":
    tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_uncased')
    model = AutoModel.from_pretrained('allenai/scibert_scivocab_uncased')
elif model_type == "roberta":
    tokenizer = AutoTokenizer.from_pretrained('FacebookAI/roberta-base')
    model = AutoModel.from_pretrained('FacebookAI/roberta-base')  
elif model_type == 'jina':
    tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v3')
    model = AutoModel.from_pretrained('jinaai/jina-embeddings-v3', trust_remote_code=True)
else:
    Exception("No model chosen")

TypeError: XLMRobertaLoRA.__init__() got an unexpected keyword argument 'task'

In [10]:
import torch
# Sample text
text = "SciBERT is a pretrained transformer model for scientific text."

# Tokenize the input text
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)

# Forward pass to get the embeddings
with torch.no_grad():
    outputs = model(**inputs)

# Extract the last hidden states (embeddings)
last_hidden_states = outputs.last_hidden_state

# Optionally, get the embeddings for the [CLS] token
cls_embedding = last_hidden_states[:, 0, :]

# Print the shape of the embeddings
print("Shape of last hidden states:", last_hidden_states.shape)
print("Shape of CLS embedding:", cls_embedding.shape)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Shape of last hidden states: torch.Size([1, 14, 768])
Shape of CLS embedding: torch.Size([1, 768])


In [11]:
import pandas as pd
import numpy as np
df = pd.read_csv("test.csv")
df["embedding"] = [np.random.rand(768) for _ in range(len(df))]
with torch.no_grad():
    embeds = model(**tokenizer(df["abstract"].to_list(), return_tensors='pt', padding=True, truncation=True)).last_hidden_state[:, 0, :]
    for idx, emb in enumerate(embeds):
        df.at[idx, "embedding"] = emb.numpy()
df

Unnamed: 0,doi,title,abstract,content,citation_text,embedding
0,1a,test10,An article on NLP discussing transformers,the long article1,citation,"[-0.1714096, 0.5485956, -0.2250522, 0.00396031..."
1,2a,test20,An article on biology discussing frogs,the long article2,citation,"[-0.6305636, -0.57029116, 0.019760396, 0.61680..."
2,3a,test30,An article on AI discussing Neural networks,the long article3,citation,"[-0.7799448, -0.13999197, 0.41758755, 0.049859..."


In [103]:
# await reset_database() #[-0.1714102, 0.548595, -0.22505158, 0.003960
# await insert_into_papers(df) #[-0.1714096, 0.5485956, -0.22505

In [159]:
async def fetch_all():
    loop = asyncio.get_running_loop()
    async with Connector(loop=loop) as connector:
        # Create connection to Cloud SQL database.
        conn: asyncpg.Connection = await connector.connect_async(
            f"{project_id}:{region}:{instance_name}",  # Cloud SQL instance connection name
            "asyncpg",
            user=f"{database_user}",
            password=f"{database_password}",
            db=f"{database_name}",
        )

        await register_vector(conn)
        # Find similar products to the query using cosine similarity search
        # over all vector embeddings. This new feature is provided by `pgvector`.
        results = await conn.fetch(
            """
                            SELECT *
                            FROM papers
                            """
        )

        if len(results) == 0:
            return []
        matches = []
        for r in results:
            # Collect the description for all the matched similar toy products.
            matches.append(
                {
                    "doi": r["doi"],
                    "title": r["title"],
                    "abstract": r["abstract"],
                    "content": r["content"],
                    "emb": r["embedding"],
                    "id": r["id"],
                }
            )

        await conn.close()
        return matches
matches = await fetch_all()
len(matches)

10000

In [106]:
df_abstract = pd.read_csv("./data/ml_100_abstracts.csv")
df_abstract["doi"] = None
df_abstract["title"] = None
df_abstract["content"] = "None"
df_abstract["citation_text"] = "None"
for i in range(len(df_abstract)):
    df_abstract.loc[i, "doi"] = "paper" + str(i)
    df_abstract.loc[i, "title"] = str(i)
    df_abstract.loc[i, "content"] = str(i)
    df_abstract.loc[i, "citation_text"] = str(i)
df_abstract.rename(columns={'Abstract': 'abstract'}, inplace=True)
df_abstract

Unnamed: 0.1,Unnamed: 0,abstract,doi,title,content,citation_text
0,0,The problem of statistical learning is to co...,paper0,0,0,0
1,1,"In a sensor network, in practice, the commun...",paper1,1,1,1
2,2,We analyze the generalization performance of...,paper2,2,2,2
3,3,In the process of training Support Vector Ma...,paper3,3,3,3
4,4,Probabilistic graphical models (PGMs) have b...,paper4,4,4,4
...,...,...,...,...,...,...
95,95,PRISM is an extension of Prolog with probabi...,paper95,95,95,95
96,96,"Recently, applying the novel data mining tec...",paper96,96,96,96
97,97,The typical behavior of optimal solutions to...,paper97,97,97,97
98,98,In this paper we analyze judgement aggregati...,paper98,98,98,98


In [149]:
import pandas as pd
def add_embedings(df: pd.DataFrame, model, tokenizer, field_to_embedd="abstract", embedding_field_name="embedding"):
    # df["embedding"] = None
    with torch.no_grad():
        inputs = tokenizer(df[field_to_embedd].to_list(), return_tensors='pt', padding=True, truncation=True)
        embeds = model(**inputs, task="retrieval.passage").last_hidden_state[:, 0, :]
        df.insert(len(df), embedding_field_name, embeds.tolist())
        # assert len(df) == len(embeds)
        # for idx, emb in enumerate(embeds):
        #     df.at[idx, "embedding"] = emb.numpy()
df_abstract

Unnamed: 0.1,Unnamed: 0,abstract,doi,title,content,citation_text
0,0,The problem of statistical learning is to co...,paper0,0,0,0
1,1,"In a sensor network, in practice, the commun...",paper1,1,1,1
2,2,We analyze the generalization performance of...,paper2,2,2,2
3,3,In the process of training Support Vector Ma...,paper3,3,3,3
4,4,Probabilistic graphical models (PGMs) have b...,paper4,4,4,4
...,...,...,...,...,...,...
95,95,PRISM is an extension of Prolog with probabi...,paper95,95,95,95
96,96,"Recently, applying the novel data mining tec...",paper96,96,96,96
97,97,The typical behavior of optimal solutions to...,paper97,97,97,97
98,98,In this paper we analyze judgement aggregati...,paper98,98,98,98


In [52]:
# reset_database() #[-0.1714102, 0.548595, -0.22505158, 0.003960
# await insert_into_papers(df_abstract) #[-0.1714096, 0.5485956, -0.22505

  reset_database() #[-0.1714102, 0.548595, -0.22505158, 0.003960
  expiration = x509.not_valid_after


DROP TABLE


In [142]:
text = ["summarize the papers related to linear regression",
        "quantum mechanics is a very interesting field plese get mo som epapers related to that subject",
        "What are transformers what are examples of transformer, encoder decoder models"
        ]

async def get_similar(text, nbr_articles=5):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)

    # Forward pass to get the embeddings
    with torch.no_grad():
        outputs = model(**inputs, task="retrieval.query")

    # Extract the last hidden states (embeddings)
    last_hidden_states = outputs.last_hidden_state

    # Optionally, get the embeddings for the [CLS] token
    cls_embedding = last_hidden_states[:, 0, :]
    return await fetch(cls_embedding[0], nbr_articles=nbr_articles, DESC=True)

for sent in text:
    print(sent)
    res = await get_similar(sent)
    for i in res:
        print(i)
    

Flash attention implementation does not support kwargs: task


summarize the papers related to linear regression


Flash attention implementation does not support kwargs: task


{'title': 'Near-optimal Coresets For Least-Squares Regression', 'sim': 0.678784746859353, 'title_similarity': None, 'abstract': '  We study (constrained) least-squares regression as well as multiple response\nleast-squares regression and ask the question of whether a subset of the data,\na coreset, suffices to compute a good approximate solution to the regression.\nWe give deterministic, low order polynomial-time algorithms to construct such\ncoresets with approximation guarantees, together with lower bounds indicating\nthat there is not much room for improvement upon our results.\n'}
{'title': 'Low rank approximation and decomposition of large matrices using error\n  correcting codes', 'sim': 0.6493639302953358, 'title_similarity': None, 'abstract': '  Low rank approximation is an important tool used in many applications of\nsignal processing and machine learning. Recently, randomized sketching\nalgorithms were proposed to effectively construct low rank approximations and\nobtain appr

Flash attention implementation does not support kwargs: task


{'title': 'Strange Bedfellows: Quantum Mechanics and Data Mining', 'sim': 0.717725049345647, 'title_similarity': None, 'abstract': '  Last year, in 2008, I gave a talk titled {\\it Quantum Calisthenics}. This\nyear I am going to tell you about how the work I described then has spun off\ninto a most unlikely direction. What I am going to talk about is how one maps\nthe problem of finding clusters in a given data set into a problem in quantum\nmechanics. I will then use the tricks I described to let quantum evolution lets\nthe clusters come together on their own.\n'}
{'title': 'Quantum machine learning: a classical perspective', 'sim': 0.7173719536168784, 'title_similarity': None, 'abstract': '  Recently, increased computational power and data availability, as well as\nalgorithmic advances, have led machine learning techniques to impressive\nresults in regression, classification, data-generation and reinforcement\nlearning tasks. Despite these successes, the proximity to the physical lim

In [58]:
df_json = pd.read_json("./data/ml_papers.json")

In [144]:
df_json_filterd = df_json[['doi', 'title', 'abstract', 'id']]
df_json_filterd["content"] = "None"
df_json_filterd["citation_text"] = "None"
df_json_filterd = df_json_filterd.drop_duplicates(subset='doi', keep='first')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_json_filterd["content"] = "None"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_json_filterd["citation_text"] = "None"


In [157]:
# import warnings
# from cryptography.utils import CryptographyDeprecationWarning 
# warnings.filterwarnings("ignore", category=CryptographyDeprecationWarning)
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
dois = [item['doi'] for item in await fetch_all()]
print(dois)
chunk_size = 5
seq = df_json_filterd[~df_json_filterd['doi'].isna()][5000:10000]
seq = seq[~seq['doi'].isin(dois)]
for idx, chunk in enumerate(chunker(seq=seq, size=chunk_size)):
    print("processing ", idx*chunk_size, "to", (idx+1)*chunk_size)
    add_embedings(chunk, model=model, tokenizer=tokenizer)
    add_embedings(chunk, model=model, tokenizer=tokenizer, field_to_embedd="title", embedding_field_name="title_embedding")
    await insert_into_papers(chunk)


['10.1109/TSMC.2021.3074496', '10.1109/TWC.2016.2636139', '10.1109/TSP.2016.2628348', '10.1145/2978578', '10.1109/TSP.2017.2656847', '10.1109/TAC.2018.2813009', '10.1016/j.csda.2018.03.015', '10.1016/j.jides.2016.11.001', '10.1145/1569901.1570100', '10.4236/ijmnta.2014.34020', '10.1186/s13062-017-0203-4', '10.1016/j.cviu.2017.05.007', '10.18653/v1/W16-0533', '10.3390/e18120442', '10.1209/0295-5075/117/38002', '10.1109/ICASSP.2017.7952599', '10.1007/s10710-010-9103-4', '10.1109/LSP.2016.2589962', '10.1109/ACSSC.2017.8335713', '10.1109/ACCESS.2020.2976199', '10.1007/s00365-019-09489-8', '10.1109/TNNLS.2016.2572310', '10.13140/RG.2.1.2436.5683', '10.1142/S0129183109013613', '10.1111/j.1467-9868.2009.00698.x', '10.1143/JPSJ.77.094801', '10.1109/TIT.2010.2090235', '10.1016/j.isprsjprs.2015.01.006', '10.1109/JSTSP.2018.2846218', '10.1007/s10994-019-05856-5', '10.18653/v1/W19-8630', '10.1103/PhysRevC.80.044332', '10.1109/ISCC.2008.4625611', '10.3847/2041-8213/aa603d', '10.4204/EPTCS.215.7', '

Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  5 to 10


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  10 to 15


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  15 to 20


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  20 to 25


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  25 to 30


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  30 to 35


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  35 to 40


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  40 to 45


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  45 to 50


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  50 to 55


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  55 to 60


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  60 to 65


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  65 to 70


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  70 to 75


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  75 to 80


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  80 to 85


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  85 to 90


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  90 to 95


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  95 to 100


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  100 to 105


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  105 to 110


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  110 to 115


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  115 to 120


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  120 to 125


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  125 to 130


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  130 to 135


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  135 to 140


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  140 to 145


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  145 to 150


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  150 to 155


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  155 to 160


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  160 to 165


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  165 to 170


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  170 to 175


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  175 to 180


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  180 to 185


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  185 to 190


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  190 to 195


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  195 to 200


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  200 to 205


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  205 to 210


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  210 to 215


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  215 to 220


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  220 to 225


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  225 to 230


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  230 to 235


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  235 to 240


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  240 to 245


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  245 to 250


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  250 to 255


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  255 to 260


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  260 to 265


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  265 to 270


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  270 to 275


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  275 to 280


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  280 to 285


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  285 to 290


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  290 to 295


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  295 to 300


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  300 to 305


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  305 to 310


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  310 to 315


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  315 to 320


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  320 to 325


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  325 to 330


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  330 to 335


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  335 to 340


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  340 to 345


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  345 to 350


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  350 to 355


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  355 to 360


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  360 to 365


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  365 to 370


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  370 to 375


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  375 to 380


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  380 to 385


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  385 to 390


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  390 to 395


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  395 to 400


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  400 to 405


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  405 to 410


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  410 to 415


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  415 to 420


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  420 to 425


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  425 to 430


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  430 to 435


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  435 to 440


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  440 to 445


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  445 to 450


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  450 to 455


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  455 to 460


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  460 to 465


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  465 to 470


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  470 to 475


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  475 to 480


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  480 to 485


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  485 to 490


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  490 to 495


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  495 to 500


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  500 to 505


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  505 to 510


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  510 to 515


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  515 to 520


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  520 to 525


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  525 to 530


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  530 to 535


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  535 to 540


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  540 to 545


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  545 to 550


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  550 to 555


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  555 to 560


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  560 to 565


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  565 to 570


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  570 to 575


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  575 to 580


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  580 to 585


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  585 to 590


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  590 to 595


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  595 to 600


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  600 to 605


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  605 to 610


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  610 to 615


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  615 to 620


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  620 to 625


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  625 to 630


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  630 to 635


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  635 to 640


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  640 to 645


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  645 to 650


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  650 to 655


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  655 to 660


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  660 to 665


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  665 to 670


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  670 to 675


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  675 to 680


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  680 to 685


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  685 to 690


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  690 to 695


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  695 to 700


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  700 to 705


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  705 to 710


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  710 to 715


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  715 to 720


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  720 to 725


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  725 to 730


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  730 to 735


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  735 to 740


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  740 to 745


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  745 to 750


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  750 to 755


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  755 to 760


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  760 to 765


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  765 to 770


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  770 to 775


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  775 to 780


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  780 to 785


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  785 to 790


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  790 to 795


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  795 to 800


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  800 to 805


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  805 to 810


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  810 to 815


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  815 to 820


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  820 to 825


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  825 to 830


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  830 to 835


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  835 to 840


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  840 to 845


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  845 to 850


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  850 to 855


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  855 to 860


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  860 to 865


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  865 to 870


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  870 to 875


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  875 to 880


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  880 to 885


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  885 to 890


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  890 to 895


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  895 to 900


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  900 to 905


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  905 to 910


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  910 to 915


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  915 to 920


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  920 to 925


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  925 to 930


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  930 to 935


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  935 to 940


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  940 to 945


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  945 to 950


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  950 to 955


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  955 to 960


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  960 to 965


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  965 to 970


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  970 to 975


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  975 to 980


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  980 to 985


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  985 to 990


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  990 to 995


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  995 to 1000


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1000 to 1005


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1005 to 1010


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1010 to 1015


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1015 to 1020


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1020 to 1025


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1025 to 1030


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1030 to 1035


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1035 to 1040


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1040 to 1045


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1045 to 1050


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1050 to 1055


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1055 to 1060


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1060 to 1065


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1065 to 1070


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1070 to 1075


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1075 to 1080


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1080 to 1085


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1085 to 1090


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1090 to 1095


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1095 to 1100


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1100 to 1105


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1105 to 1110


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1110 to 1115


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1115 to 1120


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1120 to 1125


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1125 to 1130


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1130 to 1135


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1135 to 1140


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1140 to 1145


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1145 to 1150


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1150 to 1155


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1155 to 1160


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1160 to 1165


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1165 to 1170


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1170 to 1175


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1175 to 1180


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1180 to 1185


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1185 to 1190


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1190 to 1195


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1195 to 1200


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1200 to 1205


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1205 to 1210


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1210 to 1215


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1215 to 1220


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1220 to 1225


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1225 to 1230


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1230 to 1235


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1235 to 1240


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1240 to 1245


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1245 to 1250


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1250 to 1255


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1255 to 1260


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1260 to 1265


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1265 to 1270


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1270 to 1275


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1275 to 1280


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1280 to 1285


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1285 to 1290


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1290 to 1295


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1295 to 1300


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1300 to 1305


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1305 to 1310


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1310 to 1315


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1315 to 1320


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1320 to 1325


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1325 to 1330


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1330 to 1335


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1335 to 1340


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1340 to 1345


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1345 to 1350


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1350 to 1355


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1355 to 1360


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1360 to 1365


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1365 to 1370


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1370 to 1375


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1375 to 1380


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1380 to 1385


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1385 to 1390


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1390 to 1395


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1395 to 1400


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1400 to 1405


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1405 to 1410


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1410 to 1415


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1415 to 1420


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1420 to 1425


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1425 to 1430


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1430 to 1435


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1435 to 1440


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1440 to 1445


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1445 to 1450


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1450 to 1455


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1455 to 1460


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1460 to 1465


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1465 to 1470


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1470 to 1475


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1475 to 1480


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1480 to 1485


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1485 to 1490


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1490 to 1495


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1495 to 1500


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1500 to 1505


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1505 to 1510


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1510 to 1515


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1515 to 1520


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1520 to 1525


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1525 to 1530


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1530 to 1535


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1535 to 1540


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1540 to 1545


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1545 to 1550


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1550 to 1555


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1555 to 1560


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1560 to 1565


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1565 to 1570


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1570 to 1575


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1575 to 1580


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1580 to 1585


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1585 to 1590


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1590 to 1595


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1595 to 1600


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1600 to 1605


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1605 to 1610


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1610 to 1615


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1615 to 1620


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1620 to 1625


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1625 to 1630


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1630 to 1635


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1635 to 1640


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1640 to 1645


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1645 to 1650


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1650 to 1655


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1655 to 1660


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1660 to 1665


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1665 to 1670


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1670 to 1675


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1675 to 1680


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1680 to 1685


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1685 to 1690


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1690 to 1695


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1695 to 1700


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1700 to 1705


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1705 to 1710


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1710 to 1715


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1715 to 1720


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1720 to 1725


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1725 to 1730


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1730 to 1735


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1735 to 1740


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1740 to 1745


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1745 to 1750


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1750 to 1755


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1755 to 1760


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1760 to 1765


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1765 to 1770


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1770 to 1775


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1775 to 1780


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1780 to 1785


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1785 to 1790


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1790 to 1795


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1795 to 1800


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1800 to 1805


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1805 to 1810


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1810 to 1815


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1815 to 1820


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1820 to 1825


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1825 to 1830


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1830 to 1835


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1835 to 1840


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1840 to 1845


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1845 to 1850


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1850 to 1855


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1855 to 1860


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1860 to 1865


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1865 to 1870


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1870 to 1875


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1875 to 1880


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1880 to 1885


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1885 to 1890


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1890 to 1895


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1895 to 1900


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1900 to 1905


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1905 to 1910


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1910 to 1915


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1915 to 1920


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1920 to 1925


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1925 to 1930


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1930 to 1935


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1935 to 1940


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1940 to 1945


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1945 to 1950


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1950 to 1955


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1955 to 1960


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1960 to 1965


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1965 to 1970


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1970 to 1975


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1975 to 1980


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1980 to 1985


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1985 to 1990


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1990 to 1995


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  1995 to 2000


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2000 to 2005


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2005 to 2010


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2010 to 2015


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2015 to 2020


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2020 to 2025


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2025 to 2030


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2030 to 2035


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2035 to 2040


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2040 to 2045


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2045 to 2050


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2050 to 2055


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2055 to 2060


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2060 to 2065


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2065 to 2070


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2070 to 2075


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2075 to 2080


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2080 to 2085


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2085 to 2090


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2090 to 2095


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2095 to 2100


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2100 to 2105


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2105 to 2110


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2110 to 2115


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2115 to 2120


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2120 to 2125


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2125 to 2130


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2130 to 2135


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2135 to 2140


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2140 to 2145


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2145 to 2150


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2150 to 2155


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2155 to 2160


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2160 to 2165


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2165 to 2170


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2170 to 2175


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2175 to 2180


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2180 to 2185


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2185 to 2190


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2190 to 2195


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2195 to 2200


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2200 to 2205


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2205 to 2210


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2210 to 2215


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2215 to 2220


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2220 to 2225


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2225 to 2230


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2230 to 2235


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2235 to 2240


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2240 to 2245


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2245 to 2250


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2250 to 2255


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2255 to 2260


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2260 to 2265


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2265 to 2270


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2270 to 2275


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2275 to 2280


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2280 to 2285


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2285 to 2290


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2290 to 2295


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2295 to 2300


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2300 to 2305


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2305 to 2310


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2310 to 2315


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2315 to 2320


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2320 to 2325


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2325 to 2330


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2330 to 2335


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2335 to 2340


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2340 to 2345


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2345 to 2350


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2350 to 2355


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2355 to 2360


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2360 to 2365


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2365 to 2370


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2370 to 2375


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2375 to 2380


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2380 to 2385


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2385 to 2390


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2390 to 2395


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2395 to 2400


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2400 to 2405


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2405 to 2410


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2410 to 2415


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2415 to 2420


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2420 to 2425


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2425 to 2430


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2430 to 2435


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2435 to 2440


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2440 to 2445


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2445 to 2450


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2450 to 2455


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2455 to 2460


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2460 to 2465


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2465 to 2470


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2470 to 2475


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2475 to 2480


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2480 to 2485


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2485 to 2490


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2490 to 2495


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2495 to 2500


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2500 to 2505


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2505 to 2510


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2510 to 2515


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2515 to 2520


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2520 to 2525


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2525 to 2530


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2530 to 2535


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2535 to 2540


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2540 to 2545


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2545 to 2550


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2550 to 2555


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2555 to 2560


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2560 to 2565


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2565 to 2570


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2570 to 2575


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2575 to 2580


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2580 to 2585


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2585 to 2590


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2590 to 2595


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2595 to 2600


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2600 to 2605


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2605 to 2610


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2610 to 2615


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2615 to 2620


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2620 to 2625


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2625 to 2630


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2630 to 2635


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2635 to 2640


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2640 to 2645


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2645 to 2650


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2650 to 2655


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2655 to 2660


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2660 to 2665


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2665 to 2670


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2670 to 2675


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2675 to 2680


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2680 to 2685


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2685 to 2690


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2690 to 2695


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2695 to 2700


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2700 to 2705


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2705 to 2710


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2710 to 2715


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2715 to 2720


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2720 to 2725


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2725 to 2730


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2730 to 2735


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2735 to 2740


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2740 to 2745


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2745 to 2750


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2750 to 2755


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2755 to 2760


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2760 to 2765


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2765 to 2770


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2770 to 2775


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2775 to 2780


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2780 to 2785


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2785 to 2790


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2790 to 2795


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2795 to 2800


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2800 to 2805


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2805 to 2810


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2810 to 2815


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2815 to 2820


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2820 to 2825


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2825 to 2830


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2830 to 2835


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2835 to 2840


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2840 to 2845


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2845 to 2850


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2850 to 2855


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2855 to 2860


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2860 to 2865


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2865 to 2870


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2870 to 2875


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2875 to 2880


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2880 to 2885


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2885 to 2890


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2890 to 2895


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2895 to 2900


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2900 to 2905


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2905 to 2910


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2910 to 2915


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2915 to 2920


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2920 to 2925


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2925 to 2930


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2930 to 2935


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2935 to 2940


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2940 to 2945


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2945 to 2950


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2950 to 2955


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2955 to 2960


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2960 to 2965


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2965 to 2970


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2970 to 2975


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2975 to 2980


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2980 to 2985


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2985 to 2990


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2990 to 2995


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  2995 to 3000


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3000 to 3005


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3005 to 3010


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3010 to 3015


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3015 to 3020


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3020 to 3025


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3025 to 3030


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3030 to 3035


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3035 to 3040


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3040 to 3045


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3045 to 3050


Flash attention implementation does not support kwargs: task
Flash attention implementation does not support kwargs: task


processing  3050 to 3055


Flash attention implementation does not support kwargs: task


In [None]:

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
dois = [item['doi'] for item in await fetch_all() if item['id'] == None]
print(len(dois))
chunk_size = 5
seq = df_json_filterd[~df_json_filterd['doi'].isna()][5000:10000]
seq = seq[seq['doi'].isin(dois)]
for idx, chunk in enumerate(chunker(seq=seq, size=chunk_size)):
    print("processing ", idx*chunk_size, "to", (idx+1)*chunk_size)
    add_embedings(chunk, model=model, tokenizer=tokenizer, field_to_embedd="title", embedding_field_name="title_embedding")
    await update_papers(chunk)

Flash attention implementation does not support kwargs: task


1945
processing  0 to 5


Flash attention implementation does not support kwargs: task


processing  5 to 10


Flash attention implementation does not support kwargs: task


processing  10 to 15


Flash attention implementation does not support kwargs: task


processing  15 to 20


Flash attention implementation does not support kwargs: task


processing  20 to 25


Flash attention implementation does not support kwargs: task


processing  25 to 30


Flash attention implementation does not support kwargs: task


processing  30 to 35


Flash attention implementation does not support kwargs: task


processing  35 to 40


Flash attention implementation does not support kwargs: task


processing  40 to 45


Flash attention implementation does not support kwargs: task


processing  45 to 50


Flash attention implementation does not support kwargs: task


processing  50 to 55


Flash attention implementation does not support kwargs: task


processing  55 to 60


Flash attention implementation does not support kwargs: task


processing  60 to 65


Flash attention implementation does not support kwargs: task


processing  65 to 70


Flash attention implementation does not support kwargs: task


processing  70 to 75


Flash attention implementation does not support kwargs: task


processing  75 to 80


Flash attention implementation does not support kwargs: task


processing  80 to 85


Flash attention implementation does not support kwargs: task


processing  85 to 90


Flash attention implementation does not support kwargs: task


processing  90 to 95


Flash attention implementation does not support kwargs: task


processing  95 to 100


Flash attention implementation does not support kwargs: task


processing  100 to 105


Flash attention implementation does not support kwargs: task


processing  105 to 110


Flash attention implementation does not support kwargs: task


processing  110 to 115


Flash attention implementation does not support kwargs: task


processing  115 to 120


Flash attention implementation does not support kwargs: task


processing  120 to 125


Flash attention implementation does not support kwargs: task


processing  125 to 130


Flash attention implementation does not support kwargs: task


processing  130 to 135


Flash attention implementation does not support kwargs: task


processing  135 to 140


Flash attention implementation does not support kwargs: task


processing  140 to 145


Flash attention implementation does not support kwargs: task


processing  145 to 150


Flash attention implementation does not support kwargs: task


processing  150 to 155


Flash attention implementation does not support kwargs: task


processing  155 to 160


Flash attention implementation does not support kwargs: task


processing  160 to 165


Flash attention implementation does not support kwargs: task


processing  165 to 170


Flash attention implementation does not support kwargs: task


processing  170 to 175


Flash attention implementation does not support kwargs: task


processing  175 to 180


Flash attention implementation does not support kwargs: task


processing  180 to 185


Flash attention implementation does not support kwargs: task


processing  185 to 190


Flash attention implementation does not support kwargs: task


processing  190 to 195


Flash attention implementation does not support kwargs: task


processing  195 to 200


Flash attention implementation does not support kwargs: task


processing  200 to 205


Flash attention implementation does not support kwargs: task


processing  205 to 210


Flash attention implementation does not support kwargs: task


processing  210 to 215


Flash attention implementation does not support kwargs: task


processing  215 to 220


Flash attention implementation does not support kwargs: task


processing  220 to 225


Flash attention implementation does not support kwargs: task


processing  225 to 230


Flash attention implementation does not support kwargs: task


processing  230 to 235


Flash attention implementation does not support kwargs: task


processing  235 to 240


Flash attention implementation does not support kwargs: task


processing  240 to 245


Flash attention implementation does not support kwargs: task


processing  245 to 250


Flash attention implementation does not support kwargs: task


processing  250 to 255


Flash attention implementation does not support kwargs: task


processing  255 to 260


Flash attention implementation does not support kwargs: task


processing  260 to 265


Flash attention implementation does not support kwargs: task


processing  265 to 270


Flash attention implementation does not support kwargs: task


processing  270 to 275


Flash attention implementation does not support kwargs: task


processing  275 to 280


Flash attention implementation does not support kwargs: task


processing  280 to 285


Flash attention implementation does not support kwargs: task


processing  285 to 290


Flash attention implementation does not support kwargs: task


processing  290 to 295


Flash attention implementation does not support kwargs: task


processing  295 to 300


Flash attention implementation does not support kwargs: task


processing  300 to 305


Flash attention implementation does not support kwargs: task


processing  305 to 310


Flash attention implementation does not support kwargs: task


processing  310 to 315


Flash attention implementation does not support kwargs: task


processing  315 to 320


Flash attention implementation does not support kwargs: task


processing  320 to 325


Flash attention implementation does not support kwargs: task


processing  325 to 330


Flash attention implementation does not support kwargs: task


processing  330 to 335


Flash attention implementation does not support kwargs: task


processing  335 to 340


Flash attention implementation does not support kwargs: task


processing  340 to 345


Flash attention implementation does not support kwargs: task


processing  345 to 350


Flash attention implementation does not support kwargs: task


processing  350 to 355


Flash attention implementation does not support kwargs: task


processing  355 to 360


Flash attention implementation does not support kwargs: task


processing  360 to 365


Flash attention implementation does not support kwargs: task


processing  365 to 370


Flash attention implementation does not support kwargs: task


processing  370 to 375


Flash attention implementation does not support kwargs: task


processing  375 to 380


Flash attention implementation does not support kwargs: task


processing  380 to 385


Flash attention implementation does not support kwargs: task


processing  385 to 390


Flash attention implementation does not support kwargs: task


processing  390 to 395


Flash attention implementation does not support kwargs: task


processing  395 to 400


Flash attention implementation does not support kwargs: task


processing  400 to 405


Flash attention implementation does not support kwargs: task


processing  405 to 410


Flash attention implementation does not support kwargs: task


processing  410 to 415


Flash attention implementation does not support kwargs: task


processing  415 to 420


Flash attention implementation does not support kwargs: task


processing  420 to 425


Flash attention implementation does not support kwargs: task


processing  425 to 430


Flash attention implementation does not support kwargs: task


processing  430 to 435


Flash attention implementation does not support kwargs: task


processing  435 to 440


Flash attention implementation does not support kwargs: task


processing  440 to 445


Flash attention implementation does not support kwargs: task


processing  445 to 450


Flash attention implementation does not support kwargs: task


processing  450 to 455


Flash attention implementation does not support kwargs: task


processing  455 to 460


Flash attention implementation does not support kwargs: task


processing  460 to 465


Flash attention implementation does not support kwargs: task


processing  465 to 470


Flash attention implementation does not support kwargs: task


processing  470 to 475


Flash attention implementation does not support kwargs: task


processing  475 to 480


Flash attention implementation does not support kwargs: task


processing  480 to 485


Flash attention implementation does not support kwargs: task


processing  485 to 490


Flash attention implementation does not support kwargs: task


processing  490 to 495


Flash attention implementation does not support kwargs: task


processing  495 to 500


Flash attention implementation does not support kwargs: task


processing  500 to 505


Flash attention implementation does not support kwargs: task


processing  505 to 510


Flash attention implementation does not support kwargs: task


processing  510 to 515


Flash attention implementation does not support kwargs: task


processing  515 to 520


Flash attention implementation does not support kwargs: task


processing  520 to 525


Flash attention implementation does not support kwargs: task


processing  525 to 530


Flash attention implementation does not support kwargs: task


processing  530 to 535


Flash attention implementation does not support kwargs: task


processing  535 to 540


Flash attention implementation does not support kwargs: task


processing  540 to 545


Flash attention implementation does not support kwargs: task


processing  545 to 550


Flash attention implementation does not support kwargs: task


processing  550 to 555


Flash attention implementation does not support kwargs: task


processing  555 to 560


Flash attention implementation does not support kwargs: task


processing  560 to 565


Flash attention implementation does not support kwargs: task


processing  565 to 570


Flash attention implementation does not support kwargs: task


processing  570 to 575


Flash attention implementation does not support kwargs: task


processing  575 to 580


Flash attention implementation does not support kwargs: task


processing  580 to 585


Flash attention implementation does not support kwargs: task


processing  585 to 590


Flash attention implementation does not support kwargs: task


processing  590 to 595


Flash attention implementation does not support kwargs: task


processing  595 to 600


Flash attention implementation does not support kwargs: task


processing  600 to 605


Flash attention implementation does not support kwargs: task


processing  605 to 610


Flash attention implementation does not support kwargs: task


processing  610 to 615


Flash attention implementation does not support kwargs: task


processing  615 to 620


Flash attention implementation does not support kwargs: task


processing  620 to 625


Flash attention implementation does not support kwargs: task


processing  625 to 630


Flash attention implementation does not support kwargs: task


processing  630 to 635


Flash attention implementation does not support kwargs: task


processing  635 to 640


Flash attention implementation does not support kwargs: task


processing  640 to 645


Flash attention implementation does not support kwargs: task


processing  645 to 650


Flash attention implementation does not support kwargs: task


processing  650 to 655


Flash attention implementation does not support kwargs: task


processing  655 to 660


Flash attention implementation does not support kwargs: task


processing  660 to 665


Flash attention implementation does not support kwargs: task


processing  665 to 670


Flash attention implementation does not support kwargs: task


processing  670 to 675


Flash attention implementation does not support kwargs: task


processing  675 to 680


Flash attention implementation does not support kwargs: task


processing  680 to 685


Flash attention implementation does not support kwargs: task


processing  685 to 690


Flash attention implementation does not support kwargs: task


processing  690 to 695


Flash attention implementation does not support kwargs: task


processing  695 to 700


Flash attention implementation does not support kwargs: task


processing  700 to 705


Flash attention implementation does not support kwargs: task


processing  705 to 710


Flash attention implementation does not support kwargs: task


processing  710 to 715


Flash attention implementation does not support kwargs: task


processing  715 to 720


Flash attention implementation does not support kwargs: task


processing  720 to 725


Flash attention implementation does not support kwargs: task


processing  725 to 730


Flash attention implementation does not support kwargs: task


processing  730 to 735


Flash attention implementation does not support kwargs: task


processing  735 to 740


Flash attention implementation does not support kwargs: task


processing  740 to 745


Flash attention implementation does not support kwargs: task


processing  745 to 750


Flash attention implementation does not support kwargs: task


processing  750 to 755


Flash attention implementation does not support kwargs: task


processing  755 to 760


Flash attention implementation does not support kwargs: task


processing  760 to 765


Flash attention implementation does not support kwargs: task


processing  765 to 770


In [145]:
df_json_filterd


Unnamed: 0,doi,title,abstract,id,content,citation_text
0,10.1109/ITW.2007.4313111,Learning from compressed observations,The problem of statistical learning is to co...,0704.0671,,
1,10.1109/TSP.2008.920143,Sensor Networks with Random Links: Topology De...,"In a sensor network, in practice, the commun...",0704.0954,,
2,10.1143/JPSJ.76.114001,Statistical Mechanics of Nonlinear On-line Lea...,We analyze the generalization performance of...,0705.2318,,
3,10.1109/ICTAI.2007.99,A Novel Model of Working Set Selection for SMO...,In the process of training Support Vector Ma...,0706.0585,,
4,10.1371/journal.pcbi.0030252,Getting started in probabilistic graphical models,Probabilistic graphical models (PGMs) have b...,0706.2040,,
...,...,...,...,...,...,...
24983,10.1016/j.amc.2004.09.003,A Model for Prejudiced Learning in Noisy Envir...,Based on the heuristics that maintaining pre...,nlin/0306055,,
24984,10.1016/j.physd.2005.06.031,Stability and Diversity in Collective Adaptation,We derive a class of macroscopic differentia...,nlin/0408039,,
24985,10.1007/978-3-540-73133-7_1,Structural Inference of Hierarchies in Networks,One property of networks that has received c...,physics/0610051,,
24986,10.1073/pnas.0406011101,Parametric Inference for Biological Sequence A...,One of the major successes in computational ...,q-bio/0401033,,
