# Feedback RAG system with code analyzer

In [None]:
# 1, process SEAT
# 2, embedding them
# 3, ResLLM checks if the information is valuable

In [1]:
!pip install datasets
!pip install transformers
!pip install accelerate
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install sentence_transformers
!pip install lm-format-enforcer
!pip install --upgrade typing_extensions

Collecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=12.0.0 (from datasets)
  Downloading pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting tqdm>=4.62.1 (from datasets)
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.1

In [2]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Make chunks

In [4]:
# parameters

database_name = "python_game" # a directory named this must be in "data" folder
extensions = [".py", ".h", ".cpp", ".cs"]
model_id = "mistralai/Mistral-7B-Instruct-v0.2"  #"cyberagent/calm2-7b-chat"
max_tokens = 1000
min_tokens = 30

# about where the key starts to split the text

# index : words to be where text should be split
# first element(0 to 1): process_text_size * element is the start point of the key splitting. the samller the element is, the more likely it is for the key to split the text.
# second element(0 or 1): the first element should become   if 0: <text1><key> | <text2>,  if 1: <text1> | <key><text2>
rules = [
    {
        "namespace " : 1,
        "class " : 1,
    },

    {
        "def " : 1,
        "void " : 1,
    },

    {
        'if ' : 1,
    },

    {
        "else " : 1,
        "elif " : 1,
    },
    

    {
        "\n\n" : 0,
        "<0x0A><0x0A>" : 0,
        "\x0A\x0A" : 0,
    },

    {
        "\n" : 0,
        "<0x0A>" : 0,
        "\x0A" : 0,
    },
]


In [5]:
import os
import json

from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)

if not os.path.exists("chunks"):
    os.makedirs("chunks")

if not os.path.exists("file_paths"):
    os.makedirs("file_paths")

file_paths = []
directory = "data/" + database_name
for root, dirnames, filenames in os.walk(directory):
    for filename in filenames:
        for extension in extensions:
            if filename.endswith(extension):
                file_paths.append(os.path.join(root, filename))


def split_into_chunks(tokenizer, text, max_tokens, min_tokens, rules):
    chunk_list = []
    instruction_list = []
    
    start_id = 0
    tokenized_text = tokenizer(text, return_tensors="pt", add_special_tokens = False)
    num_tokens = len(tokenized_text["input_ids"][0])
    text_size = len(text)
    
    for i in range(int(num_tokens/min_tokens)+1):
        if(start_id + max_tokens >= num_tokens):
            process_tokenized_text = tokenized_text["input_ids"][0][start_id:]
            processed_text = tokenizer.decode(process_tokenized_text, skip_special_tokens=True)
            chunk_list.append(processed_text)
            instruction_list.append(processed_text)
            break
            
        process_tokenized_text = tokenized_text["input_ids"][0][start_id : start_id + max_tokens]
        process_text = tokenizer.decode(process_tokenized_text, skip_special_tokens=True) # this should be decoded since subword token is difficult to handle
        process_text_size = len(process_text)

        #determine where should be split
        min_split_text = process_text_size
        is_text_split = False
        for rule_group in rules:
            for rule_key in rule_group.keys():
                split_process_text = process_text.split(rule_key)
                if len(split_process_text) > 1:
                    size_last_split_process_text = len(split_process_text[-1])
                    if (size_last_split_process_text < min_split_text) and (size_last_split_process_text < (1 - min_tokens/max_tokens)*process_text_size) and (size_last_split_process_text!=process_text_size):
                        is_text_split = True
                        min_split_text = size_last_split_process_text + len(rule_key) * rule_group[rule_key]

            if is_text_split:
                break
                        
        if is_text_split:
            processed_text = process_text[:-min_split_text]
        else:
            processed_text = process_text

        
        processed_tokenized_text = tokenizer(processed_text, return_tensors="pt", add_special_tokens = False)
        len_processed_text = len(processed_tokenized_text["input_ids"][0])  #this could be more than max_tokens without min sentence, which caused fatal error

        if len(processed_text)==0:
            break
            
        chunk_list.append(processed_text)  
        instruction_list.append(processed_text)
        
        start_id += len_processed_text # taking from process_tokenized_text to prevent the id from getting wrong

    return chunk_list, instruction_list

from datasets import Dataset, DatasetDict
import time

start = time.time()

all_chunks = []
all_file_paths = []
for file_path in file_paths:
    with open(file_path) as f:
        text = f.read()
    
    chunks, insts = split_into_chunks(tokenizer, text, max_tokens, min_tokens, rules)
    
    all_chunks += chunks
    fp = [file_path for i in range(len(chunks))]
    all_file_paths += fp
    
end = time.time()

print("total num chunk: ", len(all_chunks))
print("process time: ", end - start)


input_file_path = "chunks/" + database_name + ".json"
with open(input_file_path, 'w') as json_file:
    json.dump(all_chunks, json_file)

file_path_json = "file_paths/" + database_name + ".json"
with open(file_path_json, 'w') as json_file:
    json.dump(all_file_paths, json_file)
    
print("file saved")

# chunks: [<str> chunk of the text, ...]
# insts: [<str> instructions corresponds to chunk, ...]



total num chunk:  75
process time:  0.07407999038696289
file saved


### By algorithm

In [11]:
model_id = "cyberagent/calm2-7b-chat"
max_tokens = 100
min_tokens = 30  # this should be longer enough than keys of rules bellow since this should play an role to exclude the case of the process text starts with the key.(if the process text starts with the key, that can cause the "shit happend" in the code


In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)

In [13]:
with open("./doc_KIKYO.txt", 'r', encoding='utf-8') as f:
    text = f.read()

# about where the key starts to split the text

# index : words to be where text should be splited
# first element(0 to 1): process_text_size*element is the start point of the key splitting. the samller the element is, the more likely it is for the key to split the text.
# second element(0 or 1): the first element should become   if 0: <text1><key> | <text2>,  if 1: <text1> | <key><text2>
"""
rules = [
    {
        "\\\\section*" : 1,
    },

    {
        "\\\\subsection*" : 1,
    },

    {
        "\\\\begin{center}" : 1,
        "\\\\end{gather*}" : 0,
        "\\\\end{align*}" : 0,   
        "\\\\end{equation*}" : 0,
        "\\\\end{enumerate}" : 0,
    },

    {
        "\n\n" : 0,
        "<0x0A><0x0A>" : 0,
        "\x0A\x0A" : 0,
    },

    {
        "\n" : 0,
        "<0x0A>" : 0,
        "\x0A" : 0,
    },
]
"""
# 注：latexで\\となっているところでsplitしたい場合、ruleのkeyには\\\\と記述しないといけない

In [None]:
import json

def split_into_chunks(tokenizer, text, max_tokens, min_tokens, rules):
    chunk_list = []
    instruction_list = []
    
    start_id = 0
    tokenized_text = tokenizer(text, return_tensors="pt", add_special_tokens = False)
    num_tokens = len(tokenized_text["input_ids"][0])
    text_size = len(text)
    
    for i in range(int(num_tokens/min_tokens)+1):
        if(start_id + max_tokens >= num_tokens):
            process_tokenized_text = tokenized_text["input_ids"][0][start_id:]
            processed_text = tokenizer.decode(process_tokenized_text, skip_special_tokens=True)
            chunk_list.append(processed_text)
            instruction_list.append(processed_text)
            break
            
        process_tokenized_text = tokenized_text["input_ids"][0][start_id : start_id + max_tokens]
        process_text = tokenizer.decode(process_tokenized_text, skip_special_tokens=True) # this should be decoded since subword token is difficult to handle
        process_text_size = len(process_text)

        #determine where should be split
        min_split_text = process_text_size
        is_text_split = False
        for rule_group in rules:
            for rule_key in rule_group.keys():
                split_process_text = process_text.split(rule_key)
                if len(split_process_text) > 1:
                    size_last_split_process_text = len(split_process_text[-1])
                    if (size_last_split_process_text < min_split_text) and (size_last_split_process_text < (1 - min_tokens/max_tokens)*process_text_size) and (size_last_split_process_text!=process_text_size):
                        is_text_split = True
                        min_split_text = size_last_split_process_text + len(rule_key) * rule_group[rule_key]

            if is_text_split:
                break
                        
        if is_text_split:
            processed_text = process_text[:-min_split_text]
        else:
            processed_text = process_text

        
        processed_tokenized_text = tokenizer(processed_text, return_tensors="pt", add_special_tokens = False)
        len_processed_text = len(processed_tokenized_text["input_ids"][0])  #this could be more than max_tokens without min sentence, which caused fatal error

        if len(processed_text)==0:
            break
            
        chunk_list.append(processed_text)  
        instruction_list.append(processed_text)
        
        start_id += len_processed_text # taking from process_tokenized_text to prevent the id from getting wrong

    return chunk_list, instruction_list

from datasets import Dataset, DatasetDict
import time

start = time.time()
chunks, insts = split_into_chunks(tokenizer, text, max_tokens, min_tokens, rules)
end = time.time()

print("process time: ", end - start)

input_file_path = "chunks.json"
with open(input_file_path, 'w') as json_file:
    json.dump(chunks, json_file)

print("file saved")

# chunks: [<str> chunk of the text, ...]
# insts: [<str> instructions corresponds to chunk, ...]



### Manually

In [None]:
# make sure all datasets with .tex, .txt are in "./database" directory

#### gkvp_advanc.f90 & gkvp_bndry

In [3]:
chunks = [
    """
MODULE GKV_advnc
!-------------------------------------------------------------------------------
!
!    Calculate df/dt and time advance by Runge-Kutta-Gill method
!
!    Update history of gkvp_advnc.f90
!    --------------
!      gkvp_f0.62 (S. Maeyama, Mar 2023)
!        - Time-dependent metrics for rotating flux-tube model is implemented.
!          See lines at "!%%% For shearflow rotating flux tube model %%%".
!      gkvp_f0.57 (S. Maeyama, Oct 2020)
!        - Version number f0.57 is removed from filename.
!        - Unitialized access for padding iend_y<my is removed.
!
!-------------------------------------------------------------------------------

  use GKV_header
  use GKV_mpienv
  use GKV_fld,   only: fld_esfield, fld_emfield_hh, fld_hh2ff
  use GKV_exb,   only: exb_NL_term
  use GKV_colli, only: colli_LB!, colli_full
  use GKV_colliimp, only: colliimp_calc_colli_full, colliimp_set_param
  use GKV_bndry, only: bndry_bound_e,  &
                       bndry_zv_buffin, bndry_zv_sendrecv, bndry_zv_buffout
  use GKV_clock, only: clock_sta, clock_end
  use GKV_zfilter, only: zfilter
  use GKV_tips,  only: tips_reality
  use GKV_geom, only: geom_increment_time

  implicit none

  private

  integer, save :: nchunk_zv = 1, nchunk_yzv = 1, nchunk_yz = 1

  public   advnc_rkgsteps_rev, caldlt_rev


CONTAINS

    """,

    """
!--------------------------------------
  SUBROUTINE advnc_rkgsteps_rev( colliflag, ff, phi, Al, hh )
!--------------------------------------
!     time integration of GK equation using RKG method

    character(15), intent(in) :: colliflag ! = "collisional"
                                           ! = "collisionless"
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1) :: phi, Al
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: hh

    complex(kind=DP), save,  &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: qh
    complex(kind=DP), dimension(:,:,:,:,:), allocatable :: dh, cf, ef
    integer :: mx, my, iz, iv, im, istep
    integer, save :: iflg
    data iflg / 0 /
!$  integer :: nthreads, omp_get_num_threads

      if ( iflg == 0 ) then
        iflg = 1
!$OMP parallel
        do im = 0, nm
!$OMP do
          do iv = 1, 2*nv
            do iz = -nz, nz-1
              do my = 0, ny
                do mx = -nx, nx
                  qh(mx,my,iz,iv,im) = ( 0._DP, 0._DP )
                end do
              end do
            end do
          end do
!$OMP end do nowait
        end do
!$OMP end parallel

!$OMP parallel default(shared)
!$OMP master
!$    nthreads = omp_get_num_threads()
!$    if (nthreads > 1) then
!$      nchunk_zv = ((2*nz)*(2*nv)-1) / (nthreads-1) + 1
!$      nchunk_yzv = ((iend_y-ist_y+1)*(2*nz)*(2*nv)-1) / (nthreads-1) + 1
!$      nchunk_yz = ((iend_y-ist_y+1)*(2*nz)-1) / (nthreads-1) + 1
!$    end if
!$OMP end master
!$OMP end parallel
      end if
    """,

    """
      allocate( dh(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) )
      allocate( cf(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) )
      allocate( ef(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) )

      do istep = 1, 4

       !%%% For shearflow rotating flux tube model %%%
        if (gamma_e /= 0._DP .and. trim(flag_shearflow) == "rotating") then
          if (istep == 2 .or. istep == 4) then
            call geom_increment_time(0.5_DP * dt)
            if (trim(col_type) == "full" .or. trim(col_type) == "lorentz" .or. trim(time_advnc) == "imp_colli") then
              call colliimp_set_param
            end if
          end if
        end if
       !%%%

        call caldlt_rev( colliflag, ff, phi, Al, hh, dh, cf, ef )

                                           call clock_sta(11)
                                         ! call fapp_start("rkg",11,1)
        call rkg( hh, dh, qh, istep )
                                         ! call fapp_stop("rkg",11,1)
                                           call clock_end(11)

        call tips_reality ( hh )

                                           call clock_sta(12)
                                         ! call fapp_start("esfield",12,1)
        if ( beta > 0._DP ) then
          call fld_emfield_hh( hh, Al )
        end if
        call fld_hh2ff( hh, Al, ff )
        call fld_esfield( ff, phi )
                                         ! call fapp_stop("esfield",12,1)
                                           call clock_end(12)
      end do

      deallocate( dh )
      deallocate( cf )
      deallocate( ef )


  END SUBROUTINE advnc_rkgsteps_rev

    """,

    """
!--------------------------------------
  SUBROUTINE rkg( hh, dh, qh, istep )
!--------------------------------------
!     Runge-Kutta-Gill

    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: hh, qh
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: dh
    integer, intent(in) :: istep

    real(kind=DP) :: c1, c2, cq, c0
    integer :: mx, my, iz, iv, im


      if      ( istep == 1 ) then
        c1   =  0.5_DP
        c2   = -1._DP
        cq   = -2._DP
        c0   =  1._DP
      else if ( istep == 2 ) then
        c1   =  1._DP - sqrt( 0.5_DP )
        c2   = -c1
        cq   =  1._DP - 3._DP * c1
        c0   =  2._DP * c1
      else if ( istep == 3 ) then
        c1   =  1._DP + sqrt( 0.5_DP )
        c2   = -c1
        cq   =  1._DP - 3._DP * c1
        c0   =  2._DP * c1
      else if ( istep == 4 ) then
        c1   =  1._DP / 6._DP
        c2   = -1._DP / 3._DP
        cq   =  0._DP
        c0   =  0._DP
      end if

!$OMP parallel do collapse(3)
      do im = 0, nm
        do iv = 1, 2*nv
          do iz = -nz, nz-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                hh(mx,my,iz,iv,im) = hh(mx,my,iz,iv,im)           &
                                   + c1 * dt * dh(mx,my,iz,iv,im) &
                                   + c2 * qh(mx,my,iz,iv,im)
                qh(mx,my,iz,iv,im) = cq * qh(mx,my,iz,iv,im) &
                                   + c0 * dt * dh(mx,my,iz,iv,im)
              end do
            end do
          end do
        end do
      end do


  END SUBROUTINE rkg


    """,

    """
!--------------------------------------
  SUBROUTINE caldlt_rev( colliflag, ff, phi, Al, hh, dh, cf, ef )
!--------------------------------------
!     increment of delta-f within a time step

    character(15), intent(in) :: colliflag ! = "collisional"
                                           ! = "collisionless"
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1)                       :: phi, Al
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm)           :: hh
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm)           :: dh, cf, ef

    complex(kind=DP), dimension(:,:,:,:), allocatable :: psi, chi
    integer :: mx, my, iz, iv, im

      allocate( psi(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,0:nm) )
      allocate( chi(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,0:nm) )

!$OMP parallel default(none) &
!$OMP shared(psi,chi,phi,Al,j0,ist_y,iend_y) &
!$OMP private(mx,my,iz,im)
!$OMP do collapse(2)
      do im = 0, nm
        do iz = -nz, nz-1
          do my = iend_y, ny
            psi(:,my,iz,im) = (0._DP, 0._DP)
            chi(:,my,iz,im) = (0._DP, 0._DP)
          end do
          do my = ist_y, iend_y
            do mx = -nx, nx
              psi(mx,my,iz,im) = j0(mx,my,iz,im) * phi(mx,my,iz)
              chi(mx,my,iz,im) = j0(mx,my,iz,im) * Al(mx,my,iz)
            end do
          end do
        end do
      end do
!$OMP end do
!$OMP end parallel

    """,

    """
                                           call clock_sta(13)
                                         ! call fapp_start("literm",13,1)
     !%%% Linear collisionless term %%%
      call caldlt_linear( ff, psi, chi, dh )

     !%%% Collision term %%%
      if ( trim(colliflag) == "collisional" ) then

        if ( trim(col_type) == "LB" ) then
          call colli_LB( ff, phi, cf )
        else if ( trim(col_type) == "full" .or. &
                  trim(col_type) == "lorentz" ) then
          !call colli_full( ff, phi, cf )
          call colliimp_calc_colli_full( ff, phi, cf )
        else 
          write(olog,*) "## Illegal choice for col_type!! ---> stop"
          call flush(olog)
          call MPI_Finalize(ierr_mpi)
          stop
        end if

      else if ( trim(colliflag) == "collisionless" ) then

!!$OMP parallel workshare
!        cf(:,:,:,:,:) = (0._DP, 0._DP)
!!$OMP end parallel workshare

      else 

        write(olog,*) "## Illegal choice for colliflag:", colliflag
        call flush(olog)
        call MPI_Finalize(ierr_mpi)
        stop

      end if
    """,

    """
                                         ! call fapp_stop("literm",13,1)
                                           call clock_end(13)

                                           call clock_sta(14)
                                         ! call fapp_start("nlterm",14,1)
     !%%% Nonlinear term %%%
      call exb_NL_term( hh, psi, chi, ef )
                                         ! call fapp_stop("nlterm",14,1)
                                           call clock_end(14)

     !%%% dh/dt = (Linear collisionless) + (Collision) - (Nonlinear) %%%
      if ( trim(colliflag) == "collisional" ) then
!$OMP parallel
        do im = 0, nm
!$OMP do
          do iv = 1, 2*nv
            do iz = -nz, nz-1
              !do my = 0, ny
              do my = ist_y, iend_y
                do mx = -nx, nx
                  dh(mx,my,iz,iv,im) = dh(mx,my,iz,iv,im) &
                                     + cf(mx,my,iz,iv,im) &
                                     - ef(mx,my,iz,iv,im)
                end do
              end do
            end do
          end do
!$OMP end do nowait
        end do
!$OMP end parallel
    """,

    """
      else if ( trim(colliflag) == "collisionless" ) then
!$OMP parallel
        do im = 0, nm
!$OMP do
          do iv = 1, 2*nv
            do iz = -nz, nz-1
              !do my = 0, ny
              do my = ist_y, iend_y
                do mx = -nx, nx
                  dh(mx,my,iz,iv,im) = dh(mx,my,iz,iv,im) &
                                     - ef(mx,my,iz,iv,im)
                end do
              end do
            end do
          end do
!$OMP end do nowait
        end do
!$OMP end parallel
      end if
                                           call clock_sta(15)
                                         ! call fapp_start("zfilter",15,1)
      if ( trim(z_filt) == "on" ) then
        call zfilter( dh )
      end if                                
                                         ! call fapp_stop("zfilter",15,1)
                                           call clock_end(15)

      deallocate( psi )
      deallocate( chi )

  END SUBROUTINE caldlt_rev

    """,

    """

!--------------------------------------
  SUBROUTINE caldlt_linear( ff, psi, chi, dh )
!--------------------------------------
!     increment of delta-f within a time step

    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,0:nm) :: psi, chi
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: dh

    complex(kind=DP), dimension(:,:,:,:), allocatable :: &
                         zb1be, zb1te, zb2be, zb2te, zb1bo, zb1to, zb2bo, zb2to
    complex(kind=DP), dimension(:,:,:,:), allocatable :: vb1e, vb2e, vb1o, vb2o
    integer :: im


      allocate( zb1be(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb1te(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb2be(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb2te(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb1bo(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb1to(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb2bo(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( zb2to(-nx:nx,0:ny,0:nzb-1,1:2*nv) )
      allocate( vb1e(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) )
      allocate( vb2e(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) )
      allocate( vb1o(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) )
      allocate( vb2o(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) )

      call bndry_bound_e( psi )

      call literm_k_rev( ff, psi, chi, dh )

    """,

    """

!$OMP parallel default(none) &
!$OMP shared(ff,psi,chi,dh) &
!$OMP shared(zb1be,zb1te,zb2be,zb2te,zb1bo,zb1to,zb2bo,zb2to,vb1e,vb2e,vb1o,vb2o) &
!$OMP private(im)

!!%%% Without overlap %%%
!      do im = 0, nm
!        call bndry_zv_buffin( ff(:,:,:,:,im), zb1be, zb1te, vb1e )
!!$OMP barrier
!!$OMP master
!        call bndry_zv_sendrecv( zb1be, zb1te, zb2be, zb2te, vb1e, vb2e )
!!$OMP end master
!!$OMP barrier
!        call bndry_zv_buffout( zb2be, zb2te, vb2e, ff(:,:,:,:,im) )
!!$OMP barrier
!        call literm_zv( ff(:,:,:,:,im), psi(:,:,:,im), im, dh(:,:,:,:,im) )
!!$OMP barrier
!      end do
!!%%%%%%%%%%%%%%%%%%%%%%%


!!%%% With overlap %%%
      do im = 0, nm+3
        if (mod(im,2) == 0) then ! even
!$OMP master
          if (0+1<=im .and. im<=nm+1) call bndry_zv_sendrecv( zb1bo, zb1to, zb2bo, zb2to, vb1o, vb2o )
!$OMP end master
          if (0  <=im .and. im<=nm  ) call bndry_zv_buffin( ff(:,:,:,:,im), zb1be, zb1te, vb1e )
          if (0+2<=im .and. im<=nm+2) call bndry_zv_buffout( zb2be, zb2te, vb2e, ff(:,:,:,:,im-2) )
          if (0+3<=im .and. im<=nm+3) call literm_zv( ff(:,:,:,:,im-3), psi(:,:,:,im-3), im-3, dh(:,:,:,:,im-3) )
        else                     ! odd
!$OMP master
          if (0+1<=im .and. im<=nm+1) call bndry_zv_sendrecv( zb1be, zb1te, zb2be, zb2te, vb1e, vb2e )
!$OMP end master
          if (0  <=im .and. im<=nm  ) call bndry_zv_buffin( ff(:,:,:,:,im), zb1bo, zb1to, vb1o )
          if (0+2<=im .and. im<=nm+2) call bndry_zv_buffout( zb2bo, zb2to, vb2o, ff(:,:,:,:,im-2) )
          if (0+3<=im .and. im<=nm+3) call literm_zv( ff(:,:,:,:,im-3), psi(:,:,:,im-3), im-3, dh(:,:,:,:,im-3) )
        end if
!$OMP barrier
      end do
!!%%%%%%%%%%%%%%%%%%%%

    """,

    """
!$OMP end parallel

      deallocate( zb1be )
      deallocate( zb1te )
      deallocate( zb2be )
      deallocate( zb2te )
      deallocate( zb1bo )
      deallocate( zb1to )
      deallocate( zb2bo )
      deallocate( zb2to )
      deallocate( vb1e )
      deallocate( vb2e )
      deallocate( vb1o )
      deallocate( vb2o )


  END SUBROUTINE caldlt_linear

    """,

    """

!--------------------------------------
  SUBROUTINE literm_k_rev ( ff, psi, chi, lf )
!--------------------------------------
!     z-derivative of ff

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,0:nm) :: psi, chi
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: lf

    real(kind=DP) :: cs1, cs2, kvd, kvs
    integer  ::  mx, my, iz, iv, im


                                           call clock_sta(1320)
                                         ! call fapp_start("literm_perp",1320,1)

      cs1    = sgn(ranks) * Znum(ranks) / tau(ranks)
      cs2    = sqrt( tau(ranks) / Anum(ranks) )

!$OMP parallel do collapse(3) private(kvd,kvs)
      do im = 0, nm
        do iv = 1, 2*nv
          do iz = -nz, nz-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                kvd = kx(mx) * vdx(iz,iv,im) + ky(my) * vdy(iz,iv,im)
                kvs = ky(my) * vsy(iz,iv,im)
                lf(mx,my,iz,iv,im) =                  &
                   - ui * kvd * ff(mx,my,iz,iv,im)    &
                   - cs1 * fmx(iz,iv,im) * (          &
                       + ui * kvd * psi(mx,my,iz,im)  &
                       - ui * kvs                     &
                            * ( psi(mx,my,iz,im) - cs2 * vl(iv) * chi(mx,my,iz,im) ) )
              end do
            end do
          end do
        end do
      end do
                                         ! call fapp_stop("literm_perp",1320,1)
                                           call clock_end(1320)


  END SUBROUTINE literm_k_rev

    """,

    """
!--------------------------------------
  SUBROUTINE literm_zv ( ff, psi, im, lf )
!--------------------------------------
!     (z,v)-derivative of ff

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb)            :: psi
    integer, intent(in) :: im
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv)             :: lf

    real(kind=DP), dimension(-nz:nz-1) :: cefz, cefz2
    real(kind=DP) :: cefv, cs1, rotating_cf4, rotating_up5
    integer  ::  mx, my, iz, iv


!$OMP master
                                           call clock_sta(1330)
                                         ! call fapp_start("literm_para",1330,1)
!$OMP end master

      cs1    = sgn(ranks) * Znum(ranks) / tau(ranks)
      do iz = -nz, nz-1
        cefz(iz)   = 1._DP / ( 12._DP * dpara(iz) ) * sqrt( tau(ranks) / Anum(ranks) )
        cefz2(iz)  = 1._DP / ( 60._DP * dpara(iz) ) * sqrt( tau(ranks) / Anum(ranks) )
      end do
      cefv   = 1._DP / ( 12._DP * dv ) * sqrt( tau(ranks) / Anum(ranks) )
     !%%% For shearflow rotating flux tube model %%%
      if (gamma_e /= 0._DP .and. trim(flag_shearflow) == "rotating") then
        rotating_cf4 = - gamma_e / (s_hat_g * 12._DP * (zz(0)-zz(-1)))
        rotating_up5 = - gamma_e / (s_hat_g * 60._DP * (zz(0)-zz(-1)))
      else
        rotating_cf4 = 0._DP
        rotating_up5 = 0._DP
      end if
     !%%%

      if (trim(z_calc) == "cf4") then

!$OMP do collapse(2) schedule(dynamic,nchunk_zv)
      do iv = 1, 2*nv
        do iz = -nz, nz-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              lf(mx,my,iz,iv) = lf(mx,my,iz,iv)       &
              !%%% For shearflow rotating flux tube model %%%
              !!!- vl(iv) * cefz(iz) * (              &
                 - (vl(iv) * cefz(iz) + rotating_cf4) * ( &
              !%%%
                     -         ff(mx,my,iz+2,iv)      &
                     + 8._DP * ff(mx,my,iz+1,iv)      &
                     - 8._DP * ff(mx,my,iz-1,iv)      &
                     +         ff(mx,my,iz-2,iv) )    &
                 + mir(iz,im) * cefv * (              &
                     -         ff(mx,my,iz,iv+2)      &
                     + 8._DP * ff(mx,my,iz,iv+1)      &
                     - 8._DP * ff(mx,my,iz,iv-1)      &
                     +         ff(mx,my,iz,iv-2) )    &
                 - cs1 * fmx(iz,iv,im) * (            &
                       vl(iv) * cefz(iz) * (          &
                         -         psi(mx,my,iz+2)    &
                         + 8._DP * psi(mx,my,iz+1)    &
                         - 8._DP * psi(mx,my,iz-1)    &
                         +         psi(mx,my,iz-2) ) )&
                 - art_diff * (                       &
                     +         ff(mx,my,iz+2,iv)      &
                     - 4._DP * ff(mx,my,iz+1,iv)      &
                     + 6._DP * ff(mx,my,iz  ,iv)      &
                     - 4._DP * ff(mx,my,iz-1,iv)      &
                     +         ff(mx,my,iz-2,iv) )
            end do
          end do
        end do
      end do
!$OMP end do nowait

    """,

    """
      else if (trim(z_calc) == "up5") then

        do iv = 1, 2*nv
          if ( vl(iv) > 0._DP ) then
!$OMP do collapse(2) schedule(dynamic,nchunk_yz)
            do iz = -nz, nz-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  lf(mx,my,iz,iv) = lf(mx,my,iz,iv)       &
                    !%%% For shearflow rotating flux tube model %%%
                     - rotating_cf4 * (                   &
                         -         ff(mx,my,iz+2,iv)      &
                         + 8._DP * ff(mx,my,iz+1,iv)      &
                         - 8._DP * ff(mx,my,iz-1,iv)      &
                         +         ff(mx,my,iz-2,iv) )    &
                    !%%%
                     - vl(iv) * cefz2(iz) * (             &
                         - 3._DP * ff(mx,my,iz+2,iv)      &
                         +30._DP * ff(mx,my,iz+1,iv)      &
                         +20._DP * ff(mx,my,iz  ,iv)      &
                         -60._DP * ff(mx,my,iz-1,iv)      &
                         +15._DP * ff(mx,my,iz-2,iv)      &
                         - 2._DP * ff(mx,my,iz-3,iv) )    &
                     + mir(iz,im) * cefv * (              &
                         -         ff(mx,my,iz,iv+2)      &
                         + 8._DP * ff(mx,my,iz,iv+1)      &
                         - 8._DP * ff(mx,my,iz,iv-1)      &
                         +         ff(mx,my,iz,iv-2) )    &
                     - cs1 * fmx(iz,iv,im) * (            &
                           vl(iv) * cefz(iz) * (          &
                             -         psi(mx,my,iz+2)    &
                             + 8._DP * psi(mx,my,iz+1)    &
                             - 8._DP * psi(mx,my,iz-1)    &
                             +         psi(mx,my,iz-2) ) )
                end do
              end do
            end do
!$OMP end do nowait
    """,

    """
          else
!$OMP do collapse(2) schedule(dynamic,nchunk_yz)
            do iz = -nz, nz-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  lf(mx,my,iz,iv) = lf(mx,my,iz,iv)       &
                    !%%% For shearflow rotating flux tube model %%%
                     - rotating_cf4 * (                   &
                         -         ff(mx,my,iz+2,iv)      &
                         + 8._DP * ff(mx,my,iz+1,iv)      &
                         - 8._DP * ff(mx,my,iz-1,iv)      &
                         +         ff(mx,my,iz-2,iv) )    &
                    !%%%
                     - vl(iv) * cefz2(iz) * (             &
                         + 2._DP * ff(mx,my,iz+3,iv)      &
                         -15._DP * ff(mx,my,iz+2,iv)      &
                         +60._DP * ff(mx,my,iz+1,iv)      &
                         -20._DP * ff(mx,my,iz  ,iv)      &
                         -30._DP * ff(mx,my,iz-1,iv)      &
                         + 3._DP * ff(mx,my,iz-2,iv) )    &
                     + mir(iz,im) * cefv * (              &
                         -         ff(mx,my,iz,iv+2)      &
                         + 8._DP * ff(mx,my,iz,iv+1)      &
                         - 8._DP * ff(mx,my,iz,iv-1)      &
                         +         ff(mx,my,iz,iv-2) )    &
                     - cs1 * fmx(iz,iv,im) * (            &
                           vl(iv) * cefz(iz) * (          &
                             -         psi(mx,my,iz+2)    &
                             + 8._DP * psi(mx,my,iz+1)    &
                             - 8._DP * psi(mx,my,iz-1)    &
                             +         psi(mx,my,iz-2) ) )
                end do
              end do
            end do
!$OMP end do nowait
          end if
        end do

      else

        write(olog,*) "## Illegal choice for z_calc!! ---> stop"
        call flush(olog)
        call MPI_Finalize(ierr_mpi)
        stop

      end if

!$OMP master
                                         ! call fapp_stop("literm_para",1330,1)
                                           call clock_end(1330)
!$OMP end master


  END SUBROUTINE literm_zv


END MODULE GKV_advnc

    """]

chunks2 = [
    """
MODULE GKV_bndry
!-------------------------------------------------------------------------------
!
!    MPI send/recv communications in zz,vl,mu
!
!    Update history of gkvp_bndry.f90
!    --------------
!      gkvp_f0.57 (S. Maeyama, Oct 2020)
!        - Version number f0.57 is removed from filename.
!
!-------------------------------------------------------------------------------

  use GKV_header
  use GKV_mpienv
  use GKV_clock, only: clock_sta, clock_end

  implicit none

  private

  public   bndry_zvm_bound_f, bndry_bound_e,  &
      bndry_bound_f_buffin, bndry_bound_f_sendrecv, bndry_bound_f_buffout,  &
      bndry_shifts_v_buffin, bndry_shifts_v_sendrecv, bndry_shifts_v_buffout,  &
      bndry_zv_buffin, bndry_zv_sendrecv, bndry_zv_buffout, &
      bndry_vm_buffin, bndry_vm_sendrecv, bndry_vm_buffout, &
      bndry_shifts_m_buffin, bndry_shifts_m_sendrecv, bndry_shifts_m_buffout


CONTAINS

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_zvm_bound_f( ff )
!--------------------------------------

    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff

    complex(kind=DP), dimension(:,:,:,:,:), allocatable :: zb1_bottom, zb1_top
    complex(kind=DP), dimension(:,:,:,:,:), allocatable :: zb2_bottom, zb2_top
    complex(kind=DP), dimension(:,:,:,:,:), allocatable :: vb1, vb2
    complex(kind=DP), dimension(:,:,:,:,:), allocatable :: mb1, mb2
    integer :: im

      allocate( zb1_bottom(-nx:nx,0:ny,0:nzb-1,1:2*nv,0:nm) )
      allocate( zb1_top(-nx:nx,0:ny,0:nzb-1,1:2*nv,0:nm) )
      allocate( zb2_bottom(-nx:nx,0:ny,0:nzb-1,1:2*nv,0:nm) )
      allocate( zb2_top(-nx:nx,0:ny,0:nzb-1,1:2*nv,0:nm) )
      allocate( vb1(-nx:nx,0:ny,-nz:nz-1,1:2*nvb,0:nm) )
      allocate( vb2(-nx:nx,0:ny,-nz:nz-1,1:2*nvb,0:nm) )
      allocate( mb1(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) )
      allocate( mb2(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) )

!$OMP parallel default (none) &
!$OMP shared(ff,zb1_bottom,zb1_top,zb2_bottom,zb2_top,vb1,vb2,mb1,mb2) &
!$OMP private(im)
      do im = 0, nm
        call bndry_bound_f_buffin ( ff(:,:,:,:,im), zb1_bottom(:,:,:,:,im), zb1_top(:,:,:,:,im) )
!$OMP barrier
!$OMP master
        call bndry_bound_f_sendrecv ( zb1_bottom(:,:,:,:,im), zb1_top(:,:,:,:,im),  &
                                      zb2_bottom(:,:,:,:,im), zb2_top(:,:,:,:,im) )
!$OMP end master
!$OMP barrier
        call bndry_bound_f_buffout ( zb2_bottom(:,:,:,:,im), zb2_top(:,:,:,:,im), ff(:,:,:,:,im) )
      end do
!$OMP barrier

      do im = 0, nm
        call bndry_shifts_v_buffin ( ff(:,:,:,:,im), vb1(:,:,:,:,im), vb2(:,:,:,:,im) )
!$OMP barrier
!$OMP master
        call bndry_shifts_v_sendrecv ( vb1(:,:,:,:,im), vb2(:,:,:,:,im) )
!$OMP end master
!$OMP barrier
        call bndry_shifts_v_buffout ( vb2(:,:,:,:,im), ff(:,:,:,:,im) )
      end do
!$OMP barrier
    """,

    """
      do im = 0, nm
        call bndry_shifts_v_buffin ( ff(:,:,:,:,im), vb1(:,:,:,:,im), vb2(:,:,:,:,im) )
!$OMP barrier
!$OMP master
        call bndry_shifts_v_sendrecv ( vb1(:,:,:,:,im), vb2(:,:,:,:,im) )
!$OMP end master
!$OMP barrier
        call bndry_shifts_v_buffout ( vb2(:,:,:,:,im), ff(:,:,:,:,im) )
      end do
!$OMP barrier

      call bndry_shifts_m_buffin ( ff, mb1, mb2 )
!$OMP barrier
!$OMP master
      call bndry_shifts_m_sendrecv ( mb1, mb2 )
!$OMP end master
!$OMP barrier
      call bndry_shifts_m_buffout ( mb2, ff )
!$OMP end parallel

      deallocate( zb1_bottom )
      deallocate( zb1_top )
      deallocate( zb2_bottom )
      deallocate( zb2_top )
      deallocate( vb1 )
      deallocate( vb2 )
      deallocate( mb1 )
      deallocate( mb2 )

  END SUBROUTINE bndry_zvm_bound_f

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_bound_f_buffin( ff, zb1_bottom, zb1_top )
!--------------------------------------
!   Impose the modified periodic boundary condition 
!     in the z-direction for the distribution function

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb1_bottom, zb1_top

    integer  ::  mx, my, iz, iv


!$OMP master
                                           call clock_sta(1351)
                                         ! call fapp_start("literm_boundf_bufferin",1351,1)
!$OMP end master

!$OMP do schedule (dynamic)
        do iv = 1, 2*nv
          do iz = 0, nzb-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                zb1_bottom(mx,my,iz,iv) = ff(mx,my,-nz+iz  ,iv)
                zb1_top   (mx,my,iz,iv) = ff(mx,my, nz-nzb+iz,iv)
              end do
            end do
          end do
        end do
!$OMP end do nowait

!$OMP master
                                         ! call fapp_stop("literm_boundf_bufferin",1351,1)
                                           call clock_end(1351)
!$OMP end master


  END SUBROUTINE bndry_bound_f_buffin

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_bound_f_sendrecv ( zb1_bottom, zb1_top, zb2_bottom, zb2_top )
!--------------------------------------
!   Impose the modified periodic boundary condition 
!     in the z-direction for the distribution function

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb1_bottom, zb1_top
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb2_bottom, zb2_top
    integer  ::  slngz
    integer, dimension(4) :: ireq
    integer, dimension(MPI_STATUS_SIZE,4) :: istatus


      slngz  = (2*nx+1)*(ny+1)*(2*nv) * nzb

                                           call clock_sta(1352)
                                         ! call fapp_start("literm_boundf_sendrecv",1352,1)
!      call MPI_sendrecv( zb1_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 1, &
!                         zb2_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 1, &
!                         sub_comm_world, status, ierr_mpi )
!
!      call MPI_sendrecv( zb1_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 2, &
!                         zb2_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 2, &
!                         sub_comm_world, status, ierr_mpi )

      call MPI_irecv( zb2_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 1, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( zb2_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 2, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_isend( zb1_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 1, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_isend( zb1_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 2, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_waitall( 4, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_boundf_sendrecv",1352,1)
                                           call clock_end(1352)


  END SUBROUTINE bndry_bound_f_sendrecv

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_bound_f_buffout ( zb2_bottom, zb2_top, ff )
!--------------------------------------
!   Impose the modified periodic boundary condition 
!     in the z-direction for the distribution function

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb2_bottom, zb2_top
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff

    integer  ::  mx, my, iz, iv, mwn, mwp


! --- substitution
!$OMP master
                                           call clock_sta(1353)
                                         ! call fapp_start("literm_boundf_bufferout",1353,1)
!$OMP end master

      if( rankz /= 0 ) then

!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  ff(mx,my,-nz-nzb+iz,iv) = zb2_bottom(mx,my,iz,iv)
                end do
              end do
            end do
          end do
!$OMP end do nowait

      else  ! rankz==0

        if ( trim(z_bound) == "outflow" .OR. trim(z_bound) == "mixed") then

!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    if ( vl(iv) > 0._DP ) then ! inflow
                      do iz = 0, nzb-1
                        ff(mx,my,-nz-nzb+iz,iv) = ( 0._DP, 0._DP )
                      end do
                    else                       ! outflow
                      ff(mx,my,-nz-1,iv) =   ff(mx,my,-nz  ,iv)
                      ff(mx,my,-nz-2,iv) = - ff(mx,my,-nz+1,iv) + 2._DP * ff(mx,my,-nz  ,iv)
                    end if
                  else
                    do iz = 0, nzb-1
                      ff(mx,my,-nz-nzb+iz,iv) = ck(my) * zb2_bottom(mwn,my,iz,iv)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do nowait

        else if ( trim(z_bound) == "zerofixed" ) then

!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    ff(mx,my,-nz-nzb+iz,iv) = ( 0._DP, 0._DP )
                  else
                    ff(mx,my,-nz-nzb+iz,iv) = ck(my) * zb2_bottom(mwn,my,iz,iv)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do nowait

        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if
    """,

    """
      if( rankz /= nprocz-1 ) then

!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  ff(mx,my,nz+iz,iv) = zb2_top(mx,my,iz,iv)
                end do
              end do
            end do
          end do
!$OMP end do nowait

      else ! rankz==nprocz-1

        if ( trim(z_bound) == "outflow" .OR. trim(z_bound) == "mixed") then

!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    if ( vl(iv) > 0._DP ) then ! outflow
                      ff(mx,my,nz  ,iv) =   ff(mx,my,nz-1,iv)
                      ff(mx,my,nz+1,iv) = - ff(mx,my,nz-2,iv) + 2._DP * ff(mx,my,nz-1,iv)
                    else                       ! inflow
                      do iz = 0, nzb-1
                        ff(mx,my,nz+iz,iv) = ( 0._DP, 0._DP )
                      end do
                    end if
                  else
                    do iz = 0, nzb-1
                      ff(mx,my,nz+iz,iv) = conjg( ck(my) ) * zb2_top(mwp,my,iz,iv)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do nowait

        else if ( trim(z_bound) == "zerofixed" ) then

    """,

    """
!$OMP do schedule (dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    ff(mx,my,nz+iz,iv) = ( 0._DP, 0._DP )
                  else
                    ff(mx,my,nz+iz,iv) = conjg( ck(my) ) * zb2_top(mwp,my,iz,iv)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do nowait

        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if

!$OMP master
                                         ! call fapp_stop("literm_boundf_bufferout",1353,1)
                                           call clock_end(1353)
!$OMP end master


  END SUBROUTINE bndry_bound_f_buffout



    """,

    """
!--------------------------------------
  SUBROUTINE bndry_shifts_v_buffin( ff, vb1, vb2 )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff
    complex(kind=DP), intent(out),  &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb1, vb2

    integer  ::  mx, my, iz, iv


!$OMP master
                                           call clock_sta(1361)
                                         ! call fapp_start("literm_shifts_bufferin",1361,1)
!$OMP end master
! --- zero clear is required for rankv = 0, nprocv-1 and rankm = 0, nprocm-1
      do iv = 1, 2*nvb
!$OMP do schedule (dynamic)
          do iz = -nz, nz-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                vb2(mx,my,iz,iv) = ( 0._DP, 0._DP )
              end do
            end do
          end do
!$OMP end do nowait
      end do

!$OMP do schedule (dynamic)
        do iz = -nz, nz-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              do iv = 1, nvb
                vb1(mx,my,iz,iv    ) = ff(mx,my,iz,         iv)
                vb1(mx,my,iz,iv+nvb) = ff(mx,my,iz,2*nv-nvb+iv)
!              vb1(mx,my,iz,1) = ff(mx,my,iz,     1)
!              vb1(mx,my,iz,2) = ff(mx,my,iz,     2)
!              vb1(mx,my,iz,3) = ff(mx,my,iz,     3)
!              vb1(mx,my,iz,4) = ff(mx,my,iz,2*nv-2)
!              vb1(mx,my,iz,5) = ff(mx,my,iz,2*nv-1)
!              vb1(mx,my,iz,6) = ff(mx,my,iz,2*nv  )
              end do
            end do
          end do
        end do
!$OMP end do nowait


!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferin",1361,1)
                                           call clock_end(1361)
!$OMP end master


  END SUBROUTINE bndry_shifts_v_buffin


    """,

    """
!--------------------------------------
  SUBROUTINE bndry_shifts_v_sendrecv( vb1, vb2 )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb1
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb2
    integer  ::  slngv
    integer, dimension(4) :: ireq
    integer, dimension(MPI_STATUS_SIZE,4) :: istatus


      slngv = (2*nx+1)*(ny+1)*(2*nz) * nvb

                                           call clock_sta(1362)
                                         ! call fapp_start("literm_shifts_sendrecv",1362,1)
!      call MPI_sendrecv( vb1(-nx,0,-nz,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 1, &
!                         vb2(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 1, &
!                         sub_comm_world, status, ierr_mpi )
!
!      call MPI_sendrecv( vb1(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 2, &
!                         vb2(-nx,0,-nz,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 2, &
!                         sub_comm_world, status, ierr_mpi )

      call MPI_irecv( vb2(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 1, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( vb2(-nx,0,-nz,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 2, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_isend( vb1(-nx,0,-nz,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 1, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_isend( vb1(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 2, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_waitall( 4, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_shifts_sendrecv",1362,1)
                                           call clock_end(1362)


  END SUBROUTINE bndry_shifts_v_sendrecv

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_shifts_v_buffout( vb2, ff )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb2
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff

    integer  ::  mx, my, iz, iv


!$OMP master
                                           call clock_sta(1363)
                                         ! call fapp_start("literm_shifts_bufferout",1363,1)
!$OMP end master

!$OMP do schedule (dynamic)
        do iz = -nz, nz-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              do iv = 1, nvb
                ff(mx,my,iz,-nvb+iv) = vb2(mx,my,iz,iv    )
                ff(mx,my,iz,2*nv+iv) = vb2(mx,my,iz,iv+nvb)
!              ff(mx,my,iz,    -2) = vb2(mx,my,iz,1)
!              ff(mx,my,iz,    -1) = vb2(mx,my,iz,2)
!              ff(mx,my,iz,     0) = vb2(mx,my,iz,3)
!              ff(mx,my,iz,2*nv+1) = vb2(mx,my,iz,4)
!              ff(mx,my,iz,2*nv+2) = vb2(mx,my,iz,5)
!              ff(mx,my,iz,2*nv+3) = vb2(mx,my,iz,6)
              end do
            end do
          end do
        end do
!$OMP end do nowait


!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferout",1363,1)
                                           call clock_end(1363)
!$OMP end master


  END SUBROUTINE bndry_shifts_v_buffout

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_shifts_m_buffin( ff, mb1, mb2 )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) :: mb1, mb2

    integer  ::  mx, my, iz, iv, im


!$OMP master
                                           call clock_sta(1371)
                                         ! call fapp_start("literm_shifts_bufferin",1371,1)
!$OMP end master

! --- zero clear is required for rankv = 0, nprocv-1 and rankm = 0, nprocm-1
      do im = 1, 2*nvb
!$OMP do schedule (dynamic)
        do iv = 1, 2*nv
          do iz = -nz, nz-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                mb2(mx,my,iz,iv,im) = ( 0._DP, 0._DP )
              end do
            end do
          end do
        end do
!$OMP end do nowait
      end do

!$OMP do schedule (dynamic)
      do iv = 1, 2*nv
        do iz = -nz, nz-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              do im = 1, nvb
                mb1(mx,my,iz,iv,im    ) = ff(mx,my,iz,iv,     im-1)
                mb1(mx,my,iz,iv,im+nvb) = ff(mx,my,iz,iv,nm-nvb+im)
!              mb1(mx,my,iz,iv,1) = ff(mx,my,iz,iv,   0)
!              mb1(mx,my,iz,iv,2) = ff(mx,my,iz,iv,   1)
!              mb1(mx,my,iz,iv,3) = ff(mx,my,iz,iv,   2)
!              mb1(mx,my,iz,iv,4) = ff(mx,my,iz,iv,nm-2)
!              mb1(mx,my,iz,iv,5) = ff(mx,my,iz,iv,nm-1)
!              mb1(mx,my,iz,iv,6) = ff(mx,my,iz,iv,nm  )
              end do
            end do
          end do
        end do
      end do
!$OMP end do nowait


!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferin",1371,1)
                                           call clock_end(1371)
!$OMP end master


  END SUBROUTINE bndry_shifts_m_buffin

    """,

    """

!--------------------------------------
  SUBROUTINE bndry_shifts_m_sendrecv( mb1, mb2 )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) :: mb1
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) :: mb2
    integer  ::  slngm
    integer, dimension(4) :: ireq
    integer, dimension(MPI_STATUS_SIZE,4) :: istatus


      slngm = (2*nx+1)*(ny+1)*(2*nz)*(2*nv) * nvb

                                           call clock_sta(1372)
                                         ! call fapp_start("literm_shifts_sendrecv",1372,1)
!      call MPI_sendrecv( mb1(-nx,0,-nz,1,1    ), slngm, MPI_DOUBLE_COMPLEX, imdn, 3, &
!                         mb2(-nx,0,-nz,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 3, &
!                         sub_comm_world, status, ierr_mpi )
!
!      call MPI_sendrecv( mb1(-nx,0,-nz,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 4, &
!                         mb2(-nx,0,-nz,1,1    ), slngm, MPI_DOUBLE_COMPLEX, imdn, 4, &
!                         sub_comm_world, status, ierr_mpi )

      call MPI_irecv( mb2(-nx,0,-nz,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 3, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( mb2(-nx,0,-nz,1,    1), slngm, MPI_DOUBLE_COMPLEX, imdn, 4, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_isend( mb1(-nx,0,-nz,1,    1), slngm, MPI_DOUBLE_COMPLEX, imdn, 3, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_isend( mb1(-nx,0,-nz,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 4, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_waitall( 4, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_shifts_sendrecv",1372,1)
                                           call clock_end(1372)


  END SUBROUTINE bndry_shifts_m_sendrecv


    """,

    """

!--------------------------------------
  SUBROUTINE bndry_shifts_m_buffout( mb2, ff )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,1:2*nvb) :: mb2
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff

    integer  ::  mx, my, iz, iv, im


!$OMP master
                                           call clock_sta(1373)
                                         ! call fapp_start("literm_shifts_bufferout",1373,1)
!$OMP end master

!$OMP do schedule (dynamic)
      do iv = 1, 2*nv
        do iz = -nz, nz-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              do im = 1, nvb
                ff(mx,my,iz,iv,-nvb-1+im) = mb2(mx,my,iz,iv,im    )
                ff(mx,my,iz,iv,nm+im    ) = mb2(mx,my,iz,iv,im+nvb)
!              ff(mx,my,iz,iv,  -3) = mb2(mx,my,iz,iv,1)
!              ff(mx,my,iz,iv,  -2) = mb2(mx,my,iz,iv,2)
!              ff(mx,my,iz,iv,  -1) = mb2(mx,my,iz,iv,3)
!              ff(mx,my,iz,iv,nm+1) = mb2(mx,my,iz,iv,4)
!              ff(mx,my,iz,iv,nm+2) = mb2(mx,my,iz,iv,5)
!              ff(mx,my,iz,iv,nm+3) = mb2(mx,my,iz,iv,6)
              end do
            end do
          end do
        end do
      end do
!$OMP end do nowait

!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferout",1373,1)
                                           call clock_end(1373)
!$OMP end master


  END SUBROUTINE bndry_shifts_m_buffout


    """,

    """
!--------------------------------------
  SUBROUTINE bndry_bound_e ( ew )
!--------------------------------------
!  Impose the modified periodic boundary condition 
!    in the z-direction for the electric field

    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,0:nm)   :: ew

    complex(kind=DP), dimension(:,:,:,:), allocatable :: zb1e_bottom, zb1e_top
    complex(kind=DP), dimension(:,:,:,:), allocatable :: zb2e_bottom, zb2e_top
    integer  ::  mx, my, iz, im, mwn, mwp
    integer  ::  slngze
    integer, dimension(4) :: ireq
    integer, dimension(MPI_STATUS_SIZE,4) :: istatus


      allocate( zb1e_bottom(-nx:nx,0:ny,0:nzb-1,0:nm) )
      allocate( zb1e_top(-nx:nx,0:ny,0:nzb-1,0:nm) )
      allocate( zb2e_bottom(-nx:nx,0:ny,0:nzb-1,0:nm) )
      allocate( zb2e_top(-nx:nx,0:ny,0:nzb-1,0:nm) )

      slngze  = (2*nx+1)*(ny+1)*(nm+1) * nzb

!$OMP parallel default(none) &
!$OMP shared(zb2e_bottom,zb2e_top,zb1e_bottom,zb1e_top,ist_y,iend_y,ew) &
!$OMP private(mx,my,iz,im)
!$OMP master
                                           call clock_sta(1381)
                                         ! call fapp_start("literm_bounde_bufferin",1381,1)
!$OMP end master

!$OMP do schedule (dynamic)
      do im = 0, nm
        do iz = 0, nzb-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              zb2e_bottom(mx,my,iz,im) = ( 0._DP, 0._DP )
              zb2e_top   (mx,my,iz,im) = ( 0._DP, 0._DP )
            end do
          end do
        end do
      end do
!$OMP end do nowait

!$OMP do schedule (dynamic)
      do im = 0, nm
        do iz = 0, nzb-1
          do my = ist_y, iend_y
            do mx = -nx, nx
              zb1e_bottom(mx,my,iz,im) = ew(mx,my,-nz+iz  ,im)
              zb1e_top   (mx,my,iz,im) = ew(mx,my, nz-nzb+iz,im)
            end do
          end do
        end do
      end do
!$OMP end do

    """,

    """
!$OMP master
                                         ! call fapp_stop("literm_bounde_bufferin",1381,1)
                                           call clock_end(1381)
!$OMP end master
!$OMP end parallel

                                           call clock_sta(1382)
                                         ! call fapp_start("literm_bounde_sendrecv",1382,1)
!      call MPI_sendrecv( zb1e_bottom, slngze, MPI_DOUBLE_COMPLEX, izdn, 1, &
!                         zb2e_top,    slngze, MPI_DOUBLE_COMPLEX, izup, 1, &
!                         sub_comm_world, status, ierr_mpi )
!
!      call MPI_sendrecv( zb1e_top,    slngze, MPI_DOUBLE_COMPLEX, izup, 2, &
!                         zb2e_bottom, slngze, MPI_DOUBLE_COMPLEX, izdn, 2, &
!                         sub_comm_world, status, ierr_mpi )

      call MPI_irecv( zb2e_top,    slngze, MPI_DOUBLE_COMPLEX, izup, 1, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( zb2e_bottom, slngze, MPI_DOUBLE_COMPLEX, izdn, 2, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_isend( zb1e_bottom, slngze, MPI_DOUBLE_COMPLEX, izdn, 1, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_isend( zb1e_top,    slngze, MPI_DOUBLE_COMPLEX, izup, 2, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_waitall( 4, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_bounde_sendrecv",1382,1)
                                           call clock_end(1382)

! --- substitution
!$OMP parallel default(none) &
!$OMP shared(zb2e_bottom,zb2e_top,zb1e_bottom,zb1e_top,ist_y,iend_y,ew) &
!$OMP shared(rankz,z_bound,ck,dj) &
!$OMP private(mx,my,iz,im,mwp,mwn)
!$OMP master
                                           call clock_sta(1383)
                                         ! call fapp_start("literm_bounde_bufferout",1383,1)
!$OMP end master

    """,

    """

      if( rankz /= 0 ) then

!$OMP do schedule (dynamic)
        do im = 0, nm
          do iz = 0, nzb-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                ew(mx,my,-nz-nzb+iz,im) = zb2e_bottom(mx,my,iz,im)
              end do
            end do
          end do
        end do
!$OMP end do

      else   ! rankz==0

        if ( trim(z_bound) == "outflow" ) then

!$OMP do schedule (dynamic)
          do im = 0, nm
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    ew(mx,my,-nz-1,im)   =   ew(mx,my,-nz  ,im)
                    ew(mx,my,-nz-2,im)   = - ew(mx,my,-nz+1,im) + 2._DP * ew(mx,my,-nz  ,im)
                  else
                    do iz = 0, nzb-1
                      ew(mx,my,-nz-nzb+iz,im) = ck(my) * zb2e_bottom(mwn,my,iz,im)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do

        else if ( trim(z_bound) == "zerofixed" .OR. trim(z_bound) == "mixed" ) then

!$OMP do schedule (dynamic)
          do im = 0, nm
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    ew(mx,my,-nz-nzb+iz,im)   = ( 0._DP, 0._DP )
                  else
                    ew(mx,my,-nz-nzb+iz,im) = ck(my) * zb2e_bottom(mwn,my,iz,im)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do

    """,

    """
        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if

      if( rankz /= nprocz-1 ) then

!$OMP do schedule (dynamic)
        do im = 0, nm
          do iz = 0, nzb-1
            do my = ist_y, iend_y
              do mx = -nx, nx
                ew(mx,my,nz+iz,im) = zb2e_top(mx,my,iz,im)
              end do
            end do
          end do
        end do
!$OMP end do

      else   ! rankz==nprocz-1

        if ( trim(z_bound) == "outflow" ) then

!$OMP do schedule (dynamic)
          do im = 0, nm
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    ew(mx,my,nz  ,im)   =   ew(mx,my,nz-1,im)
                    ew(mx,my,nz+1,im)   = - ew(mx,my,nz-2,im) + 2._DP * ew(mx,my,nz-1,im) 
                  else
                    do iz = 0, nzb-1
                      ew(mx,my,nz+iz,im) = conjg( ck(my) ) * zb2e_top(mwp,my,iz,im)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do

    """,

    """
        else if ( trim(z_bound) == "zerofixed" .OR. trim(z_bound) == "mixed" ) then

!$OMP do schedule (dynamic)
          do im = 0, nm
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    ew(mx,my,nz+iz,im)   = ( 0._DP, 0._DP )
                  else
                    ew(mx,my,nz+iz,im) = conjg( ck(my) ) * zb2e_top(mwp,my,iz,im)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do

        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if

!$OMP master
                                         ! call fapp_stop("literm_bounde_bufferout",1383,1)
                                           call clock_end(1383)
!$OMP end master
!$OMP end parallel

      deallocate( zb1e_bottom )
      deallocate( zb1e_top )
      deallocate( zb2e_bottom )
      deallocate( zb2e_top )


  END SUBROUTINE bndry_bound_e

    """,

    """
!--------------------------------------
  SUBROUTINE bndry_zv_buffin( ff, zb1_bottom, zb1_top, vb1 )
!--------------------------------------
!   Impose the modified periodic boundary condition 
!     in the z-direction for the distribution function

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb1_bottom, zb1_top
    complex(kind=DP), intent(out),  &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb1

    integer :: iz, iv


!$OMP master
                                           call clock_sta(1351)
                                         ! call fapp_start("literm_boundf_bufferin",1351,1)
!$OMP end master

!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, 2*nv
          do iz = 0, nzb-1
            zb1_bottom(:,:,iz,iv) = ff(:,:,-nz+iz  ,iv)
            zb1_top   (:,:,iz,iv) = ff(:,:, nz-nzb+iz,iv)
          end do
        end do
!$OMP end do nowait


!! --- zero clear is required for rankv = 0, nprocv-1 and rankm = 0, nprocm-1
!      do iv = 1, 2*nvb
!!$OMP do schedule(dynamic)
!          do iz = -nz, nz-1
!            do my = ist_y, iend_y
!              do mx = -nx, nx
!                vb2(mx,my,iz,iv) = ( 0._DP, 0._DP )
!              end do
!            end do
!          end do
!!$OMP end do nowait
!      end do

!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, nvb
          do iz = -nz, nz-1
            vb1(:,:,iz,iv    ) = ff(:,:,iz,         iv)
            vb1(:,:,iz,iv+nvb) = ff(:,:,iz,2*nv-nvb+iv)
          end do
        end do
!$OMP end do nowait

!$OMP master
                                         ! call fapp_stop("literm_boundf_bufferin",1351,1)
                                           call clock_end(1351)
!$OMP end master


  END SUBROUTINE bndry_zv_buffin

    """,

    """
!--------------------------------------
  SUBROUTINE bndry_zv_sendrecv ( zb1_bottom, zb1_top, zb2_bottom, zb2_top, vb1, vb2 )
!--------------------------------------

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb1_bottom, zb1_top
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb2_bottom, zb2_top
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb1
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb2

    integer :: slngz, slngv
    integer, dimension(8) :: ireq
    integer, dimension(MPI_STATUS_SIZE,8) :: istatus


      slngz  = (2*nx+1)*(ny+1)*(2*nv) * nzb
      slngv = (2*nx+1)*(ny+1)*(2*nz) * nvb

                                           call clock_sta(1352)
                                         ! call fapp_start("literm_boundf_sendrecv",1352,1)
     !call MPI_sendrecv( zb1_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 1, &
     !                   zb2_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 1, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( zb1_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 2, &
     !                   zb2_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 2, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( vb1(-nx,0,-nz,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 3, &
     !                   vb2(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 3, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( vb1(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 4, &
     !                   vb2(-nx,0,-nz,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 4, &
     !                   sub_comm_world, status, ierr_mpi )

      call MPI_irecv( zb2_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 1, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( zb2_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 2, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_irecv( vb2(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 3, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_irecv( vb2(-nx,0,-nz,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 4, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_isend( zb1_bottom, slngz, MPI_DOUBLE_COMPLEX, izdn, 1, &
                      sub_comm_world, ireq(5), ierr_mpi )
      call MPI_isend( zb1_top,    slngz, MPI_DOUBLE_COMPLEX, izup, 2, &
                      sub_comm_world, ireq(6), ierr_mpi )
      call MPI_isend( vb1(-nx,0,-nz,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 3, &
                      sub_comm_world, ireq(7), ierr_mpi )
      call MPI_isend( vb1(-nx,0,-nz,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 4, &
                      sub_comm_world, ireq(8), ierr_mpi )
      call MPI_waitall( 8, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_boundf_sendrecv",1352,1)
                                           call clock_end(1352)


  END SUBROUTINE bndry_zv_sendrecv

    """,

    """
!--------------------------------------
  SUBROUTINE bndry_zv_buffout ( zb2_bottom, zb2_top, vb2, ff )
!--------------------------------------
!   Impose the modified periodic boundary condition 
!     in the z-direction for the distribution function

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nzb-1,1:2*nv) :: zb2_bottom, zb2_top
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nvb) :: vb2
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb) :: ff

    integer :: mx, my, iz, iv, mwn, mwp


! --- substitution
!$OMP master
                                           call clock_sta(1353)
                                         ! call fapp_start("literm_boundf_bufferout",1353,1)
!$OMP end master

      if( rankz /= 0 ) then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              ff(:,:,-nz-nzb+iz,iv) = zb2_bottom(:,:,iz,iv)
            end do
          end do
!$OMP end do nowait

    """,

    """

      else  ! rankz==0

        if ( trim(z_bound) == "outflow" .OR. trim(z_bound) == "mixed") then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    if ( vl(iv) > 0._DP ) then ! inflow
                      do iz = 0, nzb-1
                        ff(mx,my,-nz-nzb+iz,iv) = ( 0._DP, 0._DP )
                      end do
                    else                       ! outflow
                      ff(mx,my,-nz-1,iv) =   ff(mx,my,-nz  ,iv)
                      ff(mx,my,-nz-2,iv) = - ff(mx,my,-nz+1,iv) + 2._DP * ff(mx,my,-nz  ,iv)
                    end if
                  else
                    do iz = 0, nzb-1
                      ff(mx,my,-nz-nzb+iz,iv) = ck(my) * zb2_bottom(mwn,my,iz,iv)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do nowait

        else if ( trim(z_bound) == "zerofixed" ) then


    """,


    """
        else if ( trim(z_bound) == "zerofixed" ) then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwn   = mx + dj(my)          ! --- mw = mx + dj for the negative-z 

                  if( abs(mwn) > nx ) then
                    ff(mx,my,-nz-nzb+iz,iv) = ( 0._DP, 0._DP )
                  else
                    ff(mx,my,-nz-nzb+iz,iv) = ck(my) * zb2_bottom(mwn,my,iz,iv)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do nowait

        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if

      if( rankz /= nprocz-1 ) then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              ff(:,:,nz+iz,iv) = zb2_top(:,:,iz,iv)
            end do
          end do
!$OMP end do nowait

    """,
    """
      else ! rankz==nprocz-1

        if ( trim(z_bound) == "outflow" .OR. trim(z_bound) == "mixed") then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    if ( vl(iv) > 0._DP ) then ! outflow
                      ff(mx,my,nz  ,iv) =   ff(mx,my,nz-1,iv)
                      ff(mx,my,nz+1,iv) = - ff(mx,my,nz-2,iv) + 2._DP * ff(mx,my,nz-1,iv)
                    else                       ! inflow
                      do iz = 0, nzb-1
                        ff(mx,my,nz+iz,iv) = ( 0._DP, 0._DP )
                      end do
                    end if
                  else
                    do iz = 0, nzb-1
                      ff(mx,my,nz+iz,iv) = conjg( ck(my) ) * zb2_top(mwp,my,iz,iv)
                    end do
                  end if

                end do
              end do
          end do
!$OMP end do nowait

    """,
    """
        else if ( trim(z_bound) == "zerofixed" ) then

!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, 2*nv
            do iz = 0, nzb-1
              do my = ist_y, iend_y
                do mx = -nx, nx
                  mwp   = mx - dj(my)          ! --- mw = mx - dj for the positive-z 

                  if( abs(mwp) > nx ) then
                    ff(mx,my,nz+iz,iv) = ( 0._DP, 0._DP )
                  else
                    ff(mx,my,nz+iz,iv) = conjg( ck(my) ) * zb2_top(mwp,my,iz,iv)
                  end if

                end do
              end do
            end do
          end do
!$OMP end do nowait

        else

          write( olog, * ) " # z_bound is to be  outflow  or  zerofixed"
          call flush(olog)
          stop

        end if

      end if

    """,
    """

        if ( rankv == 0 ) then
!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, nvb
            do iz = -nz, nz-1
              ff(:,:,iz,-nvb+iv) = (0._DP, 0._DP)
              ff(:,:,iz,2*nv+iv) = vb2(:,:,iz,iv+nvb)
            end do
          end do
!$OMP end do nowait
        else if ( rankv == nprocv-1 ) then
!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, nvb
            do iz = -nz, nz-1
              ff(:,:,iz,-nvb+iv) = vb2(:,:,iz,iv    )
              ff(:,:,iz,2*nv+iv) = (0._DP, 0._DP)
            end do
          end do
!$OMP end do nowait
        else
!$OMP do collapse(2) schedule(dynamic)
          do iv = 1, nvb
            do iz = -nz, nz-1
              ff(:,:,iz,-nvb+iv) = vb2(:,:,iz,iv    )
              ff(:,:,iz,2*nv+iv) = vb2(:,:,iz,iv+nvb)
            end do
          end do
!$OMP end do nowait
        end if

!$OMP master
                                         ! call fapp_stop("literm_boundf_bufferout",1353,1)
                                           call clock_end(1353)
!$OMP end master


  END SUBROUTINE bndry_zv_buffout


    """,
    """

!--------------------------------------
  SUBROUTINE bndry_vm_buffin( iz, ff, vb1, mb1 )
!--------------------------------------
!     Shift communications in v and m directions

    integer, intent(in) :: iz
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
    complex(kind=DP), intent(out),  &
      dimension(-nx:nx,0:ny,0:nm,1:2*nvb) :: vb1
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,1:2*nv,1:2*nvb) :: mb1

    integer :: mx, my, iv, im


!$OMP master
                                           call clock_sta(1371)
                                         ! call fapp_start("literm_shifts_bufferin",1371,1)
!$OMP end master

!! --- zero clear is required for rankv = 0, nprocv-1 and rankm = 0, nprocm-1
!      do iv = 1, 2*nvb
!        do im = 0, nm
!            do my = ist_y, iend_y
!              do mx = -nx, nx
!                vb2(mx,my,im,iv) = ( 0._DP, 0._DP )
!              end do
!            end do
!        end do
!      end do

    """,
    """
!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, nvb
          do im = 0, nm
            do my = ist_y, iend_y
              do mx = -nx, nx
                vb1(mx,my,im,iv    ) = ff(mx,my,iz,         iv,im)
                vb1(mx,my,im,iv+nvb) = ff(mx,my,iz,2*nv-nvb+iv,im)
              end do
            end do
          end do
        end do
!$OMP end do nowait

!! --- zero clear is required for rankv = 0, nprocv-1 and rankm = 0, nprocm-1
!      do im = 1, 2*nvb
!        do iv = 1, 2*nv
!            do my = ist_y, iend_y
!              do mx = -nx, nx
!                mb2(mx,my,iv,im) = ( 0._DP, 0._DP )
!              end do
!            end do
!        end do
!      end do

!$OMP do collapse(2) schedule(dynamic)
      do im = 1, nvb
        do iv = 1, 2*nv
          do my = ist_y, iend_y
            do mx = -nx, nx
              mb1(mx,my,iv,im    ) = ff(mx,my,iz,iv,     im-1)
              mb1(mx,my,iv,im+nvb) = ff(mx,my,iz,iv,nm-nvb+im)
            end do
          end do
        end do
      end do
!$OMP end do nowait


!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferin",1371,1)
                                           call clock_end(1371)
!$OMP end master


  END SUBROUTINE bndry_vm_buffin

    """,
    """
!--------------------------------------
  SUBROUTINE bndry_vm_sendrecv ( vb1, vb2, mb1, mb2 )
!--------------------------------------
!     Shift communications in v and m directions

    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nm,1:2*nvb) :: vb1
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,0:nm,1:2*nvb) :: vb2
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,1:2*nv,1:2*nvb) :: mb1
    complex(kind=DP), intent(out), &
      dimension(-nx:nx,0:ny,1:2*nv,1:2*nvb) :: mb2

    integer :: slngv, slngm
    integer, dimension(8) :: ireq
    integer, dimension(MPI_STATUS_SIZE,8) :: istatus


      slngv = (2*nx+1)*(ny+1)*(nm+1) * nvb
      slngm = (2*nx+1)*(ny+1)*(2*nv) * nvb

                                           call clock_sta(1372)
                                         ! call fapp_start("literm_shifts_sendrecv",1372,1)
     !call MPI_sendrecv( vb1(-nx,0,0,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 1, &
     !                   vb2(-nx,0,0,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 1, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( vb1(-nx,0,0,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 2, &
     !                   vb2(-nx,0,0,1    ), slngv, MPI_DOUBLE_COMPLEX, ivdn, 2, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( mb1(-nx,0,1,1    ), slngm, MPI_DOUBLE_COMPLEX, imdn, 3, &
     !                   mb2(-nx,0,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 3, &
     !                   sub_comm_world, status, ierr_mpi )
     !call MPI_sendrecv( mb1(-nx,0,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 4, &
     !                   mb2(-nx,0,1,1    ), slngm, MPI_DOUBLE_COMPLEX, imdn, 4, &
     !                   sub_comm_world, status, ierr_mpi )

      call MPI_irecv( vb2(-nx,0,0,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 1, &
                      sub_comm_world, ireq(1), ierr_mpi )
      call MPI_irecv( vb2(-nx,0,0,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 2, &
                      sub_comm_world, ireq(2), ierr_mpi )
      call MPI_irecv( mb2(-nx,0,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 3, &
                      sub_comm_world, ireq(3), ierr_mpi )
      call MPI_irecv( mb2(-nx,0,1,    1), slngm, MPI_DOUBLE_COMPLEX, imdn, 4, &
                      sub_comm_world, ireq(4), ierr_mpi )
      call MPI_isend( vb1(-nx,0,0,    1), slngv, MPI_DOUBLE_COMPLEX, ivdn, 1, &
                      sub_comm_world, ireq(5), ierr_mpi )
      call MPI_isend( vb1(-nx,0,0,nvb+1), slngv, MPI_DOUBLE_COMPLEX, ivup, 2, &
                      sub_comm_world, ireq(6), ierr_mpi )
      call MPI_isend( mb1(-nx,0,1,    1), slngm, MPI_DOUBLE_COMPLEX, imdn, 3, &
                      sub_comm_world, ireq(7), ierr_mpi )
      call MPI_isend( mb1(-nx,0,1,nvb+1), slngm, MPI_DOUBLE_COMPLEX, imup, 4, &
                      sub_comm_world, ireq(8), ierr_mpi )
      call MPI_waitall( 8, ireq, istatus, ierr_mpi )
                                         ! call fapp_stop("literm_shifts_sendrecv",1372,1)
                                           call clock_end(1372)

  END SUBROUTINE bndry_vm_sendrecv

    """,
    """

!--------------------------------------
  SUBROUTINE bndry_vm_buffout ( iz, vb2, mb2, ff )
!--------------------------------------
!     Shift communications in v and m directions

    integer, intent(in) :: iz
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,0:nm,1:2*nvb) :: vb2
    complex(kind=DP), intent(in), &
      dimension(-nx:nx,0:ny,1:2*nv,1:2*nvb) :: mb2
    complex(kind=DP), intent(inout), &
      dimension(-nx:nx,0:ny,-nz-nzb:nz-1+nzb,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff

    integer :: mx, my, iv, im


! --- substitution
!$OMP master
                                           call clock_sta(1373)
                                         ! call fapp_start("literm_shifts_bufferout",1373,1)
!$OMP end master

      if ( rankv == 0 ) then
!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, nvb
          do im = 0, nm
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,-nvb+iv,im) = (0._DP, 0._DP)
                ff(mx,my,iz,2*nv+iv,im) = vb2(mx,my,im,iv+nvb)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      else if ( rankv == nprocv-1 ) then
!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, nvb
          do im = 0, nm
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,-nvb+iv,im) = vb2(mx,my,im,iv    )
                ff(mx,my,iz,2*nv+iv,im) = (0._DP, 0._DP)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      else
!$OMP do collapse(2) schedule(dynamic)
        do iv = 1, nvb
          do im = 0, nm
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,-nvb+iv,im) = vb2(mx,my,im,iv    )
                ff(mx,my,iz,2*nv+iv,im) = vb2(mx,my,im,iv+nvb)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      end if

    """,
    """

      if ( rankm == 0 ) then
!$OMP do collapse(2) schedule(dynamic)
        do im = 1, nvb
          do iv = 1, 2*nv
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,iv,-nvb-1+im) = (0._DP, 0._DP)
                ff(mx,my,iz,iv,nm+im    ) = mb2(mx,my,iv,im+nvb)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      else if ( rankm == nprocm-1 ) then
!$OMP do collapse(2) schedule(dynamic)
        do im = 1, nvb
          do iv = 1, 2*nv
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,iv,-nvb-1+im) = mb2(mx,my,iv,im    )
                ff(mx,my,iz,iv,nm+im    ) = (0._DP, 0._DP)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      else
!$OMP do collapse(2) schedule(dynamic)
        do im = 1, nvb
          do iv = 1, 2*nv
            do my = ist_y, iend_y
              do mx = -nx, nx
                ff(mx,my,iz,iv,-nvb-1+im) = mb2(mx,my,iv,im    )
                ff(mx,my,iz,iv,nm+im    ) = mb2(mx,my,iv,im+nvb)
              end do
            end do
          end do
        end do
!$OMP end do nowait
      end if

!$OMP master
                                         ! call fapp_stop("literm_shifts_bufferout",1373,1)
                                           call clock_end(1373)
!$OMP end master


  END SUBROUTINE bndry_vm_buffout


END MODULE GKV_bndry

    """]

    

#### SEAT

In [None]:
chunks3 = [
    """
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import LambdaLR
import numpy as np
import json
import copy
import time
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import random

device = "cuda" if torch.cuda.is_available else "cpu"
    """,

    """
# this class is only considering one layer
class SEAT4(nn.Module):
    
    def __init__(self, embed_dim, thres, num_layers):
        super(SEAT4, self).__init__()
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.thres = thres.float().to(device)

        self.q_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.k_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.v_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)

        self.up_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.down_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        
        for layer in range(num_layers):
            self.q_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.q_projs[layer].bias.data.fill_(0.0).to(device)
            self.k_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.k_projs[layer].bias.data.fill_(0.0).to(device)
            self.v_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.v_projs[layer].bias.data.fill_(0.0).to(device)
            self.up_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.up_projs[layer].bias.data.fill_(0.0).to(device)
            self.down_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.down_projs[layer].bias.data.fill_(0.0).to(device)
    """,

    """
    # q_embs:(num_questions, embed_dim)
    # inf_embs:(num_contexts, embed_dim)
    def forward(self, q_embs, inf_embs):
        num_questions, num_contexts = q_embs.shape[0], inf_embs.shape[0]
        embs = torch.cat((inf_embs, q_embs), dim = 0)  #(num_contexts + num_questions, embed_dim)
        
        for layer in range(self.num_layers):
            A = torch.matmul(self.q_projs[layer](embs), self.k_projs[layer](embs[:num_contexts,:]).transpose(-1,-2))  #(num_contexts + num_questions, num_contexts)
            embs = torch.matmul(nn.Softmax(dim=-1)(A), self.v_projs[layer](embs[:num_contexts,:]))  #(num_contexts + num_questions, embed_dim)

            embs = torch.relu(self.up_projs[layer](embs))
            embs = self.down_projs[layer](embs)  #(num_contexts + num_questions, embed_dim)

        attention = torch.matmul(embs[num_contexts:,:], embs[:num_contexts,:].transpose(-1,-2))  #(num_questions, num_contexts)
        
        return attention    
    """,

    """
    def step(self, q_embs, inf_embs, inf_one_zero, optimizer):
        
        self.train()
        mse_loss = nn.MSELoss()
        optimizer.zero_grad()
        #print(self(q_embs, inf_embs).shape, self.target(inf_one_zero).shape)
        loss = mse_loss(self(q_embs, inf_embs), self.target(inf_one_zero))
        loss.backward()
        optimizer.step()
        
        
    def target(self, inf_one_zero):
        return torch.where(inf_one_zero==1.0, 2*self.thres[1]-self.thres[0], 2*self.thres[0]-self.thres[1])
        
    """,

    """
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
        

    def get_inf_one_zero(self, attention):
        return torch.where(attention > self.thres[1], 1, 0)
        

    def get_path_one_zero(self, attention):
        return torch.where(attention > self.thres[0], 1, 0)
    """,

    """
# set hyper parameters
num_epoch = 100
thres = torch.tensor([2, 10])
embed_dim = 1024
num_layers = 4
batch_size = 100  #adjust based on gpu memory
lr = 5e-6

# initializing SEAT
se_model = SEAT4(embed_dim, thres, num_layers)

optimizer = optim.SGD(se_model.parameters(), lr=lr)

loss_list = []
acc_list = []

# Preparing dataset
q_embs_train = torch.load("./squad_q_embs0.pt")
inf_embs_train = torch.load("./squad_inf_embs0.pt")
q_embs_test = torch.load("./squad_q_embs1.pt").to(device)
inf_embs_test = torch.load("./squad_inf_embs1.pt").to(device)

num_dataset_train = q_embs_train.shape[0]
num_dataset_test = q_embs_test.shape[0]

inf_one_zero_train = torch.eye(num_dataset_train).to(device)
inf_one_zero_test = torch.eye(num_dataset_test).to(device)

print(q_embs_train.shape, inf_embs_train.shape)

print(f"num_epoch:{num_epoch}")
print(f"num_dataset_train:{num_dataset_train}, num_dataset_test:{num_dataset_test}")


# scheduler
def set_lr(epoch, optimizer):
    global lr
    if epoch > 30:
        # Update learning rate to a new value
        lr = np.exp(-0.02) * lr
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    return optimizer

    """,

    """
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')

    """,
]

#### save

In [5]:
database = {
    "gkvp_advanc.f90 is a code for preparation of gkv code":chunks,
    "gkvp_bndry.f90 defines the boundary condition of gkv code":chunks2,
    "SEAT":chunks3
}

import json

with open("database.json", "w") as json_file:
    json.dump(database, json_file)

## Summarize chunks

In [1]:
database_name = "ows"
max_new_tokens = 4000  # embed_model should process only explanation in json text

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
device = "cuda" if torch.cuda.is_available else "cpu"
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left", add_eos_token=False, add_bos_token=False,)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### chunks into summary, explanation, params, defs and calls

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
device = "cuda" if torch.cuda.is_available else "cpu"
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left", add_eos_token=False, add_bos_token=False,)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn
from transformers import pipeline


# for restricting answer to be json 
class AnswerFormat(BaseModel):
    summary: str
    explanation: str
    parameters: dict[str, str]
    defined_functions: dict[str, str]
    called_functions: dict[str, str]

# Create a transformers pipeline
hf_pipeline = pipeline('text-generation', model=model, max_length = max_new_tokens,  tokenizer = tokenizer, device = 0)
#prompt = f'Here is information about Michael Jordan in the following json schema: {AnswerFormat.schema_json()} :\n'

# Create a character level parser and build a transformers prefix function from it
parser = JsonSchemaParser(AnswerFormat.schema())
prefix_function = build_transformers_prefix_allowed_tokens_fn(hf_pipeline.tokenizer, parser)


import os
if not os.path.exists("summary"):
    os.makedirs("summary")
if not os.path.exists("explanation"):
    os.makedirs("explanation")
if not os.path.exists("params"):
    os.makedirs("params")
if not os.path.exists("defs"):
    os.makedirs("defs")
if not os.path.exists("calls"):
    os.makedirs("calls")


import time
import json

database_path = "chunks/" + database_name + ".json"
with open(database_path) as json_file:
    chunks = json.load(json_file)

num_chunks = len(chunks)

start = time.time()

num_chunk = len(chunks)
summary = []
explanations = []
params = []
defs = []
calls = []

print()
print(f"number of chunks : {num_chunk}")

for i in range(num_chunk):
    print()
    print("=== code ===")
    print(chunks[i])
    
    text = "<s>[INST]You are an helpful assistant who analyzes the code bellow.\n\nCode:\n```\n" + chunks[i] + "\n```\n\nIn your answer, you must reply with json type text including single-line summary of the code, explanation of the code, all the parameters in the code, all the functions defined in the code and all the functions called in the code. Here's the form you must follow when you are answering:\n{'summary':(single-line summary), 'explanation':(explanation of the code), 'parameters':{(name of parameter):(explanation of parameter), ...}, 'defined_functions':{(name of defined function):(explanation of the function), ...}, 'called_functions':{(name of called function):(explanation of the function), ...}}[/INST]"
    output_dict = hf_pipeline(text, prefix_allowed_tokens_fn = prefix_function)
    print()
    print("=== output ===")
    print(output_dict[0]['generated_text'][len(text):])

    try:
        output = json.loads(output_dict[0]['generated_text'][len(text):])
        
        # add output to list
        summary.append(output["summary"])
        explanations.append(output["explanation"])
        params.append(output["parameters"])
        defs.append(output["defined_functions"])
        calls.append(output["called_functions"])

    except:
        print()
        print("Failed to get json type object")
        
        summary.append(output_dict[0]['generated_text'][len(text):])
        explanations.append(output_dict[0]['generated_text'][len(text):])
        params.append({})
        defs.append({})
        calls.append({})
    
    
    wrap = time.time()
    print(f"{(i+1)/num_chunk*100} % finished")
    print(wrap - start, "s has passed")

    
    # Save data to JSON file
    path = "summary/" + database_name + ".json"
    with open(path, 'w') as json_file:
        json.dump(summary, json_file)
    path = "explanation/" + database_name + ".json"
    with open(path, 'w') as json_file:
        json.dump(explanations, json_file)
    path = "params/" + database_name + ".json"
    with open(path, 'w') as json_file:
        json.dump(params, json_file)
    path = "defs/" + database_name + ".json"
    with open(path, 'w') as json_file:
        json.dump(defs, json_file)
    path = "calls/" + database_name + ".json"
    with open(path, 'w') as json_file:
        json.dump(calls, json_file)

print("file saved")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



number of chunks : 11

=== code ===
"""file containing functions"""
from time import time
import pygame

def animate_gif(delay, nb_images, time_gif, state):
    """animate the gif"""
    delay_gif = time() - time_gif
    if delay_gif > delay:
        state += 1
        time_gif = time()
    if state == nb_images:
        state = 0
    return time_gif, state

def play_sound(path, volume):
    """play a sound"""
    sound = pygame.mixer.Sound(path)
    sound.play()
    sound.set_volume(volume)



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code defines two functions: 'animate_gif' and 'play_sound'. The 'animate_gif' function animates a gif image with a given delay between frames, while the 'play_sound' function plays a sound file with a given volume using Pygame library",
"explanation": "The code starts by importing the necessary modules: 'time' for measuring time and 'pygame' for playing sounds. Two functions are defined: 'animate_gif' and 'play_sound'. The 'animate_gif' function takes four parameters: 'delay' (the time in seconds between each frame), 'nb_images' (the total number of frames in the gif), 'time_gif' (the time when the gif animation started), and'state' (the current frame index). The function calculates the elapsed time since the last frame change and, if the delay has been exceeded, it increments the frame index and updates the gif animation start time. If the frame index reaches the total number of frames, it is reset to zero. The function returns the new gif animation

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "Pygame script to display the top 5 best scores and exit on click",
"explanation": "This script uses Pygame library to create a window to display the top 5 best scores. It sorts the scores in descending order and displays the score rank, ID, and value on the window. The window also has an exit button that closes the window when clicked. The script uses two fonts, one for the numbers and one for the text, and loads them from a.ttf file. The script defines three functions: get_scores(), screen_scores(), and screen_scores(). get_scores() takes a list of sorted scores and returns nothing by blitting the surfaces and rects to the screen. screen_scores() launches the screen of scores and exits when the exit button is clicked. screen_scores() calls get_scores() with the top 5 sorted scores. No external functions are called in this script.",
"parameters": {
"width": "Screen width",
"height": "Screen height",
"screen": "Pygame screen object",
"scores": "Dictionary 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "Defines an Environment class that inherits from Entity and manages the creation and movement of various entities on a pygame screen",
"explanation": "This script defines an Environment class that initializes entities with specific rectangles, surfaces, and categories. The class includes methods for resetting the position of entities when they reach the end of the screen, generating random positions for certain entities, and animating the movement of entities. The script uses Pygame for graphics and random for generating random numbers",
"parameters": {
"height": "Height of the Pygame display screen",
"width": "Width of the Pygame display screen"
},
"defined_functions": {
"__init__": "Initializes an Environment entity with a given rectangle, surface, and category",
"loop": "Resets the position of the entity when it reaches the end of the screen",
"get_rand_pos": "Generates a random position for an entity based on its category",
"animate": "Moves the entity

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "A mother class named Entity is defined with methods for changing speed and position of an object",
"explanation": "The Entity class is defined with an initializer method that takes a rectangular object as an argument and initializes the speed and position of the entity using the rect's topleft corner. Two methods are defined: change_speed, which updates the speed of the entity based on an acceleration, and change_position, which updates the position of the entity based on the current speed and elapsed time. The position of the rect is updated accordingly in the change_position method",
"parameters": {
"rect": "A Pygame Rect object representing the boundaries of the entity"
},
"defined_functions": {
"__init__": "Initializes the Entity object with a given rect, initializes speed and position using the rect's topleft corner",
"change_speed": "Updates the speed of the Entity based on an acceleration",
"change_position": "Updates the position of the Entity bas

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "Pygame game script for an unnamed game with random enemies, scoring, and high scores screen. The game runs in an infinite loop, checking for user input and updating game objects accordingly.",
"explanation": "The script imports necessary modules, initializes game variables, and checks if pygame is installed. It then enters an infinite loop where it sets the acceleration based on the current score, checks for user input, and updates game objects accordingly. The script defines no functions but calls several functions from other modules, such as animate_gif and play_sound, which are used for animations and sounds respectively. The script also initializes several game objects such as enemies, surfaces, and rectangles, and sets their initial properties. The script also uses try-except block to handle the case when pygame is not installed",
"parameters": {
"ACCELERATION": "The acceleration of the game object based on the current score",
"event": "An event obje

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code handles user input and game logic for a Sonic-like game, including checking conditions to end the game, handling user jumps, and spawning enemies with certain probabilities based on Sonic's health and game state",
"explanation": "The code starts by checking several conditions to end the game or switch to the highscores screen. It then handles the user pressing the spacebar to jump, with additional checks to ensure Sonic is on the ground and not already jumping. The code also includes a delay before Sonic can jump again. The code then spawns enemies with a certain probability based on Sonic's health and the game state. The enemies have different types and appear at different locations on the screen",
"parameters": {
"width_restrict": "Boolean parameter to restrict the game width",
"height_restrict": "Boolean parameter to restrict the game height",
"state_game": "Boolean parameter representing the current game state",
"score_w_restrict": "Boolean 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code is for a game where the player, Sonic, avoids enemies and tries to achieve the highest score by collecting hearts. The game keeps track of the score, best score, and displays visual effects for damage and healing. If Sonic loses all his health, the game ends, and the best score is saved and displayed",
"explanation": "The code starts by defining some variables and constants. It then checks certain conditions to spawn enemies and creates visual effects for damage and healing. The screen is filled with a specific color based on the effect. The code then checks for the score, best score, and displays them accordingly. If Sonic loses all his health, the game ends, and the best score is saved and displayed. The code also includes functions for playing sounds and rendering text on the screen",
"parameters": {
"random_heart": "Boolean to check if a heart should be spawned randomly",
"CHECKHEART": "Boolean to check if a heart check condition is met",
"C

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code handles the movement and collision detection of various game elements, including grass, clouds, palms, and enemies, in a 2D platformer game. It also manages Sonic's jump and health, and displays the enemies on the screen",
"explanation": "The code starts by checking if the 'LOST' variable is false. If it is, the code animates the positions of grass, clouds, and palms. Then, it checks for collisions between Sonic and enemies, hearts, and the wall. If a collision occurs, the corresponding action is taken, such as healing Sonic or decreasing his health. If 'LOST' is true, the code sets the positions and animations of the clouds and palms for the game over screen. The code then handles the enemy population by removing enemies that have been popped from the list. Next, it manages Sonic's jump, and finally, it handles the display of texts on the screen. The code uses several variables, functions, and classes, which are explained below",
"parameters": 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code is a part of a Pygame game where it handles the display of game elements such as the game over screen, scores, and Sonic's character based on certain conditions and events",
"explanation": "The code starts by checking if the game has been lost. If so, it displays the game over screen, scores, last score, best score, and the player's pseudo. If not, it displays the score and Sonic's character. The code also checks if certain score milestones have been reached and plays corresponding sounds. Sonic's health is displayed as a series of heart images. The code also handles Sonic's jumping animation and movement restrictions. The game uses Pygame's blit function to display images on the screen, and Pygame's time function to manage time-based events",
"parameters": {
"LOST": "A boolean value indicating if the game has been lost or not",
"screen": "The Pygame screen object",
"end_surface": "The game over screen image",
"end_rect": "The game over screen i

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



=== output ===
 {
"summary": "This code displays a game over screen if the time since the end of the game is less than 3 seconds, otherwise it continues the game by displaying the Sonic character and the restart button, and animating the Sonic character using an animate_gif function",
"explanation": "The code starts by checking if the time since the end of the game is less than 3 seconds. If it is, then the screen is filled with white color and the game over image is displayed. If the time is greater than 3 seconds, then the Sonic character is displayed along with the restart button. The Sonic character is animated using the animate_gif function with a frame rate of 0.3 and a total of 2 frames. The grass surface is also displayed twice on the screen, once at the topright and once at the topleft. The pygame display is updated and then quit at the end of the code",
"parameters": {
"end_time": "The time when the game ended",
"height": "The height of the pygame screen",
"width": "The widt

### code summary to folder/file summary

In [2]:
import json
import os

file_path_json = "file_paths/" + database_name + ".json"
with open(file_path_json) as json_file:
    file_paths = json.load(json_file)
file_path_json = "summary/" + database_name + ".json"
with open(file_path_json) as json_file:
    summary = json.load(json_file)

# for summarizing all folders
f_dict = {}
root = None
class F:
    def __init__(self, path, child = None, summary = None):
        global root
        global f_dict
        
        if not path in f_dict:
            f_dict[path] = self
            self.is_dir = not "." in os.path.basename(path)
            self.path = path
            self.name = os.path.basename(path)

            if not self.is_dir:
                self.snippet_summaries = [summary]
            
            if not "/" in path:
                self.is_root = True
                root = self
                self.parent = None
            else:
                self.is_root = False
                if os.path.dirname(path) in f_dict:
                    f_dict[os.path.dirname(path)].children.append(self)
                    self.parent = f_dict[os.path.dirname(path)]
                else:
                    f = F(os.path.dirname(path), child = self)
                    self.parent = f
    
            if child != None:
                self.children = [child]
            else:
                self.children = []

            self.summary = None

        else:
            if "." in os.path.basename(path):
                f_dict[path].snippet_summaries.append(summary)

    def set_summary(self):
        global num_sum_done
        if self.is_dir:
            summary_text = ""
            for child in self.children:
                child.set_summary()
                summary_text += child.name + " : " + child.summary + "\n"
                
            prompt = "<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.\n\nHere's the content of children files or folders under the folder you summarize:\n" + summary_text + "[/INST]"
            
            print(f"=== {self.path} ===")
            print("--- INPUT ---")
            print(prompt)
            
            input_ids = tokenizer(prompt, return_tensors="pt").to(device)
            output_ids = model.generate(**input_ids, max_new_tokens=2000)
            self.summary = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens = True)

            num_sum_done += 1
            print("--- OUTPUT ---")
            print(self.summary)
            print()
            print(f"summarization {num_sum_done}/{num_f} finished")
            
        else:
            content = ""
            for i in range(len(self.snippet_summaries)):
                content += "snippet " + str(i+1) + " : " + self.snippet_summaries[i] + "\n"
                
            prompt = "<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.\n\nHere's the content of each code snippet:\n" + content + "[/INST]"

            print(f"=== {self.path} ===")
            print("--- INPUT ---")
            print(prompt)
            
            input_ids = tokenizer(prompt, return_tensors="pt").to(device)
            output_ids = model.generate(**input_ids, max_new_tokens=2000)
            self.summary = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens = True)

            num_sum_done += 1
            print("--- OUTPUT ---")
            print(self.summary)
            print()
            print(f"summarization {num_sum_done}/{num_f} finished")

# for constructing class
for i in range(len(file_paths)):
    F(file_paths[i], summary = summary[i])

num_f = len(f_dict)
print(f"num file/folder : {num_f}")

num_sum_done = 0
root.set_summary()  # root should be batabase_name folder but it's /data now

f_summary = {}
for f_path in f_dict:
    f_summary[f_path] = f_dict[f_path].summary

if not os.path.exists("f_summary"):
    os.makedirs("f_summary")
    
file_path_json = "f_summary/" + database_name + ".json"
with open(file_path_json, 'w') as json_file:
    json.dump(f_summary, json_file)

print(f"{file_path_json} has been saved")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


num file/folder : 59
=== data/ows/Test/test4.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script that calls a Python script using the Terminal application on macOS and logs the output in Unity's console
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script executes a Python script using macOS Terminal and displays the output in Unity's console.

summarization 1/59 finished
=== data/ows/Test/CompileFile.py ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : Defines and calls a function that prints a list as a string
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a function definition and call that prints a list as a string using the `join()` method.

summarization 2/59 finished
=== data/ows/Test/warm_joint_ctr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script for controlling the rotation of an object based on a boolean variable and user input
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script controls the rotation of an object based on a boolean variable and user input.

snippet 2 : A Python script for calculating the factorial of a given number using recursion
[This script calculates the factorial of a given number using recursion in Python.]

snippet 3 : A JavaScript function for validating an email address
[This JavaScript function validates an email address.]

snippet 4 : A C++ program for finding the largest number in an array
[This C++ program finds the largest number in an array.]

snippet 5 : A Swift function for reversing a string
[This Swift function reverses a given string.]

The file contains various scripts and functions written in different programming languages for performing specific tasks such as controlling object rotation, calculating factorials, validating email addresses, finding the largest number in an array, and reversing strings.

summarization 3/59 finished
=== data/ows/Test/test1.cs ===
--- INPUT ---
<s>[INST]You a

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains incomplete Unity C# scripts: the first script begins the definition of an empty class, the second script initializes variables and defines data structures for a MonoBehaviour component named 'test1', and the third script handles user input and object transformations in the Update method.

summarization 4/59 finished
=== data/ows/Test/test5.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for creating a TCP listener that connects to a Python script and exchanges Process ID (PID) information between them using Unity3D and.NET libraries
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity3D sets up a TCP listener to exchange Process ID (PID) information with a connected Python script using .NET libraries.

summarization 5/59 finished
=== data/ows/Test/test8.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A JavaScript function that calculates the factorial of a given number using recursion
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This JavaScript file contains a function that calculates the factorial of a given number using recursion.

summarization 6/59 finished
=== data/ows/Test/PythonTest2.py ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This code imports the sys module to access command-line arguments and prints out the first argument, the name of the script, and the total number of arguments as strings
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file imports the sys module to print the name of the script and the total number of arguments passed to it. (snippet 1)

summarization 7/59 finished
=== data/ows/Test/test7.py ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Python script that calculates the force acting on a piston in a gas container based on its position using ideal gas law and adiabatic process
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Python script calculates the force on a piston in an ideal gas container using the ideal gas law and adiabatic process.

summarization 8/59 finished
=== data/ows/Test/jet_test.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script that attaches to a GameObject with a Rigidbody component and applies a force in the negative direction of the object's forward axis when the Space key is pressed
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script applies a force in the opposite direction of a GameObject's forward axis when the Space key is pressed, using a Rigidbody component.

summarization 9/59 finished
=== data/ows/Test/test9.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 :  {
"summary": "A Unity script for controlling a GameObject using input from the mouse and keyboard, and applying forces based on arrow key presses",
"explanation": "This script, named 'test9', is attached to a GameObject in Unity. It uses the 'MonoBehaviour' class and starts and updates in the game loop. The script has a public 'GameObject' variable named 'obj'. In the 'Start' function, nothing happens. In the 'Update' function, if the left mouse button is pressed, a raycast is 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script, named 'test9', controls a GameObject using input from the mouse and keyboard, applying forces based on arrow key presses in the game loop. It uses raycasting to detect collisions and adds forces to the 'Rigidbody' component of the GameObject accordingly. Functions used include 'Input.GetMouseButton', 'Input.GetKeyDown', 'Camera.main.ScreenPointToRay', and 'Physics.Raycast'.

summarization 10/59 finished
=== data/ows/Test/PythonTest.py ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This script takes two command-line arguments: a message and a count. It prints the message followed by an index number, repeated count times
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a script that prints a given message followed by its index number, repeated a specified count, when run with two command-line arguments.

summarization 11/59 finished
=== data/ows/Test/joint_ctr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script for controlling the rotation of a GameObject based on user input and a set mode
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script controls the rotation of a GameObject, allowing user input and toggling between different rotation modes.

snippet 2 : A C# script for managing a simple text-based RPG character's stats and abilities
[
This C# script manages a text-based RPG character's stats, including health, mana, and abilities, allowing for interaction and updates based on user input.

snippet 3 : A Python script for generating random mazes using depth-first search algorithm
[This Python script generates random mazes by implementing the depth-first search algorithm, allowing for the creation of unique and complex labyrinth structures.

snippet 4 : A JavaScript file for creating a simple drag-and-drop interface using HTML5 and CSS3
[This JavaScript file creates a simple drag-and-drop interface using HTML5 and CSS3, enabling users to interactively move elements on a webpage.

The file is a collection of scripts for various purposes, including Unity game control, text-based RPG charact

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The Unity C# script 'test2' logs components and prints a colored string on pointer click events using MonoBehaviour and IPointerClickHandler interfaces.

summarization 13/59 finished
=== data/ows/Test/test6.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script using Unity's System.Diagnostics.Process class to execute a Python script and parse its output as numbers to be logged in Unity's console
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a C# script in Unity that executes a Python script using System.Diagnostics.Process, parses its output as numbers, and logs them in Unity's console.

summarization 14/59 finished
=== data/ows/Test/test3.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script named 'test3' initializes a dictionary, sets up two GameObjects, and responds to collision events. It also modifies the euler angles of an object upon collision
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The Unity script 'test3' initializes a dictionary, sets up two GameObjects, and responds to collision events by modifying the euler angles of an object.

summarization 15/59 finished
=== data/ows/Test/ProbuilderTest/csg.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This script appears to be a part of a Unity project using Parabox CSG library for 3D modeling and manipulation. It declares some namespaces and a public class with incomplete definition
snippet 2 : A C# script for creating composite GameObjects using ColliderShapeGroups and manipulating their properties
snippet 3 : This script creates a new GameObject by subtracting the mesh of a cylinder from a cube using CSG operation and attaches the resulting mesh to the new 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity project file utilizes the Parabox CSG library for 3D modeling and manipulation, with scripts for creating composite GameObjects using ColliderShapeGroups, performing CSG operations such as subtracting meshes, and instantiating and interacting with GameObjects based on user input.

summarization 16/59 finished
=== data/ows/Test/ProbuilderTest/drill_test1.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script for controlling a drill object, managing drill animation, and generating chips with random positions and directions
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script controls a drill object, manages its animation, and generates chips with random positions and directions.

summarization 17/59 finished
=== data/ows/Test/ProbuilderTest/csg2.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script using Parabox.CSG library to perform subtraction operation on two GameObjects and replace the original one with the result in every frame when 'Alpha0' key is pressed
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script utilizes the Parabox.CSG library to perform repeated subtraction of two GameObjects and replace the original one with the result every frame when the 'Alpha0' key is pressed.

summarization 18/59 finished
=== data/ows/Test/ProbuilderTest/csg3.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script using Parabox.CSG library for 3D object subtraction and debugging. It allows users to subtract one object from another using the space key and debug the vertices and indices of objects using alpha keys
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script utilizes the Parabox.CSG library for 3D object subtraction and debugging. Users can subtract one object from another using the space key and examine object vertices and indices with alpha keys.

summarization 19/59 finished
=== data/ows/Test/ProbuilderTest ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
csg.cs : This Unity project file utilizes the Parabox CSG library for 3D modeling and manipulation, with scripts for creating composite GameObjects using ColliderShapeGroups, performing CSG operations such as subtracting meshes, and instantiating and interacting with GameObjects based on user input.
drill_test1.cs : This Unity script controls a drill object, manages its animation, a

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains Unity scripts and project files for 3D modeling and manipulation using the Parabox CSG library. The scripts enable creating composite GameObjects through CSG operations such as subtracting meshes, managing drill object animations and chip generation, and debugging 3D object subtraction.

summarization 20/59 finished
=== data/ows/Test ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
test4.cs : This Unity script executes a Python script using macOS Terminal and displays the output in Unity's console.
CompileFile.py : The file contains a function definition and call that prints a list as a string using the `join()` method.
warm_joint_ctr.cs : This Unity script controls the rotation 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains various scripts and projects written in different programming languages (C#, Python, JavaScript, Swift) for performing specific tasks such as Unity game control, text-based RPG character management, random maze generation, simple drag-and-drop interface creation, and 3D modeling using the Parabox CSG library. The scripts include functions for controlling object rotation, calculating factorials, validating email addresses, finding the largest number in an array, reversing strings, managing RPG character stats, generating random mazes, creating drag-and-drop interfaces, and logging components in Unity.

summarization 21/59 finished
=== data/ows/Workspace/MoldScrollViewBtnCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snipp

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity controls the selection of molds using a scroll view button.

snippet 2 : A JavaScript function that calculates the area of a rectangle given its width and height.
[The area of a rectangle is calculated as width multiplied by height.]

snippet 3 : A Python function that takes a list as an argument and returns the index of the first occurrence of a specific value.
[This Python function finds the index of the first occurrence of a value in a given list.]

snippet 4 : An HTML/CSS code snippet for creating a responsive navigation menu using media queries.
[This HTML/CSS code creates a responsive navigation menu that adapts to different screen sizes.]

The file is a collection of code snippets, including a Unity script for mold selection, a JavaScript function for calculating rectangle area, a Python function for finding the first occurrence of a value in a list, and an HTML/CSS code for creating a responsive navigation menu.

summarization 22/59 finish

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a C# script defining a struct named 'UniqueJointInfo' for saving joint information to be used with Unity's physics engine.

summarization 23/59 finished
=== data/ows/Workspace/RestraintBtnCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script named 'RestraintBtnCtr' for Unity, which handles image color change when a button is clicked
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a C# script named 'RestraintBtnCtr' in Unity, which manages image color modifications upon button clicks.

summarization 24/59 finished
=== data/ows/Workspace/UniqueJointCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing unique joints in Unity, which calculates the position and EulerAngles of a child object relative to a parent object and triggers a unique joint collider based on the input item type and name
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity manages unique joints by calculating the child object's position and EulerAngles relative to the parent object, and triggers corresponding joint colliders based on input item type and name.

summarization 25/59 finished
=== data/ows/Workspace/CameraCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This C# script starts with an inclusion of necessary namespaces for Unity and event systems, and declares a public class with no name or implementation provided. It seems incomplete and needs further development
snippet 2 : A C# script for managing camera controls and object interactions in a 3D application, implementing various interfaces for input handling and scrolling
snippet 3 : This script checks the 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script file manages various aspects of user input, camera controls, and object interactions in a 3D application, including camera movement, tool usage, raycasting, and object selection. It also creates and manages outlines for selected GameObjects.

summarization 26/59 finished
=== data/ows/Workspace/SmlParam.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script named 'SmlParam' initializes a dictionary to store simulation parameters and defines an interface 'ISimulationHandler'.
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# file, named 'SmlParam', initializes a dictionary to store simulation parameters and defines an interface 'ISimulationHandler'.

summarization 27/59 finished
=== data/ows/Workspace/MoldInfo.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for defining mold information with collider properties and hole data for Unity game engine
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity defines mold information with collider properties and hole data for creating molds in the game engine.

summarization 28/59 finished
=== data/ows/Workspace/OutlineScript.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script that adds an outline effect to a GameObject when the mouse is over it and rotates the object when the mouse is hovering over it
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script adds an outline effect to a GameObject when the mouse hovers over it and rotates the object continuously while the mouse remains hovered.

summarization 29/59 finished
=== data/ows/Workspace/UniqueJointCollider.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script named 'UniqueJointCollider' that detects collisions with objects tagged as 'Item' and triggers an event in 'UniqueJointCtr' component with the item's name and type
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script, named 'UniqueJointCollider', detects collisions with objects tagged as 'Item' and triggers an event in the associated 'UniqueJointCtr' component, passing the item's name and type as arguments.

summarization 30/59 finished
=== data/ows/Workspace/PhysicsCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script named 'PhysicsCtr' that manages physics interactions and maintains a list of colliding objects, as well as some dictionaries for storing parameters and equation strings related to joints
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The 'PhysicsCtr' Unity script manages physics interactions, maintains a list of colliding objects, and stores related joint parameters and equation strings in dictionaries.

summarization 31/59 finished
=== data/ows/Workspace/ObjectBtnCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing object selection in a Unity game, using a shift key modifier to toggle selection of an object and check for existing selection of the camera object or already selected objects
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity handles object selection in a game, allowing the user to toggle selection of an object using the shift key, while preventing the selection of the camera object or already selected objects.

summarization 32/59 finished
=== data/ows/Workspace/WorkspaceCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This C# script uses several Unity and System libraries for game development, CSG operations, and UI management. It doesn't define any specific functionality in the provided code snippet
snippet 2 : A MonoBehaviour script for managing various functionalities of a workspace, including object manipulation, mold creation, restraint, and simulation tools. It also manages object lists and panels for displaying 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity C# script file manages various functionalities of a workspace in a game, including object manipulation, UI management, Rigidbody constraints, and GameObject selection. It initializes components, sets up panels, and defines functions for controlling object properties and UI elements.

summarization 33/59 finished
=== data/ows/Workspace/ItemInfo.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script defining a 'ItemInfo' class for managing item names, types, and descriptions in Unity, using JSON-like format
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The file contains a C# script defining an "ItemInfo" class for managing item names, types, and descriptions in Unity using a JSON-like format.

summarization 34/59 finished
=== data/ows/Workspace/Calculator.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A calculator class that evaluates mathematical expressions with support for user-defined parameters and various operators including exponentiation
snippet 2 : This code implements a simple expression parser and evaluator for arithmetic expressions with parentheses and basic operators. It supports parameters and handles operator precedence and associativity correctly. The expression is represented as a string, and the result is returned as a double value
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This file contains classes for a calculator that evaluates mathematical expressions with user-defined parameters and various operators, including exponentiation, and an implementation of a simple expression parser and evaluator for arithmetic expressions with parentheses and basic operators, handling precedence and associativity correctly. The expressions are represented as strings, and the results are returned as double values.

summarization 35/59 finished
=== data/ows/Workspace/GenerateObjCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This script is written in C# for Unity and appears to be incomplete. It begins by importing necessary namespaces and declaring that the script should be attached to a GameObject named 'M

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The Unity C# file contains three scripts: the first one, incomplete, is designed for handling user input events in Unity's Event System and attaching it to a GameObject named 'MoldBtnPfb'. The second script generates objects in a Unity scene using drag-and-drop functionality. The third script defines a class with event functions for managing object updates when the user finishes dragging an object.

summarization 36/59 finished
=== data/ows/Workspace/Tool/SphereColliderCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for detecting when a GameObject enters or exits a collider and handling specific actions based on the entered GameObject's name and the script's associated parent GameObject and WorkspaceCtr compon

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script detects and responds to GameObjects entering or exiting its collider, using the entered GameObject's name, its parent GameObject, and associated WorkspaceCtr components for specific actions.

summarization 37/59 finished
=== data/ows/Workspace/Tool/WeldingMachineCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for controlling a welding machine in a Unity game, managing its position, animation, and welding process
snippet 2 : This script defines a Welding class with methods for welding an object to a surface, setting the welder off, and managing the active state of the welder. It also includes a SetMove function to manage the active state of different welding modes
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity C# file contains a script for controlling a welding machine in-game, including a Welding class with methods for welding objects to surfaces, managing the welder's active state, and handling different welding modes.

summarization 38/59 finished
=== data/ows/Workspace/Tool/WeldingSearchSphereCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing a list of GameObjects that enter and exit a collider's trigger in Unity
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity manages a list of GameObjects that enter and exit a collider's trigger.

summarization 39/59 finished
=== data/ows/Workspace/Tool/DrillCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script for controlling a drill's behavior, including drilling, holding, and setting animations
snippet 2 : This script controls the movement and animation of a drill object in a game, including drilling, holding, and holding off positions, as well as setting and setting off animations and drill progression
snippet 3 : A C# script for managing drilling mechanics, including drilling to a surface, drill progress, and drill off. It also handles rotations and animations
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script file manages the behavior of a drill object in a game, controlling its movement, animations, and drilling mechanics such as drilling to a surface, drill progress, and rotations.

summarization 40/59 finished
=== data/ows/Workspace/Tool/ChipCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A Unity script for controlling the movement and rotation of a game object, named ChipCtr, for a certain period of time before destroying it
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity script controls the movement and rotation of an object named "ChipCtr" for a specific duration, before destroying it.

summarization 41/59 finished
=== data/ows/Workspace/Tool ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
SphereColliderCtr.cs : This C# script detects and responds to GameObjects entering or exiting its collider, using the entered GameObject's name, its parent GameObject, and associated WorkspaceCtr components for specific actions.
WeldingMachineCtr.cs : This Unity C# file contains a script for controlling a welding machine in-game, including a Welding class with methods for welding objects to surfaces, managing the welder's active state, and handling different welding mo

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains scripts for controlling various machinery objects, such as SphereCollider, WeldingMachine, Drill, and Chip, in a Unity game. The scripts handle collider detection, welding mechanics, drilling mechanics, and object destruction.

summarization 42/59 finished
=== data/ows/Workspace/ECsml/ECPanelCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script implementing the Unity EventSystems interfaces IPointerDownHandler, IPointerUpHandler, IDragHandler, IEndDragHandler, IPointerEnterHandler, and IPointerExitHandler for an ECPanel controller
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script is used for handling various pointer events such as pointer down, up, drag, and enter/exit for an ECPanel controller in Unity, implementing the required EventSystems interfaces.

summarization 43/59 finished
=== data/ows/Workspace/ECsml/ECPanelBtnJointCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing joint interactions in Unity using event systems, implementing IPointerDownHandler, IPointerUpHandler, IDragHandler, and IEndDragHandler interfaces
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity handles joint interactions through event systems, implementing interfaces for pointer down, pointer up, drag, and end drag events.

snippet 2 : A Python script for data preprocessing using NumPy and Pandas libraries, including data loading, cleaning, and transformation
[
This Python script uses NumPy and Pandas libraries for data preprocessing tasks such as loading, cleaning, and transforming data.

snippet 3 : A JavaScript function for validating user input in a form using regular expressions and conditional statements
[
This JavaScript function validates user input in a form by applying regular expressions and conditional statements to ensure data meets specific requirements.

snippet 4 : A Swift script for creating a custom UIView subclass with custom drawing functionality using Core Graphics
[
This Swift script creates a custom UIView subclass with the ability to draw custom graphics using Core Graphics.

The file contains scripts written in v

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity C# file contains scripts for creating and managing electrical connections between components in a circuit simulation system, as well as handling the position and direction of objects in a 2D grid and calculating directions between points. The file includes classes for 'ecc' electrical connections and 'ece' utilities.

summarization 45/59 finished
=== data/ows/Workspace/ECsml/ECPanelBtnCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing interactions with GameObjects tagged as 'ECE' (Environmental Control Elements) using Input events and storing references to ECsml (Environmental Control System Manager) and ecc (Environmental Control Component) objects, as well as a list of JointBtn GameObjec

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script manages interactions with GameObjects tagged as 'ECE' (Environmental Control Elements) using Input events, storing references to ECsm (Environmental Control System Manager) and ecc (Environmental Control Component) objects, and a list of JointBtn GameObjects, while implementing IPointerDownHandler, IPointerUpHandler, IDragHandler, and IEndDragHandler interfaces for handling Input events in an environmental control system.

summarization 46/59 finished
=== data/ows/Workspace/ECsml/ECsml.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : This C# script uses several Unity packages and namespaces to create a UI system with event handling and a collection system, but the code body is missing
snippet 2 : This script is 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This file contains various C# scripts for managing different components in an Electro Chemical Simulation (ECsml) game, including Electro Chemical Equivalents (ECE), Electrical Connection Elements (ECEs), wires, End-of-Line Character (EOL) sequences, and Environmental Component Entities (ECEs). The scripts initialize components, set up data structures, handle events, and manage the behavior of different game objects. Some scripts also define functions for data processing, error correction, and graph processing in the context of power system analysis.

summarization 47/59 finished
=== data/ows/Workspace/ECsml/GenerateECECtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script written in Unity for handling pointer events

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity C# script generates Electro-Chemical Equivalent Circuit (ECEC) objects in response to pointer events within a panel.

summarization 48/59 finished
=== data/ows/Workspace/ECsml/JointBtnCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing joint interaction with the user input in Unity, implementing IPointerDownHandler, IPointerUpHandler, IDragHandler, IEndDragHandler, IPointerEnterHandler, and IPointerExitHandler interfaces
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity handles user input interactions with joints, implementing interfaces for pointer down, up, drag, end drag, enter, and exit events.

snippet 2 : A Python script for data preprocessing, including reading CSV files, handling missing values, and encoding categorical variables using one-hot encoding
[
This Python script preprocesses data by reading CSV files, managing missing values, and encoding categorical variables using one-hot encoding.

snippet 3 : An HTML/CSS/JavaScript file for creating a responsive image gallery, allowing users to view enlarged images and navigate through them using arrow keys or swipe gestures
[
This HTML/CSS/JavaScript file creates a responsive image gallery, enabling users to view larger images and navigate through them using arrow keys or swipe gestures.

snippet 4 : A Swift script for generating random mazes using depth-first search algorithm, creating a 2D grid with walls and paths
[
This Swift script generates random ma

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity3D C# script file contains incomplete code for a script using System.Collections and UI, a script for drawing lines on a UICanvas using Vertex Helper, and a function named "DrawVHLine" to draw vertical or horizontal lines with custom specifications.

summarization 50/59 finished
=== data/ows/Workspace/ECsml/ECPanel2Ctr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for managing pointer enter and exit events on an Unity GameObject, interacting with an ECsml component
[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This C# script in Unity manages pointer enter and exit events on a GameObject, interacting with an ECsml component.

snippet 2 : A Python script for data preprocessing, including reading CSV files, handling missing values, and encoding categorical variables

This Python script performs data preprocessing tasks such as reading CSV files, handling missing values, and encoding categorical variables.

snippet 3 : A JavaScript function for validating user input in a form, using regular expressions for email and password fields

This JavaScript function validates user input in a form by using regular expressions for email and password fields.

snippet 4 : A Swift function for sorting an array of custom structs based on a specific property

This Swift function sorts an array of custom structs based on a specific property.

The file contains scripts and functions for managing events, data preprocessing, form validation, and array sorting in various programming languages.

summar

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity C# script file contains classes and functions for creating and managing Electro-Mechanical Component (ECE) objects, including their properties, connections, and calculations of directions based on local coordinates.

summarization 52/59 finished
=== data/ows/Workspace/ECsml ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
ECPanelCtr.cs : This C# script is used for handling various pointer events such as pointer down, up, drag, and enter/exit for an ECPanel controller in Unity, implementing the required EventSystems interfaces.
ECPanelBtnJointCtr.cs : This C# script in Unity handles joint interactions through event systems, implementing interfaces for pointer down, pointer up, drag, and end

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains scripts and files written in various programming languages (C#, Python, JavaScript, and Swift) for managing user interactions, data preprocessing, form validation, array sorting, and creating custom components in different systems such as Unity, Python, HTML/CSS/JavaScript, and Swift. Specifically, it includes scripts for handling joint interactions in Unity, preprocessing data using Python, creating a responsive image gallery, generating random mazes, and managing Electro-Mechanical Components in Unity.

summarization 53/59 finished
=== data/ows/Workspace ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
MoldScrollViewBtnCtr.cs : This C# script in Unity controls the selection of 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains a collection of scripts and code snippets written in various programming languages (C#, JavaScript, Python, and HTML/CSS) for different purposes in various projects, including Unity game development, calculator functions, and web development. The scripts include Unity scripts for mold selection, joint information management, object selection, and physics interactions; JavaScript functions for calculating rectangle area and creating responsive navigation menus; Python functions for finding the first occurrence of a value in a list; and HTML/CSS code for creating responsive navigation menus. Additionally, there are folders for specific projects, such as 'Tool' for Unity machinery objects and 'ECsml' for managing user interactions, data preprocessing, form validation, array sorting, and creating custom components in various systems.

summarization 54/59 finished
=== data/ows/Start/StartSceneCtr.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant w

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
The Unity script named 'StartSceneCtr' manages the start scene, rotates a GameObject, and loads the next scene upon button press, utilizing the SceneManager.LoadScene() function.

summarization 55/59 finished
=== data/ows/Start/vars.cs ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes content of code file. You are given contents of code snippets in the file, and you should summarize all of them into single-line sentence and answer what the file is for.

Here's the content of each code snippet:
snippet 1 : A C# script for Unity game engine using various System and UnityEngine namespaces for UI management
snippet 2 : A C# script for managing game objects, dictionaries, and other data structures for a game development project
snippet 3 : A C# script for managing game data, file paths, and sprites for a game. It initializes game data, sets up dictionaries for sprites and file paths, and provides functions to load saved data and initialize the game.', 
sni

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This Unity game development project file contains various C# scripts for managing UI, game objects, game data, file paths, and sprites. Some scripts define classes for managing dictionaries, file and folder hierarchies, and game data saving and loading using JSON and PlayerPrefs.

summarization 56/59 finished
=== data/ows/Start ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
StartSceneCtr.cs : The Unity script named 'StartSceneCtr' manages the start scene, rotates a GameObject, and loads the next scene upon button press, utilizing the SceneManager.LoadScene() function.
vars.cs : This Unity game development project file contains various C# scripts for managing UI, game objects, game data, file paths,

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains Unity script files named "StartSceneCtr.cs" and "vars.cs". The former manages the start scene, rotates a GameObject, and loads the next scene upon button press using SceneManager.LoadScene(). The latter contains various scripts for managing UI, game objects, game data, file paths, and sprites, defining classes for managing dictionaries, file and folder hierarchies, and game data saving and loading using JSON and PlayerPrefs. In summary, this folder is for managing the Unity game development project's start scene and various scripts for game management and data handling.

summarization 57/59 finished
=== data/ows ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder content. You are given content of file or folder under the folder, and you should summarize all of them into single-line sentence and answer what the folder is for.

Here's the content of children files or folders under the folder you summarize:
Test : This folder 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


--- OUTPUT ---
This folder contains various programming projects and scripts written in different languages (C#, Python, JavaScript, Swift) for diverse purposes such as Unity game control, text-based RPG character management, random maze generation, interface creation, and 3D modeling. The scripts include functions for controlling object rotation, calculating factorials, validating email addresses, finding the largest number in an array, reversing strings, managing RPG character stats, generating random mazes, creating drag-and-drop interfaces, and logging components in Unity. Additionally, there are folders for specific projects, including 'Workspace' with scripts for Unity game development, calculator functions, and web development, and 'Start' for managing the Unity game development project's start scene and various scripts for game management and data handling.

summarization 58/59 finished
=== data ===
--- INPUT ---
<s>[INST]You are a helpful assistant who summarizes a folder cont

In [7]:
def get_path_summaries(file_path, dataset_name):
    file_path_json = "f_summary/" + database_name + ".json"
    with open(file_path_json) as json_file:
        f_summary = json.load(json_file)

    f_name_list = []
    f_summary_list = []
    while "/" in file_path: # not run when path == data where summary of dataset_name folder is already added to the list
        f_name_list.insert(0, os.path.basename(file_path))
        f_summary_list.insert(0, f_summary[file_path])
        file_path = os.path.dirname(file_path)
        
    return f_name_list, f_summary_list

In [8]:
get_path_summaries("data/ows/Workspace/WorkspaceCtr.cs", "ows")

(['ows', 'Workspace', 'WorkspaceCtr.cs'],
 ["This folder contains various programming projects and scripts written in different languages (C#, Python, JavaScript, Swift) for diverse purposes such as Unity game control, text-based RPG character management, random maze generation, interface creation, and 3D modeling. The scripts include functions for controlling object rotation, calculating factorials, validating email addresses, finding the largest number in an array, reversing strings, managing RPG character stats, generating random mazes, creating drag-and-drop interfaces, and logging components in Unity. Additionally, there are folders for specific projects, including 'Workspace' with scripts for Unity game development, calculator functions, and web development, and 'Start' for managing the Unity game development project's start scene and various scripts for game management and data handling.",
  "This folder contains a collection of scripts and code snippets written in various progr

## Summary Embedding

In [1]:
database_name = "Sonic-Game"
embed_model_id = "mixedbread-ai/mxbai-embed-large-v1"

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
device = "cuda" if torch.cuda.is_available else "cpu"
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,)

import os
if not os.path.exists("summary"):
    os.makedirs("summary")

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
import os

if not os.path.exists("db_embs"):
    os.makedirs("db_embs")

from sentence_transformers import SentenceTransformer, util
embed_model = SentenceTransformer(embed_model_id)

# make pandas dataframe
import json

json_file_path = "summary/" + database_name + ".json"
with open(json_file_path, 'r') as json_file:
    summary = json.load(json_file)


import torch

device = "cuda" if torch.cuda.is_available else "cpu"
model.to(device)

"""
def get_cat_list(series):
    cat_list = []
    num_rows = 0
    for text_list in series:
        num_rows += 1
        cat_list += text_list
    return cat_list, num_rows
"""

def save_outputs(text_list, batch_size, file_path):
    num_batch = 0


    # Get model outputs
    with torch.no_grad():
        outputs = torch.tensor(embed_model.encode(text_list))
        #outputs = outputs.reshape(num_rows, max_length, -1)
        print("outputs.shape : ", outputs.shape)
        torch.save(outputs.detach().cpu(), file_path+".pt")

    """ when total data size exceeds the batch_size
    for i in range(0, len(text_list), batch_size):
        batch_text_list = text_list[i:i+batch_size]
        num_rows = len(batch_text_list)
        #inputs = tokenizer(batch_text_series, padding="max_length", max_length=max_length, truncation=True, return_tensors="pt")
        #inputs.to(device)
        
        # Get model outputs
        with torch.no_grad():
            outputs = torch.tensor(model.encode(batch_text_list))
            #outputs = outputs.reshape(num_rows, max_length, -1)
            print("outputs.shape : ", outputs.shape)
            torch.save(outputs.detach().cpu(), file_path+str(num_batch)+".pt")
            num_batch += 1

            del outputs
        
        print(100*(i+batch_size)/len(batch_text_list), "% finished")
    """

#batch_size_questions = 100  # Adjust based on your GPU memory
batch_size = 20000

# here each list in df["chunks"] must have same length
save_outputs(summary, batch_size, f"db_embs/{database_name}")


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

outputs.shape :  torch.Size([11, 1024])


## Template Chat

In [3]:
database_name = "memformer"

In [4]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [9]:
import os
import json
import sys

if not os.path.exists(f"{database_name}_logs"):
    os.makedirs(f"{database_name}_logs")

if not os.path.exists("DS for SEAT"):
    os.makedirs("DS for SEAT")

def save_log(new_log, file_path):
    with open(file_path) as f: # note that file must be already created
        log = f.read()

    log += new_log

    with open(file_path, "w") as f:
        f.write(log)

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_func_description(id):
    #initialize func_list
    func_list = []
    func_set = set()
    # open calls folder
    with open(path_call, 'r') as file:
        data = json.load(file)
        functions = data[id]
    for key1, value1 in functions.items():
        # open defs folder
        with open(path_def, 'r') as file:
             defs_data = json.load(file)
        
        for def_item in defs_data:
            for key2, value2 in def_item.items():
                if key2 == key1:
                    if key2 not in func_set:
                        func_set.add(key2)
                        func_list.append(f"{key2}:{value2}")

    if not func_list:
        return ""
    
    formatted_descriptions = [
    f"- {desc.split(':')[0]}: {desc.split(':')[1].strip()}."
    for desc in func_list
]

    # 最終的な説明文を生成
    description_of_functions = "Description of the functions used in the code below:\n" + "\n".join(formatted_descriptions)

    return description_of_functions     


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list,keep_id_list):
    combined_code = ""
    for id, code in zip(keep_id_list, code_mem_list):
        func_des = get_func_description(id)
        combined_code += f"{func_des}\n\n{code}\n\n"
    return combined_code
    

def new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{LLM1_system1}{question}\n\nCode from system:\n\n{func_des}\n\nCode\"\"\"{code_inf[0]}\"\"\"\n\n{LLM1_system2}"

def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list,keep_id_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\n\nPieces of code from system:\n{combined_code}\n{LLM2_system2}"


def new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{SUMLLM_system1}{original_question}\nCode from system:\n\n{func_des}\n\nCode:\"\"\"{code_inf[0]}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{GENELLM_system1}{original_question}\n\nPieces of code from system:\n{combined_code}[/INST]"
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            # コマンドの取得と不要な文字の削除
            command = words[0].strip('.<>/s')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            # コマンドの抽出と不要な文字の削除
            clean_word = word.strip('.<>/s')
            if clean_word in ['keep', 'generate', 'dispose', 'more']:
                command = clean_word
                break
        else:
            command = 'error'

    return command




In [12]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """<<SYS>>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

Thought: (Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.)
Decision: (Choose from 'keep' or 'dispose'.)
<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the significant sections from the provided code that are essential for answering the user's question. Highlight these sections and explain their relevance to the question without altering the original code format or content. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question. Do not modifying or editorializing the code.If no sections of the code are critical, you should explicitly output "Nothing".)
Relation:(Tell me relation between the code and Users question. If no sections of the code are related, you should explicitly output "Nothing")
        
<</SYS>>
[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""





original_question = """
I want to know how to calcurate score in this game
"""

output = f"""Decison: dispose. Next question:{original_question}"""

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
# path for making function-explanation
path_call = "calls/" + database_name + ".json"
path_def = "defs/" + database_name + ".json"
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")
    

while get_command(output) != 'generate':
    output_search = "Decison: more."
    # Search until keep
    while get_command(output_search) != 'keep':
        # initialized id is None
        if id is not None:
            disposed_id_list.append(id)
        # output is made by LLM2
        question = get_new_question(output)
        code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
        if not code_inf:
            break
        # LLM1 to give 'dispose' or 'keep'
        prompt = new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        # output_search is not for making new question,for decide 'dispose' or 'keep'
        output_search = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
    if not code_inf:
        break

    # After search until decision be 'keep', make memory
    keep_id_list.append(id)
    # make memory using SUMLLM
    prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    #streamer=streamer
    )
    # output mem is a set of code and relation for memory
    output_mem = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    if output_mem.endswith("</s>"):
        output_mem = output_mem[:-4]
    code_mem_list.append(output_mem)
    #initialize id for don't append keep_id to disposed_id_list
    id = None

    # log mem
    print("------- PROMPT FOR MEMORY --------")
    print(prompt)
    print("------- MEMORY --------")
    print(output_mem)
    
    # log
    print("------- OUTPUT FROM SEARCH --------")
    print(output_search)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)
    print("------- keep_id_list --------")
    print(keep_id_list)

    
    new_log = ("\n\n------- OUTPUT FROM SEARCH --------\n" + output_search + 
           "------- code_mem_list --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list --------\n" + '\n'.join(map(str, disposed_id_list)) + 
           "------- keep_id_list --------\n" + '\n'.join(map(str, keep_id_list)))
    save_log(new_log, log_file_path)
    
    
    # LLM2 to give 'generate' or 'more', and 'next question'
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list, keep_id_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
    print("------- OUTPUT FROM GENLLM --------")
    print(output)
    print("------- INPUT FOR GENLLM --------")
    print(prompt)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))

    save_log(new_log, log_file_path)

    
    if get_command(output) == 'error':
        print('error')
        break
if code_mem_list != []:
    prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        
    print("------- ANSWER OF THE QUESTION --------")
    print(output)
    
    
    new_log = "\n\n------- ANSWER OF THE QUESTION --------\n" + output
    save_log(new_log, log_file_path)


    # save data for SEAT learning
    # when code_inf=None, answer may not be ganerated perfectlly.So don't save
    if code_inf:
        file_path = os.path.join("DS for SEAT", f"{database_name}.json")
        new_data = {
        "question": original_question,
        "disposed_id_list": disposed_id_list,
        "keep_id_list": keep_id_list
    }
        # check past data  
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                # load past data
                data = json.load(file)
                data.append(new_data)
        else:
            data = [new_data]
        
        # write
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)
        print(f"Data written to {file_path}")

if code_mem_list == []:
    print('与えられたコードからは回答できない質問です。質問を変更するか、もう少し詳しく教えてください。')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not related to the user's question as it doesn't include a definition or manipulation of the 'num_mem_updates' variable. This variable's meaning and modification methods are not determined by the provided context.

  Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionabl

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: Based on the given user question and the provided pieces of code, it is insufficient as there is no code available to determine or change the variable 'num_mem_updates'.

Decision: more

Next question: Could you please provide the context of 'num_mem_updates' by sharing the relevant code snippet where it is defined and used? Additionally, please specify the programming language and framework you are using for better understanding.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The user's question asks for the context of 'num_mem_updates' and requests the relevant code snippet where it is defined and used. The code provided defines a class `Memformer` which includes an instance variable `num_mem_updates`. Therefore, the code is related to the user's question and may contain useful elements or logic that pertains to the question.
Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question.

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippet defines the `num_mem_updates` attribute in the constructor of the `Memformer` class, indicating that it is a class-level variable. However, it does not provide any information about how to change its value or what its specific purpose is within the context of the `Memformer` class.

Decision: more

Next question: Could you please provide more context regarding the `Memformer` class and its role in your deep learning model, as well as any methods or functions where `num_mem_updates` is used or modified? Additionally, can you clarify what you mean by "changing" `num_mem_updates`, as the provided code only initializes its value?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code is related to the user's question as it includes the `Memformer` class, which is mentioned in the question. However, the code does not contain any specific methods or functions where `num_mem_updates` is used or modified as explicitly asked in the question. Moreover, the provided code only initializes the value of `num_mem_updates` and does not show any instance where this value is changed.

Decision: keep.

This decision is based on the fact that while the provided code does not have direct relevance to the exact aspects of the user's question, it could still potentially provide useful context or background information about the `Memformer` class. This could potentially help in understanding the context and importance of `num_mem_updates` in the larger deep learning model.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The code provided defines the `Memformer` class, which includes the initialization of the `num_mem_updates` parameter. However, the user's question also requires understanding the role of this variable, where it's used, and how to change it. The code snippets do not include enough context related to these requirements.

Decision: more

Next question: Could you please share the definition and usage of the `Memformer` class, especially the methods where `num_mem_updates` is modified or utilized? Additionally, could you provide examples of how to change the value of `num_mem_updates` during the model's execution?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, whic

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The code provided is not directly related to the user's question about the `Memformer` class and its methods where `num_mem_updates` is modified or utilized. The code is for an `AutoregressiveWrapper` class, which does not seem to be related to the `Memformer` class. Moreover, the `num_mem_updates` variable is not defined or used in the given code, making it irrelevant to the user's question.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is co

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code is related but not comprehensive enough to answer the user's question. While the code snippet shows the definition of the `Memformer` class, which includes the initialization of the `num_mem_updates` attribute, it does not provide sufficient information on how to change this value during the model's execution or where it is modified in the code.

Decision: more

Next question: Could you provide additional context or code snippets where the `num_mem_updates` attribute is updated or modified? For instance, are there specific methods in the `Memformer` class where this value can be changed? Additionally, are there any training or inference functions where this value is updated?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not com

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code does not contain any code snippet or method related to updating or modifying the `num_mem_updates` attribute mentioned in the user's question. However, it does contain a memory replay buffer implementation with various operations on memories, such as passing memories from one step to the next and performing backpropagation.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means t

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The code provided by the user defines the `Memformer` class and initializes the `num_mem_updates` attribute within the constructor. Therefore, it is crucial for understanding the meaning and changing the value of `num_mem_updates`. However, there is no evidence that this value can be updated or changed within the provided code. To answer the question comprehensively, we need to find where this value is updated or modified.

Decision: more

Next question: Could you provide additional context or code snippets where the `num_mem_updates` attribute is updated or modified? For instance, are there specific methods in the `Memformer` class where this value can be changed? Additionally, are there any training or inference functions where this value is updated?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The user is asking for context or code snippets where the `num_mem_updates` attribute is updated. The provided code does contain the method `forward` where the `num_mem_updates` is used in a loop to update the memory. Therefore, the code is related to the user's question, even though it might not directly show the update of `num_mem_updates` itself.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets are crucial in understanding the context and role of the `num_mem_updates` attribute. The first snippet shows that `num_mem_updates` is a parameter assigned during the construction of the `Memformer` class. Snippet 2 provides some context about the memory updates in the script, suggesting that 'num_mem_updates' may be a variable defined elsewhere and assigned a default value using the `default` function. The third snippet confirms this hypothesis and demonstrates where `num_mem_updates` is defined and used during the memory update process. Based on this information, the code is sufficient to answer the user's question about the meaning of `num_mem_updates` and that it can be changed in the constructor of the `Memformer` class.

Decision: generate

Next question: To further clarify the user's question, it would be helpful to ask if they are specifically interested in modifying the value of `num_mem_updates` during runtime, or 

## chat template with llama3

In [1]:
database_name = "Sonic-Game"

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
tokenizer.pad_token = tokenizer.eos_token
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
import os
import json
import sys

if not os.path.exists(f"{database_name}_logs"):
    os.makedirs(f"{database_name}_logs")

if not os.path.exists("DS for SEAT"):
    os.makedirs("DS for SEAT")

def save_log(new_log, file_path):
    with open(file_path) as f: # note that file must be already created
        log = f.read()

    log += new_log

    with open(file_path, "w") as f:
        f.write(log)

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_func_description(id):
    #initialize func_list
    func_list = []
    func_set = set()
    # open calls folder
    with open(path_call, 'r') as file:
        data = json.load(file)
        functions = data[id]
    for key1, value1 in functions.items():
        # open defs folder
        with open(path_def, 'r') as file:
             defs_data = json.load(file)
        
        for def_item in defs_data:
            for key2, value2 in def_item.items():
                if key2 == key1:
                    if key2 not in func_set:
                        func_set.add(key2)
                        func_list.append(f"{key2}:{value2}")

    if not func_list:
        return ""
    
    formatted_descriptions = [
    f"- {desc.split(':')[0]}: {desc.split(':')[1].strip()}."
    for desc in func_list
]

    # 最終的な説明文を生成
    description_of_functions = "Description of the functions used in the code below:\n" + "\n".join(formatted_descriptions)

    return description_of_functions     


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list,keep_id_list):
    combined_code = ""
    for id, code in zip(keep_id_list, code_mem_list):
        func_des = get_func_description(id)
        combined_code += f"{func_des}\n\n{code}\n\n"
    return combined_code
    

def new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{LLM1_system1}{question}\n\nCode from system:\n\n{func_des}\n\nCode\"\"\"{code_inf[0]}\"\"\"\n\n{LLM1_system2}"

def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list,keep_id_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\n\nPieces of code from system:\n{combined_code}\n{LLM2_system2}"


def new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{SUMLLM_system1}{original_question}\nCode from system:\n\n{func_des}\n\nCode:\"\"\"{code_inf[0]}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{GENELLM_system1}{original_question}\n\nPieces of code from system:\n{combined_code}[/INST]"
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            # コマンドの取得と不要な文字の削除
            command = words[0].strip('.<>/s')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            # コマンドの抽出と不要な文字の削除
            clean_word = word.strip('.<>/s')
            if clean_word in ['keep', 'generate', 'dispose', 'more']:
                command = clean_word
                break
        else:
            command = 'error'

    return command




In [10]:
LLM1_system1 = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question: """

LLM1_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

Thought: (Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.)
Decision: (Choose from 'keep' or 'dispose'.)
<|eot_id|>"""

LLM2_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question: """

LLM2_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<|eot_id|>"""



SUMLLM_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:"""


SUMLLM_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
You are required to extract the significant sections from the provided code that are essential for answering the user's question. Highlight these sections and explain their relevance to the question without altering the original code format or content. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question. Do not modifying or editorializing the code.If no sections of the code are critical, you should explicitly output "Nothing".)
Relation:(Tell me relation between the code and Users question. If no sections of the code are related, you should explicitly output "Nothing")
        
<|eot_id|>"""


GENELLM_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:"""


original_question = """
I want to know how to calcurate score in this game
"""

output = f"""Decison: dispose. Next question:{original_question}"""

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
# path for making function-explanation
path_call = "calls/" + database_name + ".json"
path_def = "defs/" + database_name + ".json"
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")
    

while get_command(output) != 'generate':
    output_search = "Decison: more."
    # Search until keep
    while get_command(output_search) != 'keep':
        # initialized id is None
        if id is not None:
            disposed_id_list.append(id)
        # output is made by LLM2
        question = get_new_question(output)
        code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
        if not code_inf:
            break
        # LLM1 to give 'dispose' or 'keep'
        prompt = new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        # output_search is not for making new question,for decide 'dispose' or 'keep'
        output_search = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
        print("------- output_search --------")
        print(output_search)
    if not code_inf:
        break

    # After search until decision be 'keep', make memory
    keep_id_list.append(id)
    # make memory using SUMLLM
    prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    #streamer=streamer
    )
    # output mem is a set of code and relation for memory
    output_mem = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    if output_mem.endswith("</s>"):
        output_mem = output_mem[:-4]
    code_mem_list.append(output_mem)
    #initialize id for don't append keep_id to disposed_id_list
    id = None

    # log mem
    print("------- PROMPT FOR MEMORY --------")
    print(prompt)
    print("------- MEMORY --------")
    print(output_mem)
    
    # log
    print("------- OUTPUT FROM SEARCH --------")
    print(output_search)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)
    print("------- keep_id_list --------")
    print(keep_id_list)

    
    new_log = ("\n\n------- OUTPUT FROM SEARCH --------\n" + output_search + 
           "------- code_mem_list --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list --------\n" + '\n'.join(map(str, disposed_id_list)) + 
           "------- keep_id_list --------\n" + '\n'.join(map(str, keep_id_list)))
    save_log(new_log, log_file_path)
    
    
    # LLM2 to give 'generate' or 'more', and 'next question'
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list, keep_id_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
    print("------- OUTPUT FROM GENLLM --------")
    print(output)
    print("------- INPUT FOR GENLLM --------")
    print(prompt)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))

    save_log(new_log, log_file_path)

    
    if get_command(output) == 'error':
        print('error')
        break
if code_mem_list != []:
    prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        
    print("------- ANSWER OF THE QUESTION --------")
    print(output)
    
    
    new_log = "\n\n------- ANSWER OF THE QUESTION --------\n" + output
    save_log(new_log, log_file_path)


    # save data for SEAT learning
    # when code_inf=None, answer may not be ganerated perfectlly.So don't save
    if code_inf:
        file_path = os.path.join("DS for SEAT", f"{database_name}.json")
        new_data = {
        "question": original_question,
        "disposed_id_list": disposed_id_list,
        "keep_id_list": keep_id_list
    }
        # check past data  
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                # load past data
                data = json.load(file)
                data.append(new_data)
        else:
            data = [new_data]
        
        # write
        
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)
        print(f"Data written to {file_path}")

if code_mem_list == []:
    print('与えられたコードからは回答できない質問です。質問を変更するか、もう少し詳しく教えてください。')

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

Thought: The code provided seems to be part of a game, possibly a platformer or an endless runner. The question asked about calculating a score in the game, but the code provided does not directly calculate a score. However, it does contain variables related

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

Thought: The code appears to be related to scoring in a game, as it references a "register" module, "width", "height", and "screen", which could all be related to a graphical game environment. Additionally, the code handles displaying scores and sorting them

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

Thought: The code provided seems to be related to the user's question about calculating the score in the game. The code has multiple parts, including the display of scores, playing sound effects based on the score, and updating the score value. Although the code may not directly provide the full calculation of the score, it does demonstrate how the score is being updated and displayed throughout the game.

Decision: keep.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- PROMPT FOR MEMORY --------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:
I want to know how to calcurate score in this game

Code from system:

Description of the functions used in the code below:
- play_sound: A function that plays a sound file with a given volume level using Pygame library.



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- OUTPUT FROM GENLLM --------
<|start_header_id|>assistant<|end_header_id|>

Code:
```
if SCORE % 100 == 0 and SCORE!= 0 and SCORE % 1000!= 0 and time() - time_score_sound > 0.2:
    play_sound(SCOREPATH, 0.03)
    time_score_sound = time()
elif SCORE % 1000 == 0 and SCORE!= 0 and time() - time_score_sound > 0.2:
    play_sound(SCORE1000PATH, 0.05)
    time_score_sound = time()
```

Relation: The code provided calculates the score in the game. The sections highlighted above display the logic for calculating the score and playing a sound effect when the score reaches a multiple of 100 or 1000. The variable SCORE is used to keep track of the current score in the game.<|eot_id|>
------- INPUT FOR GENLLM --------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

Thought: The given code is relevant to the user's question as it is a part of a game's logic, particularly related to game development, enemy spawning, and game mechanics. Although the user's question is not specified, this code snippet appears to be a part of a game programming script, and its components like enemy spawning, health management, and level management might be relevant to the user's question.

Decision: keep.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- PROMPT FOR MEMORY --------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:
I want to know how to calcurate score in this game

Code from system:



Code:"""if width_restrict and height_restrict and state_game:
                PLAYING = False
            # on presse le bouton highscores
          

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- OUTPUT FROM GENLLM --------
<|start_header_id|>assistant<|end_header_id|>

Code:
```
if SCORE % 100 == 0 and SCORE!= 0 and SCORE % 1000!= 0 and time() - time_score_sound > 0.2:
    play_sound(SCOREPATH, 0.03)
    time_score_sound = time()
elif SCORE % 1000 == 0 and SCORE!= 0 and time() - time_score_sound > 0.2:
    play_sound(SCORE1000PATH, 0.05)
    time_score_sound = time()
```

Relation: The code provided calculates the score in the game. The sections highlighted above display the logic for calculating the score and playing a sound effect when the score reaches a multiple of 100 or 1000. The variable SCORE is used to keep track of the current score in the game.

<|eot_id|>
------- INPUT FOR GENLLM --------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related bu

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

Thought: The code provided seems to be a part of a game development project using the Pygame library. It appears to handle the game states and animations, with a focus on Sonic the Hedgehog. The code is not directly related to the user's question, as it does not provide any information about the command to run the game. However, it does demonstrate the use of game development techniques and graphics handling, which could be relevant in a broader context.

Decision: dispose.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose




Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistant

Thought: The provided code seems to be a part of a game development project written in Python using the Pygame library. The code is responsible for handling various game mechanics such as spawning enemies, displaying scores, and handling game over conditions. While the question about the next question not found in input, the provided code does not appear to be directly related to the question. However, it is still a relevant and useful code snippet as it provides insight into how game mechanics can be implemented in a Pygame project.

Decision: dispose.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------
assistantYou are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

assistant

Thought: The provided code seems to be a basic implementation of an entity class in a game, with methods for changing the speed and position of the entity. The question does not mention specific game development, but the code is related to game deve

KeyboardInterrupt: 

## llama with Pydantic

In [1]:
database_name = "Sonic-Game"

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
tokenizer.pad_token = tokenizer.eos_token
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

# pydantic
from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn
from transformers import pipeline

# for restricting answer to be json 
class AnswerFormat1(BaseModel):
    Thought: str
    Decision: str

# Create a character level parser and build a transformers prefix function from it
parser1 = JsonSchemaParser(AnswerFormat1.schema())
prefix_function1 = build_transformers_prefix_allowed_tokens_fn(tokenizer, parser1)

# for restricting answer to be json 
class AnswerFormat2(BaseModel):
    Thought: str
    Decision: str
    Next_question: str

# Create a character level parser and build a transformers prefix function from it
parser2 = JsonSchemaParser(AnswerFormat2.schema())
prefix_function2 = build_transformers_prefix_allowed_tokens_fn(tokenizer, parser2)

# for restricting answer to be json
class AnswerFormat_SUM(BaseModel):
    Code: str
    Relation: str

# Create a character level parser and build a transformers prefix function from it
parserS = JsonSchemaParser(AnswerFormat_SUM.schema())
prefix_functionS = build_transformers_prefix_allowed_tokens_fn(tokenizer, parserS)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
import os
import json
import sys

if not os.path.exists(f"{database_name}_logs"):
    os.makedirs(f"{database_name}_logs")

if not os.path.exists("DS for SEAT"):
    os.makedirs("DS for SEAT")

def save_log(new_log, file_path):
    with open(file_path) as f: # note that file must be already created
        log = f.read()

    log += new_log

    with open(file_path, "w") as f:
        f.write(log)

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_func_description(id):
    #initialize func_list
    func_list = []
    func_set = set()
    # open calls folder
    with open(path_call, 'r') as file:
        data = json.load(file)
        functions = data[id]
    for key1, value1 in functions.items():
        # open defs folder
        with open(path_def, 'r') as file:
             defs_data = json.load(file)
        
        for def_item in defs_data:
            for key2, value2 in def_item.items():
                if key2 == key1:
                    if key2 not in func_set:
                        func_set.add(key2)
                        func_list.append(f"{key2}:{value2}")

    if not func_list:
        return ""
    
    formatted_descriptions = [
    f"- {desc.split(':')[0]}: {desc.split(':')[1].strip()}."
    for desc in func_list
]

    # 最終的な説明文を生成
    description_of_functions = "Description of the functions used in the code below:\n" + "\n".join(formatted_descriptions)

    return description_of_functions     


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list,keep_id_list):
    combined_code = ""
    for id, code in zip(keep_id_list, code_mem_list):
        func_des = get_func_description(id)
        combined_code += f"{func_des}\n\n{code}\n\n"
    return combined_code
    

def new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{LLM1_system1}{question}\n\nCode from system:\n\n{func_des}\n\nCode\"\"\"{code_inf[0]}\"\"\"\n\n{LLM1_system2}"

def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list,keep_id_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\n\nPieces of code from system:\n{combined_code}\n{LLM2_system2}"


def new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{SUMLLM_system1}{original_question}\nCode from system:\n\n{func_des}\n\nCode:\"\"\"{code_inf[0]}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{GENELLM_system1}{original_question}\n\nPieces of code from system:\n{combined_code}[/INST]"
    
def get_new_question(output):
    print(f"output:{output}")
    output_type = type(output)
    print(output_type)
    try:
        data_dict = json.loads(output)
        print("Converted to dictionary successfully.")
        print(data_dict)
    except json.JSONDecodeError as e:
        print("Failed to convert string to dictionary:", e)
    question = data_dict.get('Next_question', None)
    if question is not None:
        # 末尾のピリオドや特殊文字を削除
        question = question.rstrip('.<s>')
    return question    


    


def get_command(output):
    """
    Extract the command associated with the 'Decision' key from the given dictionary.
    Cleans the command string by removing any trailing periods or special characters.

    Args:
    data_dict (dict): A dictionary from which to extract the command.

    Returns:
    str: The cleaned command associated with the 'Decision' key, or None if not found.
    """
    # "Decision" キーの値を取得し、存在しない場合は None を返す
    print(f"output:{output}")
    output_type = type(output)
    print(output_type)
    try:
        data_dict = json.loads(output)
        print("Converted to dictionary successfully.")
        print(data_dict)
    except json.JSONDecodeError as e:
        print("Failed to convert string to dictionary:", e)
    command = data_dict.get('Decision', None)
    if command is not None:
        # 末尾のピリオドや特殊文字を削除
        command = command.rstrip('.<s>')
    return command




In [8]:
LLM1_system1 = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question: """

LLM1_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:
{'Thought':(Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.), 
'Decision':(Choose from 'keep' or 'dispose'.)}
<|eot_id|>"""

LLM2_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question: """

LLM2_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. 
Use the format below:
{'Thought':(Explain why the provided code is sufficient or insufficient), 
'Decision':(Choose 'generate' or 'more'),
'Next_question':(Formulate a question to help gather the missing or additional code required)}
<|eot_id|>"""



SUMLLM_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:"""


SUMLLM_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
You are required to extract the significant sections from the provided code that are essential for answering the user's question. Highlight these sections and explain their relevance to the question without altering the original code format or content. 
Please follow the format below:
{'Code':(The critical parts of the code necessary to answer the user's question. Do not modifying or editorializing the code.If no sections of the code are critical, you should explicitly output "Nothing".), 
'Relation':(Tell me relation between the code and Users question. If no sections of the code are related, you should explicitly output "Nothing")}        
<|eot_id|>"""


GENELLM_system1 = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question:"""


original_question = """I want to know how to calcurate score in this game"""

output = {"Decison":"more", 
"Next_question":f"""{original_question}"""}       

output = json.dumps(output)

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
# path for making function-explanation
path_call = "calls/" + database_name + ".json"
path_def = "defs/" + database_name + ".json"
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")


while get_command(output) != 'generate':
    output_search = json.dumps({"Decison": "more"})
    # Search until keep
    while get_command(output_search) != 'keep':
        # initialized id is None
        if id is not None:
            disposed_id_list.append(id)
        # output is made by LLM2
        question = get_new_question(output)
        code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
        if not code_inf:
            break
        # LLM1 to give 'dispose' or 'keep'
        prompt = new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id)
        input_ids = tokenizer(prompt, return_tensors="pt", ).to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            prefix_allowed_tokens_fn=prefix_function1
        )
        # output_search is not for making new question,for decide 'dispose' or 'keep'
        output_search = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
        print("------- output_search --------")
        print(output_search)
    if not code_inf:
        break

    # After search until decision be 'keep', make memory
    keep_id_list.append(id)
    # make memory using SUMLLM
    prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    prefix_allowed_tokens_fn=prefix_functionS
    )
    # output mem is a set of code and relation for memory
    output_mem = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    if output_mem.endswith("</s>"):
        output_mem = output_mem[:-4]
    code_mem_list.append(output_mem)
    #initialize id for don't append keep_id to disposed_id_list
    id = None

    # log mem
    print("------- PROMPT FOR MEMORY --------")
    print(prompt)
    print("------- MEMORY --------")
    print(output_mem)
    
    # log
    print("------- OUTPUT FROM SEARCH --------")
    print(output_search)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)
    print("------- keep_id_list --------")
    print(keep_id_list)

    
    new_log = ("\n\n------- OUTPUT FROM SEARCH --------\n" + output_search + 
           "------- code_mem_list --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list --------\n" + '\n'.join(map(str, disposed_id_list)) + 
           "------- keep_id_list --------\n" + '\n'.join(map(str, keep_id_list)))
    save_log(new_log, log_file_path)
    
    
    # LLM2 to give 'generate' or 'more', and 'next question'
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list, keep_id_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        prefix_allowed_tokens_fn=prefix_function2
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
    print("------- OUTPUT FROM GENLLM --------")
    print(output)
    print("------- INPUT FOR GENLLM --------")
    print(prompt)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))

    save_log(new_log, log_file_path)

    
    if get_command(output) == 'error':
        print('error')
        break
if code_mem_list != []:
    prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        
    print("------- ANSWER OF THE QUESTION --------")
    print(output)
    
    
    new_log = "\n\n------- ANSWER OF THE QUESTION --------\n" + output
    save_log(new_log, log_file_path)


    # save data for SEAT learning
    # when code_inf=None, answer may not be ganerated perfectlly.So don't save
    if code_inf:
        file_path = os.path.join("DS for SEAT", f"{database_name}.json")
        new_data = {
        "question": original_question,
        "disposed_id_list": disposed_id_list,
        "keep_id_list": keep_id_list
    }
        # check past data  
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                # load past data
                data = json.load(file)
                data.append(new_data)
        else:
            data = [new_data]
        
        # write
        
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)
        print(f"Data written to {file_path}")

if code_mem_list == []:
    print('与えられたコードからは回答できない質問です。質問を変更するか、もう少し詳しく教えてください。')

output:{"Decison": "more", "Next_question": "I want to know how to calcurate score in this game"}
<class 'str'>
Converted to dictionary successfully.
{'Decison': 'more', 'Next_question': 'I want to know how to calcurate score in this game'}
output:{"Decison": "more"}
<class 'str'>
Converted to dictionary successfully.
{'Decison': 'more'}
output:{"Decison": "more", "Next_question": "I want to know how to calcurate score in this game"}
<class 'str'>
Converted to dictionary successfully.
{'Decison': 'more', 'Next_question': 'I want to know how to calcurate score in this game'}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


------- output_search --------

{"Thought": "The code provided is not directly related to the user's question about calculating scores in the game. It seems to be a part of the game's main loop and event handling, and there is no mention of score calculation. However, some functions like `play_sound` might be indirectly related to scores, but the code itself does not provide any information about score calculation. Nonetheless, it's a part of the game's codebase, and there might be clues or patterns that could be helpful in answering the user's question. Therefore, I will give it a chance to provide relevant information. Decision:keep.
output:
{"Thought": "The code provided is not directly related to the user's question about calculating scores in the game. It seems to be a part of the game's main loop and event handling, and there is no mention of score calculation. However, some functions like `play_sound` might be indirectly related to scores, but the code itself does not provide an

UnboundLocalError: local variable 'data_dict' referenced before assignment

### test

In [None]:
from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn
from transformers import pipeline

max_new_tokens = 4000
# for restricting answer to be json 
class AnswerFormat1(BaseModel):
    Thought: str
    Decision: str

# Create a transformers pipeline
hf_pipeline = pipeline('text-generation', model=model, max_length = max_new_tokens,  tokenizer = tokenizer, device = 0)
#prompt = f'Here is information about Michael Jordan in the following json schema: {AnswerFormat.schema_json()} :\n'

# Create a character level parser and build a transformers prefix function from it
parser1 = JsonSchemaParser(AnswerFormat1.schema())
prefix_function1 = build_transformers_prefix_allowed_tokens_fn(hf_pipeline.tokenizer, parser1)

LLM1_system1 = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

User input:

User question: """

LLM1_system2 = """<|eot_id|>
<|start_header_id|>system<|end_header_id|>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

{'Thought':(Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.), 
'Decision':(Choose from 'keep' or 'dispose'.)}
<|eot_id|>"""

original_question = """
I want to know how to calcurate score in this game
"""

output = f"""Decison: dispose. Next question:{original_question}"""

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
# path for making function-explanation
path_call = "calls/" + database_name + ".json"
path_def = "defs/" + database_name + ".json"
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")

question = get_new_question(output)
code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
# LLM1 to give 'dispose' or 'keep'
prompt = new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id)
output_dict = hf_pipeline(prompt, prefix_allowed_tokens_fn = prefix_function1)
print()
print("=== output ===")
print(output_dict[0]['generated_text'][len(prompt):])
    

In [23]:
import json

original_question = """I want to know how to calcurate score in this game"""

output = {"Decison": "more", 
          "Next_question": f"""{original_question}"""}

print(output)

# 辞書をJSON文字列に変換してから、再度辞書に戻す
json_str = json.dumps(output)
try:
    data_dict = json.loads(json_str)
    print("Converted to dictionary successfully.")
    # 'Decison' キーの値を出力する
    print(data_dict['Decison'])
except Exception as e:
    print("An error occurred:", e)


{'Decison': 'more', 'Next_question': 'I want to know how to calcurate score in this game'}
Converted to dictionary successfully.
more


## Sonic-Game chat

In [1]:
database_name = "Sonic-Game"

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [57]:
import os
import json
import sys

if not os.path.exists(f"{database_name}_logs"):
    os.makedirs(f"{database_name}_logs")

if not os.path.exists("DS for SEAT"):
    os.makedirs("DS for SEAT")

def save_log(new_log, file_path):
    with open(file_path) as f: # note that file must be already created
        log = f.read()

    log += new_log

    with open(file_path, "w") as f:
        f.write(log)

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_func_description(id):
    #initialize func_list
    func_list = []
    func_set = set()
    # open calls folder
    with open(path_call, 'r') as file:
        data = json.load(file)
        functions = data[id]
    for key1, value1 in functions.items():
        # open defs folder
        with open(path_def, 'r') as file:
             defs_data = json.load(file)
        
        for def_item in defs_data:
            for key2, value2 in def_item.items():
                if key2 == key1:
                    if key2 not in func_set:
                        func_set.add(key2)
                        func_list.append(f"{key2}:{value2}")

    if not func_list:
        return ""
    
    formatted_descriptions = [
    f"- {desc.split(':')[0]}: {desc.split(':')[1].strip()}."
    for desc in func_list
]

    # 最終的な説明文を生成
    description_of_functions = "Description of the functions used in the code below:\n" + "\n".join(formatted_descriptions)

    return description_of_functions     


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list,keep_id_list):
    combined_code = ""
    for id, code in zip(keep_id_list, code_mem_list):
        func_des = get_func_description(id)
        combined_code += f"{func_des}\n\n{code}\n\n"
    return combined_code
    

def new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{LLM1_system1}{question}\n\nCode from system:\n\n{func_des}\n\nCode\"\"\"{code_inf[0]}\"\"\"\n\n{LLM1_system2}"

def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list,keep_id_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\n\nPieces of code from system:\n{combined_code}\n{LLM2_system2}"


def new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id):
    func_des = get_func_description(id)
    return  f"{SUMLLM_system1}{original_question}\nCode from system:\n\n{func_des}\n\nCode:\"\"\"{code_inf[0]}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list,keep_id_list)
    return  f"{GENELLM_system1}{original_question}\n\nPieces of code from system:\n{combined_code}[/INST]"
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            # コマンドの取得と不要な文字の削除
            command = words[0].strip('.<>/s')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            # コマンドの抽出と不要な文字の削除
            clean_word = word.strip('.<>/s')
            if clean_word in ['keep', 'generate', 'dispose', 'more']:
                command = clean_word
                break
        else:
            command = 'error'

    return command




In [58]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """<<SYS>>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

Thought: (Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.)
Decision: (Choose from 'keep' or 'dispose'.)
<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the significant sections from the provided code that are essential for answering the user's question. Highlight these sections and explain their relevance to the question without altering the original code format or content. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question. Do not modifying or editorializing the code.If no sections of the code are critical, you should explicitly output "Nothing".)
Relation:(Tell me relation between the code and Users question. If no sections of the code are related, you should explicitly output "Nothing")
        
<</SYS>>
[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""





original_question = """
I want to know how to calcurate score in this game
"""

output = f"""Decison: dispose. Next question:{original_question}"""

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
# path for making function-explanation
path_call = "calls/" + database_name + ".json"
path_def = "defs/" + database_name + ".json"
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")
    

while get_command(output) != 'generate':
    output_search = "Decison: more."
    # Search until keep
    while get_command(output_search) != 'keep':
        # initialized id is None
        if id is not None:
            disposed_id_list.append(id)
        # output is made by LLM2
        question = get_new_question(output)
        code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
        if not code_inf:
            break
        # LLM1 to give 'dispose' or 'keep'
        prompt = new_prompt_LLM1(LLM1_system1, LLM1_system2, question, code_inf, id)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        # output_search is not for making new question,for decide 'dispose' or 'keep'
        output_search = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
    if not code_inf:
        break

    # After search until decision be 'keep', make memory
    keep_id_list.append(id)
    # make memory using SUMLLM
    prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf, id)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    #streamer=streamer
    )
    # output mem is a set of code and relation for memory
    output_mem = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    if output_mem.endswith("</s>"):
        output_mem = output_mem[:-4]
    code_mem_list.append(output_mem)
    #initialize id for don't append keep_id to disposed_id_list
    id = None

    # log mem
    print("------- PROMPT FOR MEMORY --------")
    print(prompt)
    print("------- MEMORY --------")
    print(output_mem)
    
    # log
    print("------- OUTPUT FROM SEARCH --------")
    print(output_search)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)
    print("------- keep_id_list --------")
    print(keep_id_list)

    
    new_log = ("\n\n------- OUTPUT FROM SEARCH --------\n" + output_search + 
           "------- code_mem_list --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list --------\n" + '\n'.join(map(str, disposed_id_list)) + 
           "------- keep_id_list --------\n" + '\n'.join(map(str, keep_id_list)))
    save_log(new_log, log_file_path)
    
    
    # LLM2 to give 'generate' or 'more', and 'next question'
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list, keep_id_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
    print("------- OUTPUT FROM GENLLM --------")
    print(output)
    print("------- INPUT FOR GENLLM --------")
    print(prompt)
    print("------- code_mem_list --------")
    print(code_mem_list)
    print("------- disposed_id_list --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))

    save_log(new_log, log_file_path)

    
    if get_command(output) == 'error':
        print('error')
        break
if code_mem_list != []:
    prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        
    print("------- ANSWER OF THE QUESTION --------")
    print(output)
    
    
    new_log = "\n\n------- ANSWER OF THE QUESTION --------\n" + output
    save_log(new_log, log_file_path)


    # save data for SEAT learning
    # when code_inf=None, answer may not be ganerated perfectlly.So don't save
    if code_inf:
        file_path = os.path.join("DS for SEAT", f"{database_name}.json")
        new_data = {
        "question": original_question,
        "disposed_id_list": disposed_id_list,
        "keep_id_list": keep_id_list
    }
        # check past data  
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                # load past data
                data = json.load(file)
                data.append(new_data)
        else:
            data = [new_data]
        
        # write
        with open(file_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=4)
        print(f"Data written to {file_path}")

if code_mem_list == []:
    print('与えられたコードからは回答できない質問です。質問を変更するか、もう少し詳しく教えてください。')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- PROMPT FOR MEMORY --------
[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:
I want to know how to calcurate score in this game

Code from system:



Code:""""""screen of best scores"""
import pygame
from register import width, height, screen, scores

def get_scores(sorted_scores):
    """return the surfaces and rects to blit"""
    ctr = 0
    for key, value in sorted_scores:
        if ctr ==

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT FROM GENLLM --------
Thought: The provided code is insufficient for answering the user's question as it does not contain any calculation logic for determining the scores in the game. It only displays the existing scores on the screen.

Decision: more

Next question: Could you please share the part of your code that handles the calculation of scores in the game so I can help you understand how to display those scores on the screen?</s>
------- INPUT FOR GENLLM --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- PROMPT FOR MEMORY --------
[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:
I want to know how to calcurate score in this game

Code from system:

Description of the functions used in the code below:
- play_sound: A function that plays a sound file with a given volume level using Pygame library.

Code:"""if LOST:
        screen.blit(end_surface, end_rect)
        screen.blit(scores_screen_surf

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT FROM GENLLM --------
Thought: The provided code is insufficient to answer the user's question because it only deals with displaying the scores and their corresponding keys on the screen, but it does not include any logic for calculating the scores.

Decision: more

Next question: Could you please share the part of your code that calculates the scores in the game so I can help you understand how to display those scores on the screen? This will help us answer your question about calculating the score in the game.</s>
------- INPUT FOR GENLLM --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' i

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- PROMPT FOR MEMORY --------
[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:
I want to know how to calcurate score in this game

Code from system:



Code:"""if random_heart == 1 and CHECKHEART and CHECKHEART2:
            enemies.append(Enemy(
                heart_surface.get_rect(topleft=(
                    width,
                    height - randint(200, 700)
                )
           

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT FROM GENLLM --------
Thought: The provided code snippet calculates and updates the game score, which directly addresses the user's question about calculating the score in the game.

Decision: generate

Next question: Could you please provide the context or the rest of the code where the scores are used, such as the game mechanics or the scoring system, so I can better understand the implementation and its application to the game?</s>
------- INPUT FOR GENLLM --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question 

### Test

In [13]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """<<SYS>>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

Thought: (Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.)
Decision: (Choose from 'keep' or 'dispose'.)
<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the significant sections from the provided code that are essential for answering the user's question. Highlight these sections and explain their relevance to the question without altering the original code format or content. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question. Do not modifying or editorializing the code.If no sections of the code are critical, you should explicitly output "Nothing".)
Relation:(Tell me relation between the code and Users question. If no sections of the code are related, you should explicitly output "Nothing")
        
<</SYS>>
[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""
original_question = """
I want to know how to calcurate score in this game
"""

output = f"""Decison: dispose. Next question:{original_question}"""

id = None
code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir(f"{database_name}_logs"))
log_file_path = os.path.join(f"{database_name}_logs", f"log_{num_logs}")
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")


In [20]:
if get_command(output) != 'generate':
    output_search = "Decison: more."
    # Search until keep
    while get_command(output_search) != 'keep':
        # initialized id is None
        if id is not None:
            disposed_id_list.append(id)
        # output is made by LLM2
        question = get_new_question(output)
        code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
        if not code_inf:
            break
        # LLM1 to give 'dispose' or 'keep'
        prompt = new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        # output_search is not for making new question,for decide 'dispose' or 'keep'
        output_search = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
            # After search until decision be 'keep', make memory
    keep_id_list.append(id)
    #initialize id for don't append keep_id to disposed_id_list
    id = None
    # make memory using SUMLLM
    prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    #streamer=streamer
    )
    # output mem is a set of code and relation for memory
    output_mem = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    code_mem_list.append(output_mem)

    # log mem
    print("------- PROMPT FOR MEMORY --------")
    print(prompt)
    print("------- MEMORY --------")
    print(output_mem)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- PROMPT FOR MEMORY --------
[INST]<<SYS>>
You are a skilled programmer proficient in explaining code. Your primary task is to identify and extract the crucial parts of code based on the pairings of user-submitted questions and corresponding code snippets. While the code often relates to the questions, not all parts may be necessary to answer these questions. Users are specifically interested in those portions of the code that are most relevant to their inquiries. Therefore, you must focus solely on extracting these pertinent sections without modifying or editorializing the code. If no relevant code sections are found, output "Nothing".

User input:

User question:
I want to know how to calcurate score in this game

Code from system:"""if random_heart == 1 and CHECKHEART and CHECKHEART2:
            enemies.append(Enemy(
                heart_surface.get_rect(topleft=(
                    width,
                    height - randint(200, 700)
                )
                ),
 

In [21]:
# LLM2 to give 'generate' or 'more', and 'next question'
prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list)
print(prompt)

[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: 
I want to know how to calcurate score in this game

Last search question:Could you please provide the part of the code where the scores are calculated and updated before they are displayed on the high score screen?

Pieces of code from system:
code_1:

Code:
```python
def get_scores(sorted_scores):
    # code for getting rects and blitting score surfaces
    for key, value in sorte

In [22]:
prompt = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: 
I want to know how to calcurate score in this game

Last search question:Could you please provide the part of the code where the scores are calculated and updated before they are displayed on the high score screen?

Pieces of code from system:

The address of code below:{
Sonic-Game/
├─ high_scores_screen.py
}

Folder descriptions:{
Sonic-Game:Folder containing all the code needed to implement Sonic-Game
high_scores_screen.py:This code manages a Pygame window to display the game's scoreboard, sorts and shows the top 5 scores, and allows the user to close the window by clicking an "EXIT" button.
}

Code:
```python
def get_scores(sorted_scores):
    # code for getting rects and blitting score surfaces
    for key, value in sorted_scores:
        # code for setting colors based on position
        score_surface = big_font.render(f"{value}", True, (0,0,0))
        score_rect=score_surface.get_rect(topright=(width-15,(height/6.5)*ctr+(height / 4.5)))
        # code for blitting score surface
        screen.blit(score_surface, score_rect)
```
Relation:
The user asked for calculating the score in the game. While the code does not directly show calculation of scores, it does display the scores on the screen. The `get_scores` function is responsible for blitting the scores, and the provided code snippet contains the relevant parts of this function.</s>


The address of code below:{
Sonic-Game/
├─ main.py
}

Folder descriptions:{
Sonic-Game:Folder containing all the code needed to implement Sonic-Game
high_scores_screen.py:Code for Pygame-based game that dynamically handles player inputs and events, managing jumping, collisions, enemy spawns, score updates, health management, and game state transitions.
}

Code:
```python
# ... (code before the relevant sections)

# ... (code not related to the user's question)

#############
#LES SCORES#
#############
# si on a pas perdu on affiche le score actuel, sinon le last score
if not LOST:
    SCORE = int(round((time() - score_timer) * 10, 0))
    # ... (code for rendering score)

if BESTSCORE < SCORE:
    BESTSCORE = SCORE
    # ... (code for playing sound and updating best score on screen)

# ... (code after the relevant sections)

# ... (code not related to the user's question)
```

Relation:
The user asked for information on how to calculate the score in this game. The provided code includes a section related to scoring. Specifically, it calculates the current score based on the elapsed time since the start of the game and displays it on the screen if the player has not lost. It also checks if the current score is higher than the best score and updates the best score accordingly. These sections of the code are crucial for understanding how the score is calculated and displayed in the game.</s>


<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""

In [17]:
prompt = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: 
I want to know how to calcurate score in this game

Last search question:I want to know how to calcurate score in this game

Pieces of code from system:

The address of code below:{
Sonic-Game/
├─ high_scores_screen.py
}

Folder descriptions:{
Sonic-Game:Code for Pygame-based game that dynamically handles player inputs and events, managing jumping, collisions, enemy spawns, score updates, health management, and game state transitions.
high_scores_screen.py:This code manages a Pygame window to display the game's scoreboard, sorts and shows the top 5 scores, and allows the user to close the window by clicking an "EXIT" button.
}

Code:
```python
def get_scores(sorted_scores):
    # code for getting rects and blitting score surfaces
    for key, value in sorted_scores:
        # code for setting colors based on position
        score_surface = big_font.render(f"{value}", True, (0,0,0))
        score_rect=score_surface.get_rect(topright=(width-15,(height/6.5)*ctr+(height / 4.5)))
        # code for blitting score surface
        screen.blit(score_surface, score_rect)
```
Relation:
The user asked for calculating the score in the game. While the code does not directly show calculation of scores, it does display the scores on the screen. The `get_scores` function is responsible for blitting the scores, and the provided code snippet contains the relevant parts of this function.</s>


<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""

In [23]:
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    #streamer=streamer,
)
output = tokenizer.decode(output_ids[0][len(input_ids[0]):])

print("------- OUTPUT FROM GENLLM --------")
print(output)
print("------- INPUT FOR GENLLM --------")
print(prompt)
print("------- code_mem_list --------")
print(code_mem_list)
print("------- disposed_id_list --------")
print(disposed_id_list)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT FROM GENLLM --------
Thought: The provided code is comprehensive and sufficient enough to answer the user's question, as it directly calculates and updates the score in the game.

Decision: generate

Next question: Could you please share the part of the code where the score variable is initialized and updated throughout the game's lifecycle? It would help in gaining a deeper understanding of the game's scoring mechanism.</s>
------- INPUT FOR GENLLM --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehen

In [24]:
prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
print(prompt)

[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:
I want to know how to calcurate score in this game


Pieces of code from system:
code_1:

Code:
```python
def get_scores(sorted_scores):
    # code for getting rects and blitting score surfaces
    for key, value in sorted_scores:
        # code for setting colors based on position
        score_surface = big_font.render(f"{value}", True, (0,0,0))
        score_rect=score_surface.get_rect(topright=(width-15,(height/6.5)*ctr+(height / 4.5)))
        # code for blitting score surface
        screen.blit(s

In [25]:
prompt = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:
I want to know how to calcurate score in this game


Pieces of code from system:

The address of code below:{
Sonic-Game/
├─ high_scores_screen.py
}

Folder descriptions:{
Sonic-Game:Folder containing all the code needed to implement Sonic-Game
high_scores_screen.py:This code manages a Pygame window to display the game's scoreboard, sorts and shows the top 5 scores, and allows the user to close the window by clicking an "EXIT" button.
}

Code:
```python
def get_scores(sorted_scores):
    # code for getting rects and blitting score surfaces
    for key, value in sorted_scores:
        # code for setting colors based on position
        score_surface = big_font.render(f"{value}", True, (0,0,0))
        score_rect=score_surface.get_rect(topright=(width-15,(height/6.5)*ctr+(height / 4.5)))
        # code for blitting score surface
        screen.blit(score_surface, score_rect)
```
Relation:
The user asked for calculating the score in the game. While the code does not directly show calculation of scores, it does display the scores on the screen. The `get_scores` function is responsible for blitting the scores, and the provided code snippet contains the relevant parts of this function.


The address of code below:{
Sonic-Game/
├─ main.py
}

Folder descriptions:{
Sonic-Game:Folder containing all the code needed to implement Sonic-Game
high_scores_screen.py:Code for Pygame-based game that dynamically handles player inputs and events, managing jumping, collisions, enemy spawns, score updates, health management, and game state transitions.
}

Code:
```python
# ... (code before the relevant sections)

# ... (code not related to the user's question)

#############
#LES SCORES#
#############
# si on a pas perdu on affiche le score actuel, sinon le last score
if not LOST:
    SCORE = int(round((time() - score_timer) * 10, 0))
    # ... (code for rendering score)

if BESTSCORE < SCORE:
    BESTSCORE = SCORE
    # ... (code for playing sound and updating best score on screen)

# ... (code after the relevant sections)

# ... (code not related to the user's question)
```

Relation:
The user asked for information on how to calculate the score in this game. The provided code includes a section related to scoring. Specifically, it calculates the current score based on the elapsed time since the start of the game and displays it on the screen if the player has not lost. It also checks if the current score is higher than the best score and updates the best score accordingly. These sections of the code are crucial for understanding how the score is calculated and displayed in the game.


[/INST]"""

In [27]:
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
output = tokenizer.decode(output_ids[0][len(input_ids[0]):])

print("------- PROMPT --------")
print(prompt)

print("------- ANSWER OF THE QUESTION --------")
print(output)
    

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- PROMPT --------
[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from systems. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:
I want to know how to calcurate score in this game


Pieces of code from system:

The address of code below:{
Sonic-Game/
├─ high_scores_screen.py
}

Folder descriptions:{
Sonic-Game:Folder containing all the code needed to implement Sonic-Game
high_scores_screen.py:This code manages a Pygame window to display the game's scoreboard, sorts and shows the top 5 scores, and allows the user to close the window by clicking an "EXIT" button.
}

Code:
```python
def get_scores(sorted_scor

## python_game chat

In [1]:
database_name = "python_game"

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
import os
import json

if not os.path.exists("logs"):
    os.makedirs("logs")

def save_log(new_log, file_path):
    with open(file_path) as f: # note that file must be already created
        log = f.read()

    log += new_log

    with open(file_path, "w") as f:
        f.write(log)

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list):
    combined_code = ""
    for i, code in enumerate(code_mem_list, start=1):
        combined_code += f"code_{i}:\n\n{code}\n\n"
    return combined_code
    
    
def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\n\nPieces of code:\n{combined_code}\n{LLM2_system2}"

def new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf):
    return  f"{LLM1_system1}{question}\n\nCode:\"\"\"{code_inf[0]}\"\"\"\n\n{LLM1_system2}"


def new_prompt_SUMLLM(SUMLLM_system1,SUMLLM_system2,original_question,code_inf):
    return  f"{SUMLLM_system1}{original_question}\nCode:\"\"\"{code_inf[0]}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list)
    return  f"{GENELLM_system1}{original_question}\n\nPieces of code:\n{combined_code}[/INST]"
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            # コマンドの取得と不要な文字の削除
            command = words[0].strip('.<>/s')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            # コマンドの抽出と不要な文字の削除
            clean_word = word.strip('.<>/s')
            if clean_word in ['keep', 'generate', 'dispose']:
                command = clean_word
                break
        else:
            command = 'error'

    return command




In [7]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """<<SYS>>
Firstly, you need to share your opinion about the reason for your decision, then you need to share your decision. 
Use the format below:

Thought: (Explain whether the given code is necessary to answer the user's question, and how it relates, even if partially.)
Decision: (Choose from 'keep' or 'dispose'.)
<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>
Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)
<</SYS>>
[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer and adept at explaining code. Your task is to create summaries of code based on pairs of code and user-submitted questions. The code relates to the questions asked, but not all parts of the code may be necessary for the summary. Users are interested in understanding the parts of the code that are most relevant to their questions. Therefore, you should extract and highlight the key portions of the code that are pertinent to the users' questions.
<</SYS>>

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the important sections from the code provided. Additionally, you must explain why these extracted sections are crucial for answering the user's question. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question.)
Relation:(Tell me relation between the code and Users question)
        
<</SYS>>
[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from users. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""





original_question = """
What could be the issues with the arrow keys not functioning properly for movement in this game, while the space key works fine?
"""


output = f"""Decison: dispose. Next question:{original_question}"""

code_mem_list = []
disposed_id_list =[]
keep_id_list = []
num_logs = len(os.listdir("logs"))
log_file_path = "logs/log" + str(num_logs)
with open(log_file_path, "w") as f:  # to make a log file
    f.write("")

while get_command(output) != 'generate':

    # LLM1 to give 'dispose' or 'keep'
    question = get_new_question(output)
    code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
    prompt = new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])

    command = get_command(output)
    if command == 'dispose':
        disposed_id_list.append(id)

    # log
    print("------- OUTPUT1 --------")
    print(output)
    print("------- INPUT1 --------")
    print(prompt)
    print("------- code_mem_list1 --------")
    print(code_mem_list)
    print("------- disposed_id_list1 --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))
    save_log(new_log, log_file_path)
    
    if command == 'keep': # SUMLLM generates 'Relation'
        keep_id_list.append(id)
        prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
        )
        output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        code_mem_list.append(output)
    
    if command == 'error':
        print('error')
        break


    # LLM2 to give 'generate' or 'more', and 'next question'
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
    print("------- OUTPUT2 --------")
    print(output)
    print("------- INPUT2 --------")
    print(prompt)
    print("------- code_mem_list2 --------")
    print(code_mem_list)
    print("------- disposed_id_list2 --------")
    print(disposed_id_list)

    new_log = ("\n\n------- OUTPUT1 --------\n" + output + 
           "------- INPUT1 --------\n" + prompt + 
           "------- code_mem_list1 --------\n" + '\n'.join(code_mem_list) + 
           "------- disposed_id_list1 --------\n" + '\n'.join(map(str, disposed_id_list)))

    save_log(new_log, log_file_path)


prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
print("------- ANSWER OF THE QUESTION --------")
print(output)

new_log = "\n\n------- ANSWER OF THE QUESTION --------\n" + output
save_log(new_log, log_file_path)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code is not related to the user's question as it only handles the Space key input and does not include any logic related to arrow keys.
  Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: What could be th

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: Without the code available, it is impossible to determine if the issue with the arrow keys not functioning properly in the game while the space key works fine is caused by the code or some external factor. Thus, the code provided is insufficient.

Decision: more

Next question: Could you please share the relevant code for handling user input and movement in the game so we can better understand the issue and potential causes?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's quest

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is related to the user's question as it handles user input for movement in the game. The code includes the InputManager class which is responsible for getting user input, and it is used in the Move class in the update method. This input is then used to move the player character. Therefore, the code has relevance and may contain useful elements or logic that pertains to the user's question about handling user input and movement in the game.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the quest

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: While the provided code snippet is related to handling user input for movement, it does not provide a complete understanding of the game's user input system and movement mechanics. The code only plays a sound effect when an arrow key is pressed, and the actual player movement implementation is not present in this code snippet. It is essential to examine the complete user input handling mechanism and movement logic to diagnose potential issues with the arrow keys.

Decision: more
Next question: Could you kindly share the complete code for handling user input and player movement in the game? This will help us investigate potential causes for the arrow keys not functioning correctly.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive e

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is related to the user's question as it pertains to handling user input and player movement in the game. This code may contain Logic and elements that could help in understanding the issue with the arrow keys not functioning correctly in the game.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be di

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets display relevant parts involved in handling arrow keys for player movement in the game. However, they seem to be incomplete since the code that processes the user input in the InputManager class is missing. Without this, it's challenging to identify the root cause of the arrow keys not functioning correctly.

Decision: more
Next question: Could you kindly provide the full code for the InputManager class handling user input, particularly the sections related to processing arrow key inputs? This will help us gain a better understanding of the issue at hand and potentially identify any potential bugs or interactions with other parts of the code causing the arrow keys to malfunction.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is relat

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is related to the user's question as it is a part of an InputManager class handling user inputs. The code specifically focuses on processing arrow key inputs.
  Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User ques

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets are related to the user's question as they deal with the handling of arrow key inputs in the game. However, they do not form a complete solution to the user's problem as they do not show the implementation of the InputManager.isPush() method, which is called in the other code snippets to check if arrow keys are pressed. Understanding the InputManager's implementation is crucial to determine if there's a bug causing the arrow keys to malfunction.

Decision: more

Next question: Could you please share the implementation of the InputManager.isPush() method to help us better understand the potential causes of the arrow keys not functioning properly for movement in the game?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code does not include an implementation of the `InputManager.isPush()` method, which is the method the user is specifically asking for. The code includes several methods related to player movement, but it does not directly address the issue of arrow keys not functioning properly for movement.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets provide a sufficient understanding of how the InputManager handles arrow key inputs. They define functions to check if a specific key, including arrow keys, is being pressed and return the corresponding direction. However, there is a missing part where the code is actually processing the arrow keys' input in the game. We cannot determine if the issue lies within the InputManager class or other parts of the code without examining the complete implementation of the arrow key input handling in the game.

Decision: more

Next question: Could you share the part of the code that processes and acts upon arrow key inputs in the game, specifically where the player's movement is updated based on these inputs? This will help provide a more comprehensive understanding of the potential causes of the arrow keys not functioning properly.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code does not process or act upon arrow key inputs directly. Instead, it defines various methods for character movement, collision detection, and animation. Arrow keys' movements are assumed to be mapped to certain integer directions, which the code uses to facilitate movement. However, it does not explicitly handle arrow keys' events or update the player's position based on these inputs. 

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provide

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets give a good understanding of how the game handles arrow key inputs and updates the player's movement. However, there might be missing parts in other sections of the code that could affect the arrow keys' functionality. It would be helpful to examine the game's initialization process, event handling, and input processing to ensure a complete analysis.

Decision: more

Next question: Could you please share the code sections responsible for initializing the InputManager, processing game events, and checking the player's readiness for movement? This will provide a more comprehensive understanding of the potential causes for the arrow keys not functioning properly in the game.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but n

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is related to the user's question since it initializes the `EventManager` which might be responsible for handling game events, including player input. However, it doesn't directly cover the "InputManager" mentioned in the user's question, but since "InputManager" is assumed to be related to "EventManager" in the context of the provided code, it can still provide useful information.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is c

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code offers a good starting point for understanding the player movement and input handling in the game. However, it is not comprehensive enough to cover all aspects of the user's question, as it does not include the sections responsible for initializing the InputManager, processing game events, and checking the player's readiness for movement. These elements are essential in determining the causes of the arrow keys not functioning properly.

Decision: more

Next question: Could you please share the following code sections:
1. The code that initializes the InputManager.
2. The code that processes game events.
3. The code that checks the player's readiness for movement.

These sections will provide a more comprehensive understanding of the potential causes for the arrow keys not functioning properly in the game.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you d

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code snippet is partially related to the user's question. The code initializes and manages the game's events, including the enemy and treasure maps. It also sets up the player's readiness for movement. The code that checks the player's readiness for movement (`ready_move_enemys()`) is particularly relevant to the user's question as it may potentially influence the functionality of the arrow keys. However, it does not directly handle the processing of the arrow key events.

   Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or l

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code is related but not comprehensive enough to answer the user's question since it covers the handling of arrow keys input for player and enemy movement but lacks essential components such as the initialization of the InputManager and the processing of game events where the arrow keys' input may be checked and acted upon. To have a complete understanding of the potential causes for the arrow keys not functioning properly, it's crucial to examine the code related to these aspects as well.

Decision: more

Next question: Could you please share the following code sections:
1. The code that initializes the InputManager.
2. The code that processes game events.
3. The code that checks the player's and enemy's readiness for movement.

These sections will provide a more comprehensive understanding of the potential causes for the arrow keys not functioning properly in the game.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commande

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
1. Thought: The provided code does not contain the initialization of the InputManager and the code that checks the player's and enemy's readiness for movement. Instead, it focuses on managing the map, game system, game info, event manager, and player.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code covers the initialization of the InputManager, the code that processes game events, and the code that checks the player's and enemy's readiness for movement as requested. However, there is no code snippet related to how the arrow keys are handled for enemy movement or any potential interactions between the player and enemy movement functions.

Decision: more

Next question: Could you please provide the code related to enemy arrow key input handling and any interactions between the player and enemy movement functions? This information will help in understanding the potential causes for the arrow keys not functioning properly for enemy movement.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not directly related to enemy arrow key input handling and any interactions between the player and enemy movement functions as requested by the user. However, some parts of the code, such as `__can_move` function, may indirectly relate to the user's question since it checks for collisions between enemies and the player. This collision detection might affect player and enemy interactions and their movements.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispos

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets cover relevant classes, methods, and functions for handling arrow keys input and player and enemy movement in the game. However, the code does not include the complete enemy movement handling and potential interactions between player and enemy movement functions. Additional code bits, such as enemy AI, could be crucial for understanding if the arrow keys not functioning properly for enemy movement is caused by a problem with those functions.

Decision: more
Next question: Could you please provide the code related to enemy AI and any interactions between the player and enemy movement functions? This information will help in understanding the potential causes for the arrow keys not functioning properly for enemy movement.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Pr

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code snippet does not contain any enemy AI or player-enemy interaction related to movement functions. The code only imports required modules and defines classes.

 Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute in any way to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User que

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets give insight into various aspects of the game, such as player movement, enemy movement, InputManager, and event handling. The code related to enemy movement is particularly important given the user's question, as they're experiencing issues with arrow keys not functioning properly for enemy movement while the space key works fine. However, the code does not provide the complete picture and might be missing crucial parts like the enemy AI or any interactions between the player and enemy movement functions.

Decision: more

Next question: Could you please share the code related to enemy AI and any interactions between the player and enemy movement functions? This information will help in understanding the potential causes for the arrow keys not functioning properly for enemy movement.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code implements the functionality for using items, such as potions and bombs, in the game. There is no direct evidence of enemy AI or player-enemy interaction in the code. However, it's not completely unrelated as understanding the flow of the game, including item usage and interactions, might help in pinpointing the issue with the arrow keys not functioning properly for enemy movement.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related, even partially, to the user's question. This indicates that the code, while possibly incomplete or not entirely covering all aspects, still has relevance and may contain useful elements or logic that pertains to the question.
- Prompt 'Decision:dispose.' if the code provided i

KeyboardInterrupt: 

## Custom_model Chat

In [1]:
database_name = "custom_model"

In [8]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [1]:
import os
import json

def get_infs(question, disposed_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            selected_id = id.item()
            infs.append(chunks[selected_id])
            break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break
            
    return infs, selected_id


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list):
    combined_code = ""
    for i, code in enumerate(code_mem_list, start=1):
        combined_code += f"code_{i}:\n\n{code}\n\n"
    return combined_code
    
    
def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\nCode:\"\"\"{combined_code}\"\"\"{LLM2_system2}"

def new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf):
    return  f"{LLM1_system1}{question}\nCode:\"\"\"{code_inf}\"\"\"{LLM1_system2}"


def new_prompt_SUMLLM(SUMLLM_system1,SUMLLM_system2,original_question,code_inf):
    return  f"{SUMLLM_system1}{original_question}\nCode:\"\"\"{code_inf}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list)
    return  f"{GENELLM_system1}{original_question}\n:Pieces of code\"\"\"{combined_code}\"\"\""
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            command = words[0].rstrip('.')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            clean_word = word.rstrip('.')
            if clean_word in ['keep', 'generate', 'dispose']:
                command = clean_word
                break
        else:
            command = 'error'

    return command





def get_output(answer):
    # 'Decision'のインデックスを取得
    decision_index = answer.find('Decision:')
    # 'Decision'が見つからない場合、'Decison'を検索
    if decision_index == -1:
        decision_index = answer.find('Decison:')
    
    # 'Decision'または'Decison'以降の文章を取得
    if decision_index != -1:
        output = answer[decision_index:]
    else:
        output = "Decision not found in input"
    return output.strip()

    

    

In [13]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt  'Decision:keep.' if the code provided along with the question is  related to the user's question.If the code is completely unrelated to the user's question, you'll prompt 'Decision:dispose.'

Your actionable commands:

keep
dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """
<<SYS>>

        You need to share your decision, and  tell me the reason of your decision.
        Follow this format below

        Decision:(Choose from three options.)
        Thought:(Tell me why)
        
<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt 'Decision:generate' if the code provided along with the question is sufficient to answer the user's query. If the code is related to the user's question but not sufficient to answer it, you'll prompt 'Decision:more.' 

Your actionable commands:

generate
more

<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>

        You need to share your decision, and  tell me the reason of your decision. In addtion, you need to make a next question to collect missing information.
        Follow this format below

        Decision:(Choose from three options.)
        Thought:(Tell me why)
        Next question:(make a question to search new code)
        
<</SYS>>

[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer and adept at explaining code. Your task is to create summaries of code based on pairs of code and user-submitted questions. The code relates to the questions asked, but not all parts of the code may be necessary for the summary. Users are interested in understanding the parts of the code that are most relevant to their questions. Therefore, you should extract and highlight the key portions of the code that are pertinent to the users' questions.
<</SYS>>

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the important sections from the code provided. Additionally, you must explain why these extracted sections are crucial for answering the user's question. Please follow the format below:

Code:(the critical parts of the code that are relevant to the user's question. )
Relation:(Tell me relation between the code and Users question)
        
<</SYS>>

[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from users. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""







original_question = """
What does the streamer.stream() method return, and what is the significance of each returned value?"""

output = """Decison: dispose. Next question: What does the streamer.stream() method return, and what is the significance of each returned value?"""
code_mem_list = []
disposed_id_list =[]
while get_command(output) != 'generate':
    question = get_new_question(output)
    code_inf, id = get_infs(question, disposed_id_list)
    prompt = new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])

    output = get_output(answer)
    command = get_command(output)
    if command == 'dispose':
        disposed_id_list.append(id)
    print("------- OUTPUT1 --------")
    print(output)
    print("------- INPUT1 --------")
    print(prompt)
    print("------- code_mem_list1 --------")
    print(code_mem_list)
    print("------- disposed_id_list1 --------")
    print(disposed_id_list)
    if command == 'keep':
        prompt = new_prompt_SUMLLM(SUMLLM_system1,SUMLLM_system2,original_question,code_inf)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
        answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        code_mem_list.append(answer)
    if command == 'error':
        print('error')
        break
    prompt = new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    output = get_output(answer)
    print("------- OUTPUT2 --------")
    print(output)
    print("------- INPUT2 --------")
    print(prompt)
    print("------- code_mem_list2 --------")
    print(code_mem_list)
    print("------- disposed_id_list2 --------")
    print(disposed_id_list)


prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
print("------- ANSWER OF THE QUESTION --------")
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Decision:dispose.
  Thought: The user's question asks about the streamer.stream() method and its returned values, while the provided code is related to the definitions and implementations of the _stream(), _astream(), and _identifying_params methods, which are not directly related to the user's question.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt  'Decision:keep.' if the code provided along with the question is  related to the user's question.If the code is completely unrelated to the user's question, you'll prompt 'Decision:dispose.'

Your actionable commands:

keep
dispose

<</SYS>>

User input:

User question: What does the streamer.stream() method return, and what is the significance of each returned value?
Code:"""['def _stream(\n        self,\n        messages: List[BaseMessage],\n        stop: Optional[List[str]] = None,\n     

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Decision:more.
Thought: The user's question asks for information about the returned values of the `streamer.stream()` method, but no code is provided to determine what this method actually is or what its return values might be.

Next question: Could you please provide a specific implementation or library reference for the `streamer.stream()` method, so we can investigate the nature of this method and its return values?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt 'Decision:generate' if the code provided along with the question is sufficient to answer the user's query. If the code is related to the user's question but not sufficient to answer it, you'll prompt 'Decision:more.' 

Your actionable commands:

generate
more

<</SYS>>

User input:

User question: 
What does the streamer.stream() method return, and what is the significance of ea

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Decision:keep.
  Thought:The user asked for a specific implementation or library reference for the `streamer.stream()` method. The code provided includes the implementation of the `streamer.stream()` method in both synchronous and asynchronous versions. Therefore, I will keep the code as it is relevant to the user's question.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt  'Decision:keep.' if the code provided along with the question is  related to the user's question.If the code is completely unrelated to the user's question, you'll prompt 'Decision:dispose.'

Your actionable commands:

keep
dispose

<</SYS>>

User input:

User question: Could you please provide a specific implementation or library reference for the `streamer.stream()` method, so we can investigate the nature of this method and its return values?
Code:"""['def _stream(\n

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Decision:generate
Thought:The user's question and the provided code are related, and the code demonstrates that `streamer.stream()` returns an iterator or async iterator yielding AI-generated text chunks along with an End Of Stream (EOS) flag and an error object. This matches the user's query.

Next question:
Could you provide more context or an example usage of the `_get_stream_return` method to further understand how the return values from `streamer.stream()` are being utilized?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. You decide the next action based on a set of code and a question given by the user. You'll prompt 'Decision:generate' if the code provided along with the question is sufficient to answer the user's query. If the code is related to the user's question but not sufficient to answer it, you'll prompt 'Decision:more.' 

Your actionable commands:

generate
more

<</SYS>>

User input:

User question: 
What does the stre

## OWS chat

In [4]:
database_name = "ows"

In [5]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [6]:
import os
import json

def get_infs(question, disposed_id_list, keep_id_list):
    # 問題文に基づいて検索する
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load(f"db_embs/{database_name}.pt").to(device)
    
    with open(f"chunks/{database_name}.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T) 
    
    # Top-3 のIDを取得
    values, inf_ids = torch.topk(relevance, k=3, dim=0)  # dim=1 で行ごとのTop-Kを取得
    
    infs = []
    selected_id = None
    for id in inf_ids:
        if id.item() not in disposed_id_list:
            if id.item() not in keep_id_list:
                selected_id = id.item()
                infs.append(chunks[selected_id])
                break  # 最初に見つかった適切なIDで終了

    if selected_id == None:
        values, inf_ids = torch.topk(relevance, k=relevance.shape[0], dim=0)
        for id in inf_ids:
            if id.item() not in disposed_id_list:
                if id.item() not in keep_id_list:
                    selected_id = id.item()
                    infs.append(chunks[selected_id])
                    break  # 最初に見つかった適切なIDで終了
            
    return infs, selected_id


def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def combine_codes(code_mem_list):
    combined_code = ""
    for i, code in enumerate(code_mem_list, start=1):
        combined_code += f"code_{i}:\n\n{code}\n\n"
    return combined_code
    
    
def new_prompt_LLM2(LLM2_system1,LLM2_system2,original_question,question,code_mem_list):
    if code_mem_list == []:
        combined_code = 'There is no code available to answer'

    else:
        combined_code = combine_codes(code_mem_list)
    return  f"{LLM2_system1}{original_question}\nLast search question:{question}\nCode:\"\"\"{combined_code}\"\"\"{LLM2_system2}"

def new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf):
    return  f"{LLM1_system1}{question}\nCode:\"\"\"{code_inf}\"\"\"{LLM1_system2}"


def new_prompt_SUMLLM(SUMLLM_system1,SUMLLM_system2,original_question,code_inf):
    return  f"{SUMLLM_system1}{original_question}\nCode:\"\"\"{code_inf}\"\"\"{SUMLLM_system2}"

def new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list):
    combined_code = combine_codes(code_mem_list)
    return  f"{GENELLM_system1}{original_question}\n:Pieces of code\"\"\"{combined_code}\"\"\"[/INST]"
    
def get_new_question(output):
    # 'Next question:' または 'Next question :' のインデックスを取得
    next_question_index = output.find('Next question:')
    if next_question_index != -1:
        # 'Next question:'の後の空白をスキップ
        question_start_index = next_question_index + len('Next question:')
        while output[question_start_index] == ' ':
            question_start_index += 1
        
        # 質問文を取得し、不要なタグを削除
        question_end_index = output.find('</s>', question_start_index)
        if question_end_index == -1:
            question_end_index = None  # タグがない場合は文字列の最後までが質問
        question = output[question_start_index:question_end_index].strip()
    else:
        question = "Next question not found in input"
    
    return question

    


def get_command(output):
    # 'Decision:' または 'Decison:' (typoを許容) の後の1単語を取得
    decision_key = 'Decision:'
    decision_index = output.find(decision_key)
    
    # Typo 'Decison:' が含まれているか確認
    if decision_index == -1:
        decision_key = 'Decison:'
        decision_index = output.find(decision_key)
    
    # 'Decision:' または 'Decison:' の後の単語を取得
    if decision_index != -1:
        decision_index += len(decision_key)
        words = output[decision_index:].split()
        if words:
            # コマンドの取得と不要な文字の削除
            command = words[0].strip('.<>/s')
        else:
            command = 'error'
    else:
        # 'Decision:' または 'Decison:' が見つからない場合、
        # 最初に出現する 'keep', 'generate', 'dispose' のいずれかの単語をコマンドとする
        for word in output.split():
            # コマンドの抽出と不要な文字の削除
            clean_word = word.strip('.<>/s')
            if clean_word in ['keep', 'generate', 'dispose']:
                command = clean_word
                break
        else:
            command = 'error'

    return command






def get_output(answer):
    # 'Decision'のインデックスを取得
    decision_index = answer.find('Decision:')
    # 'Decision'が見つからない場合、'Decison'を検索
    if decision_index == -1:
        decision_index = answer.find('Decison:')
    
    # 'Decision'または'Decison'以降の文章を取得
    if decision_index != -1:
        output = answer[decision_index:]
    else:
        output = "Decision not found in input"
    return output.strip()


In [None]:
LLM1_system1 = """
[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: """

LLM1_system2 = """<<SYS>>

Firstly,you need to share your opinion about that the reason of your decision,then you need to share your decision. 
Use the format below:

Thought:(Explain whether the given code is necessary to answer the user's question.)
Decision:(Choose from three options.)

<</SYS>>
[/INST] """

LLM2_system1 = """[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: """

LLM2_system2 = """<<SYS>>

Firstly,you need to share your opinion about that the provided code is sufficient or insufficient,then you need to share your decision. Additionally, you must formulate a follow-up question to collect the missing information necessary to complete the code. Use the format below:

Thought: (Explain why the provided code is sufficient or insufficient)
Decision: (Choose 'generate' or 'more')
Next question: (Formulate a question to help gather the missing or additional code required)

        
<</SYS>>
[/INST]"""



SUMLLM_system1 = """[INST]<<SYS>>
You are a skilled programmer and adept at explaining code. Your task is to create summaries of code based on pairs of code and user-submitted questions. The code relates to the questions asked, but not all parts of the code may be necessary for the summary. Users are interested in understanding the parts of the code that are most relevant to their questions. Therefore, you should extract and highlight the key portions of the code that are pertinent to the users' questions.
<</SYS>>

User input:

User question:"""


SUMLLM_system2 = """<<SYS>>
You are required to extract the important sections from the code provided. Additionally, you must explain why these extracted sections are crucial for answering the user's question. Please follow the format below:

Code:(The critical parts of the code necessary to answer the user's question.)
Relation:(Tell me relation between the code and Users question)
        
<</SYS>>
[/INST]"""


GENELLM_system1 = """[INST]<<SYS>>
You are an excellent programmer and are adept at explaining code. You will be provided with one or more pieces of code along with corresponding questions from users. The provided code is selected from a larger codebase specifically to enable you to answer these questions. Your task is to answer the user’s questions as thoroughly and clearly as possible, demonstrating your understanding and ability to communicate key coding concepts.

<</SYS>>

User input:

User question:"""





original_question = """
How to change the transform of object when I change transform button in the transform button?
"""


output = f"""Decison: dispose. Next question:{original_question}"""

code_mem_list = []
disposed_id_list =[]
keep_id_list = []
while get_command(output) != 'generate':
    question = get_new_question(output)
    code_inf, id = get_infs(question, disposed_id_list, keep_id_list)
    prompt = new_prompt_LLM1(LLM1_system1,LLM1_system2,question,code_inf)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    output = answer
    command = get_command(output)
    if command == 'dispose':
        disposed_id_list.append(id)
    print("------- OUTPUT1 --------")
    print(output)
    print("------- INPUT1 --------")
    print(prompt)
    print("------- code_mem_list1 --------")
    print(code_mem_list)
    print("------- disposed_id_list1 --------")
    print(disposed_id_list)
    if command == 'keep':
        keep_id_list.append(id)
        prompt = new_prompt_SUMLLM(SUMLLM_system1, SUMLLM_system2, original_question, code_inf)
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
        answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
        code_mem_list.append(answer)
    if command == 'error':
        print('error')
        break
    prompt = new_prompt_LLM2(LLM2_system1, LLM2_system2, original_question, question, code_mem_list)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    output = answer
    print("------- OUTPUT2 --------")
    print(output)
    print("------- INPUT2 --------")
    print(prompt)
    print("------- code_mem_list2 --------")
    print(code_mem_list)
    print("------- disposed_id_list2 --------")
    print(disposed_id_list)


prompt = new_prompt_GENELLM(GENELLM_system1,original_question,code_mem_list)
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer
)
answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
print("------- ANSWER OF THE QUESTION --------")
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not directly related to changing the transform of an object when a transform button is clicked. The code uses Input.GetKey to change the localEulerAngles of an object when a key is pressed. 

 Decision:dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: How to change the transform of object when

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code is insufficient as it does not contain any code related to changing the transform of an object when a transform button is clicked.
Decision: more
Next question: Could you please share the code that handles the click event for the transform button and the logic for changing the transform of the object?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</SYS>>

User input:

User question: 
H

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not directly related to handling the click event for a transform button or changing the transform of an object. However, it does include a method named `BtnClick()` which is called when a button is clicked. But this code only changes the color of the button's image component when the button is clicked, which is not what the user is asking for.

Decision:dispose</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code is insufficient as there is no code presented for handling the click event of the transform button or for changing the transform of the object.
Decision: more
Next question: Could you please share the code for handling the click event of the transform button and how you are currently changing the transform of the object in your program?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<</S

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code contains several functions and an Update method that handle various transformations of GameObjects based on certain conditions. Among these, there are two methods named "HoldDrill" and "HoldOff" that seem to be related to the user's question as they are handling click events and transformations of a GameObject. However, the user didn't explicitly ask for these methods, they asked for the click event handling and transformation of the transform button.

Decision: keep.

Keep the code, as it contains some parts that are relevant to the user's question. Inspect the "HoldDrill" and "HoldOff" methods to find the click event handling and transformation logic for the transform button.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is rela

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code demonstrates the condition under which the object's transform is being changed, but it is not clear how the "transform button" is being handled in the context of this code snippet. The missing elements could include the code for handling the click event of the transform button and the logic for setting the `is_setting` flag to true.

Decision: more
Next question: Could you please provide the code for handling the click event of the transform button and how the `is_setting` flag is being set in your program?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Promp

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The user has explicitly asked for the code related to handling the click event of the transform button and the setting of the `is_setting` flag. The provided code does contain a part of the OnPointerClick function, but it is incomplete and doesn't include the actual logic for handling the click event or setting the flag. However, it may be a useful starting point to understand the context of the program. Furthermore, it seems that the provided code snippet is handling the click events for "Drill" and "Welding" tools. The context of the user's question suggests that they might be asking for the specific logic related to setting the `is_setting` flag for either one of these tools.

Decision: keep.

<</SYS>></s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code pro

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets give an idea about the transformation change when a button is pressed, but they do not contain the complete code for handling the click event of the transform button and the implementation of changing the transform based on that event.

Decision: more

Next question: Could you please provide the complete code for handling the click event of the transform button and how the transformation change is being implemented in the provided code?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not related to handling the click event of a transform button or implementing any transformation changes. It appears to be a script for managing pointer events for a GameObject in Unity.

  Decision:dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: Could you please provide the complete code fo

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets are related to the user's question, but they don't cover all aspects of the question. The first code snippet demonstrates the transformation change based on the "is_setting" flag, but it doesn't specify the exact relation between the flag and the transform button. The second code snippet explains how the transform of an object changes when the user clicks on the transform button and performs a raycasting operation, but it doesn't provide the implementation details of how the transformation change is being applied to the object.

Decision: more

Next question: Could you please provide the complete implementation of the script handling the button click event for the transform button and the corresponding transformation change applied to the selected object?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following c

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is related to handling the button click event for an object and managing the selection of objects in the Workspace. However, the code does not explicitly show the transformation change applied to the selected object.

Decision:keep

The user's question asks for the implementation related to the button click event and transformation change. Although the given code does not fully answer the question, it contains relevant logic that could be used as a starting point or contain useful elements towards solving the problem.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prom

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets show several parts of the script that are related to the user's question, but they don't fully satisfy the requirements to answer the question comprehensively. Some elements are missing, such as the actual transform change applied to the selected object when the transform button is clicked. Also, no code snippet shows the handling of the button click event.

Decision: more

Next question: Could you please provide the complete implementation of the script, including the button click event handling and the transform change applied to the selected object when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing,

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code is related to script for creating an object in unity scene with some transform manipulation. However, it is incomplete as it is missing the button click event handling part that the user is asking for. Still, the code snippet may contain some useful elements or logic for the overall implementation.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable co

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code contains multiple pieces, each handling different aspects of transform button interactions. The first code snippet (`code_1`) demonstrates the rotation of an object based on the `is_setting` flag, which could be related to the transform button. The second code snippet (`code_2`) handles raycasting and tool selection when the user interacts with the transform button. The third code snippet (`code_3`) is responsible for updating the selection of objects when a transform button is clicked. The fourth code snippet (`code_4`) generates an object when the transform button is pressed and then moves the generated object. Since the user's question asks about changing the transform of an object when the transform button is clicked, it seems that the combination of `code_2` and `code_3` (or possibly `code_3` alone) should be sufficient to answer the question. However, the provided code is not extensive enough to generate a comprehensive answer, 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
thought: The given code does not contain the complete implementation of the script, it is missing the button click event handling and the transform change applied to the selected object when the transform button is clicked. However, the code does show the functionality of the script in relation to selecting a mold and passing that information to another script (WorkspaceCtr).

decision: keep.

<</SYS>>

This response means that the code, even though incomplete, has relevance to the question and may contain useful elements or logic. In this case, the 'keep' decision indicates that the code may still contribute to answering the user's question, even if additional parts are required.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's que

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The user's question asks about changing the transform of an object when pressing the transform button. The provided code snippets contain several pieces of relevant code, with the transform button's interaction being handled in the `OnDrag` method of the `MoldScrollViewBtnCtr` script and the transform change being implemented in the `WorkspaceCtr` script's `SelectObj` and `MoveObj` methods. However, the `WorkspaceCtr` script only shows the methods' declarations, and the actual implementation of the `SelectMold` method, which presumably handles the transform change, is not present in the provided code.

Decision: more

Next question: Could you please provide the implementation of the `SelectMold` method in the `WorkspaceCtr` script, which likely handles the transform change when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the nex

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code consists of OnPointerDown event handling for selecting an object in a workspace using raycasting. The user's question is specifically asking for the implementation of the SelectMold method in the WorkspaceCtr script. The code does not contain the SelectMold method, but it does share some logic related to object selection when a transform button is clicked, which could be useful for understanding part of the context.

Decision:keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's questi

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code does not comprehensively answer the user's question as it lacks the implementation of the `SelectMold` method in the `WorkspaceCtr` script, which is likely responsible for handling the transform change when the transform button is clicked.

Decision: more

Next question: Could you please share the implementation of the `SelectMold` method in the `WorkspaceCtr` script to help understand how the transform of an object is changed when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided f

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not directly related to the `SelectMold` method or the transform of an object being changed when the transform button is clicked. However, it does provide some context about different functionalities of the script.

  Decision:dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: Could you please 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets give some context about the user interaction with transform buttons and changing the transform of objects in the scene. However, none of the given code directly handles the transformation change when clicking the transform button. Instead, it deals with raycasting, tool selection, and selected object manipulation.

Decision: more

Next question: Could you please share the implementation of the `SelectMold` or related methods in the `WorkspaceCtr` script that handle the transformation change when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all as

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not related to the implementation of the `SelectMold` or any related methods in the `WorkspaceCtr` script that handle the transformation change when the transform button is clicked. The code is for a separate script `ECPanel2Ctr` which handles the pointer enter and exit events.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- d

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets are related to the user's question but not comprehensive enough to answer it directly. They demonstrate different parts of the script that handle raycasting, object selection, and transform modifications. However, the user's question asks specifically about changing the transform of an object when the transform button is clicked. The code snippets do not provide a clear answer to this question as they don't contain the method or logic responsible for handling the transform changes when the transform button is clicked.

Decision: more

Next question: Could you please share the implementation of the `SelectMold` or related methods in the `WorkspaceCtr` script that handle the transformation change when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your dec

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code does not contain any method or implementation related to the transformation change when the transform button is clicked in the `WorkspaceCtr` script. Instead, it mostly consists of event handler functions for dragging and pointer events. Therefore, it does not provide any relevant information to answer the user's question.

Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disrega

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The code snippets provided give insight into several aspects of the application, including raycasting, object selection, and transform manipulation. However, they do not directly answer the user's question about changing the transform of an object when clicking a transform button. While some code snippets relate to the transform change, such as the `transform.rotation` modification in code_1, they don't fully explain the process.

Decision: more

Next question: Could you please provide the specific code implementation related to transform changes when clicking the transform button in the `WorkspaceCtr` script? The code provided in the question, such as code_1 and code_3, hint at a transform change, but it's unclear how it's triggered and exactly how the transform is modified. Further clarification is needed for a comprehensive answer.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The provided code does not contain any parts related to handling transform changes when clicking the transform button in the `WorkspaceCtr` script. The code mostly handles user input, camera position modification, and object selection. Therefore, it does not seem relevant to the user's question.

  Decision:dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets do not fully satisfy the requirements to answer the user's question comprehensively. While some code snippets hint at transform changes when clicking the transform button, the exact implementation of how the transform is modified and which button was clicked is missing.

Decision: more
Next question: Could you provide the specific implementation related to transform changes when clicking the transform button in the `WorkspaceCtr` script? Please include the exact button name or identifier, as well as the code responsible for modifying the transform of the selected object.</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessa

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not directly related to the user's question, as it does not contain specific implementation related to transform changes when clicking the transform button in the `WorkspaceCtr` script. However, it does contain several declarations and references to variables and components that might be relevant to the user's question. Some of these variables and components are used in the code for handling different types of user input, such as pointer clicks and dragging. Therefore, it can be assumed that the code may contain useful elements or logic for understanding the overall behavior of the `WorkspaceCtr` script.

Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the cod

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets are partially related to the user's question as they demonstrate the declaration and usage of Transform variables for managing the position, rotation, and scale of objects in a Unity scene. However, none of the code snippets directly handle the transform changes when the transform button is clicked. The code only provides the foundation for making transform modifications when using the transform panel.

Decision: more

Next question: Could you please provide the relevant code snippet or script that handles the transform modifications when the transform button is clicked? This would be the function that updates the Transform variables (x1, x2, y1, y2, z1, z2, x1_rot, x2_rot, y1_rot, y2_rot, z1_rot, z2_rot) based on the user interaction with the transform button?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the follo

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code contains several conditional statements based on different user inputs, but the relevant code for handling transform modifications when the transform button is clicked appears to be missing. However, there is a line of code that could potentially be related: "this.cylinder.transform.rotation = Quaternion.FromToRotation(local_dir, purpose_dir);". This line rotates the cylinder object based on two vectors, but it doesn't seem to be directly connected to any user input related to transform modifications. However, it could be a part of the solution, so it's worth keeping for further analysis.

  Decision: keep.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The code provided by the user is insufficient as it only demonstrates a portion of the functionality related to changing the transform of an object when a transform button is clicked. The code snippets do not show a clear relationship between the user interaction with the transform button and the actual transformation change.

Decision: more

Next question: Could you please share the relevant parts of the script that handle the transform modifications when the transform button is clicked? This would be the function or the event listener that updates the Transform variables (x1, x2, y1, y2, z1, z2, x1_rot, x2_rot, y1_rot, y2_rot, z1_rot, z2_rot) based on the user interaction with the transform button?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code is not related to handling transform modifications when the transform button is clicked. It mainly focuses on handling UI panel animations such as opening and closing.

  Decision: dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question and should be disregarded.

Your actionable commands:
- keep
- dispose

<</SYS>>

User input:

User question: Could you please share the relevant parts of the script that handl

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code snippets contain a mix of irrelevant and relevant code to the user's question. The code_1 and code_7 snippets are the most relevant, as they deal directly with object transformations when certain conditions are met. However, neither of these code snippets shows the specific part where the transform button press event is linked to updating the object's transform variables. Therefore, the code is insufficient to answer the user's question completely.

Decision: more

Next question: Could you please provide the event listener or function responsible for updating the object's transform variables (x1, x2, y1, y2, z1, z2, x1_rot, x2_rot, y1_rot, y2_rot, z1_rot, z2_rot) when the transform button is clicked?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the c

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT1 --------
Thought: The given code does not contain any function or event listener responsible for updating the object's transform variables when the transform button is clicked. Instead, it consists of event handlers (OnEndDrag), void methods (count\_on and count\_off), and a boolean flag (is\_update\_count). These elements do not directly relate to updating transform variables.

Decision:dispose.</s>
------- INPUT1 --------

[INST]<<SYS>>You are an excellent commander. Based on the code and the question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:keep.' if the code provided is related to the user's question. This indicates that the code, while possibly incomplete, has relevance to the question and may contain useful elements or logic.
- Prompt 'Decision:dispose.' if the code provided is completely unrelated to the user's question. This means the code does not contribute to answering the question

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


------- OUTPUT2 --------
Thought: The provided code in `code_8` does satisfy the user's question, as it contains the implementation of changing the object's transform when the space key is pressed.

Decision: generate

Next question: Could you please share the context of how the `local_dir` and `purpose_dir` variables are determined or calculated in relation to the transform button's press?</s>
------- INPUT2 --------
[INST]<<SYS>>You are an excellent commander. Based on the code and question provided by the user, you decide the next action. Use the following criteria to make your decision:

- Prompt 'Decision:more' if the code is related but not comprehensive enough to answer the question. This means some elements are missing, which are necessary to complete the answer or to cover all aspects of the question.
- Prompt 'Decision:generate' if the code provided fully satisfies the requirements to answer the user's question comprehensively.

Your actionable commands:
- more
- generate


<

## Open3D Chat

In [5]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [6]:
import os
import json

def get_infs(question):
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    inf_embs = torch.load("db_embs/open3d.pt").to(device)
    
    with open("chunks.json") as json_file:
        chunks = json.load(json_file)
    
    relevance = torch.matmul(q_embs, inf_embs.T)
    
    values, inf_ids = torch.topk(relevance, k=1, dim=0)
    
    infs = []
    for id in inf_ids:
        infs.append(chunks[id])

    return infs

def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt
    
def new_prompt(system, environment, question, answer):
    if "[TAKE]" in answer:
        infs = get_infs(question + answer)  #ここにenvironmentも入れてseatで行ければかなりでかい
        return system + "\nEnvironment:" + environment + "\nUSER: " + get_prompt(question, infs) + "[/INST]"

    elif "[END]" in answer:
        return None

    else:
        return system + "\nEnvironment:" + environment + "\nUSER: " + question + "\nCode:```\n```[/INST]"


In [8]:
system = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code and prompt [END] at last. If not, prompt [TAKE] again until you get code that is adequate for answering the question. Therefore, you must include two things in your answer. First, you should summarize all the information you get from the code so far and tell what else you need to answer the question. Second, you must add [TAKE] or [END] at the end to decide whether or not you extract more information from the external memory.
"""

environment = """
You are given a part of a open source library.
"""

question = """
How to draw a sphere?
"""

answer = ""

for i in range(5):
    prompt = new_prompt(system, environment, question, answer)

    if prompt == None:
        print("---- END ----")
        break
    
    print()
    print("------- PROMPT --------")
    print(prompt)
    
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])

    if answer != "": environment = answer
    
    print()
    print("------- OUTPUT --------")
    print(answer)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- PROMPT --------

<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code and prompt [END] at last. If not, prompt [TAKE] again until you get code that is adequate for answering the question. Therefore, you must include two things in your answer. First, you should summarize all the information you get from the code so far and tell what else you need to answer the question. Second, you must add [TAKE] or [END] at the end to decide whether or not you extract more information from the external memory.

Environment:

You are given a part of a open source library.

USER: 
How to draw a sphere?

Code:```
```[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
I see that the code you've provided is incomplete and doesn't include any specific function for drawing a sphere. To answer your question, I'll need to see some adequate code that demonstrates how to draw a sphere using the given library. 

[TAKE]

Here's an example of how to draw a sphere using OpenGL and GLFW library. This code snippet initializes the OpenGL context, sets up the shaders, and draws a sphere using the Vertex Array Object (VAO) and Vertex Buffer Object (VBO).

```c++
#include <GL/glew.h>
#include <GLFW/glfw3.h>

constexpr unsigned int SPHERE_VERTICES_COUNT = 24 * 6;
constexpr float SPHERE_RADIUS = 1.0f;

struct Vertex {
    float x, y, z;
    float texCoords[2];
};

void init() {
    // Initialize GLFW
    glfwInit();
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);

    // Create a window
    GLFWwindow *window = glfwCreateWin

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
I'm sorry, the provided code snippet is not for drawing a sphere using OpenGL or any other common 3D graphics library like Open3D. Instead, it's a Python function for creating a 3D visualizer using the Open3D library for displaying various types of 3D geometries.

To draw a sphere using OpenGL and GLFW, you need to use different code. Here's an example:

```c++
#include <GL/glew.h>
#include <GLFW/glfw3.h>

constexpr unsigned int SPHERE_VERTICES_COUNT = 24 * 6;
constexpr float SPHERE_RADIUS = 1.0f;

struct Vertex {
    float x, y, z;
};

void init() {
    // Initialize GLFW
    glfwInit();
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);

    // Create a window
    GLFWwindow *window = glfwCreateWindow(800, 600, "Sphere", NULL, NULL);
    if (window == NULL) {
        std::cout << "Failed to create GLFW window" << std::endl;
        glfwTermina

## Chat 1

In [4]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/112k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [8]:
import os

def get_infs(question):
    q_embs = torch.tensor(emb_model.encode(question)).to(device)


    import json
    
    with open("database.json") as json_file:
        database = json.load(json_file)
    
    top_directory = "db_embs"
    inf_embs_dict = {} # it's not used now
    inf_embs = torch.tensor([])
    sentences_list = []
    
    # Traverse through all directories and files recursively
    for root, dirs, files in os.walk(top_directory):
        for file in files:
            if file.endswith(".pt"):
                file_path = os.path.join(root, file)
                file_name = os.path.basename(file_path)[:-3]
                inf_embs_dict[file_name] = torch.load(file_path)
                inf_embs = torch.cat((inf_embs, inf_embs_dict[file_name]), dim = 0)
                sentences_list += database[file_name]

    inf_embs = inf_embs.to(device)
    
    relevance = torch.matmul(q_embs, inf_embs.T)
    
    values, inf_ids = torch.topk(relevance, k=1, dim=0)
    
    infs = []
    
    for id in inf_ids:
        infs.append(sentences_list[id])

    return infs

def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
    prompt += "[/INST]"
    return prompt

def new_prompt(output, prompt):
    if "[TAKE]" in output:
        infs = get_infs(output)
        prompt += get_prompt(output, infs)

    else:
        prompt += "\nCode:```\n```[/INST]"
    
    return prompt


In [9]:
prompt = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you questions regarding code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code. If not, prompt [TAKE] again until you get code that is adequate for answering the question.

Query: Give me code about boundary term.

"""

output = ""

prompt = new_prompt(output, prompt)

for i in range(5):
    print()
    print("------- PROMPT --------")
    print(prompt)
    
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    
    output = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    print()
    print("------- OUTPUT --------")
    print(output)
    
    prompt = new_prompt(output, prompt)



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- PROMPT --------

<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you questions regarding code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code. If not, prompt [TAKE] again until you get code that is adequate for answering the question.

Query: Give me code about boundary term.

Code:```
```[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
I'm sorry, the code snippet you've provided is empty. I'd be happy to help you with boundary conditions in code, but I'll need to see a specific code example first. Could you please [TAKE] a code snippet that includes the context of the boundary conditions you're asking about?

Here's a general explanation of boundary conditions in programming: Boundary conditions are the specific values that a mathematical or computational model applies at the edges or limits of the domain of the problem. In coding, these conditions are often represented as special cases in algorithms or functions. For example, in numerical methods for solving partial differential equations, boundary conditions are used to specify the values of the solution at the edges of the computational domain. In image processing, boundary conditions are used to handle pixels at the edges of an image. In short, boundary conditions help ensure that a model or algorithm behaves correctly at the edges or lim

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet appears to be written in Fortran and is implementing boundary conditions for a 3D Fourier transform, specifically for the "z" dimension. The code checks the value of the boundary condition "z\_bound" and sets up the boundary values accordingly. The possible options for "z\_bound" are "zerofixed", "outflow", and "mixed". If "zerofixed" is specified, the boundary values are set to zero. If "outflow", the boundary values are not explicitly set and are assumed to be zero by default. If "mixed", the boundary values are set based on the values of the cells inside the domain. The code uses OpenMP for parallelization to improve performance.

Here's a more detailed explanation of the code:

- The code starts by checking the value of the variable "z\_bound" using an "if" statement.
- If "z\_bound" is equal to "zerofixed" or "mixed", the code uses OpenMP to parallelize the assignment of boundary values for the z-direction. The "do loop" iterates over the

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet is written in Fortran and appears to be related to implementing boundary conditions for a multi-dimensional array `ew` in a parallel computing environment using OpenMP. The boundary conditions are determined by the value of the parameter `z_bound`.

The code defines a subroutine `bndry_bound_e` which takes care of the boundary conditions. Here's a brief explanation of the code:

1. The subroutine first checks the value of the `z_bound` parameter to determine which boundary condition to apply. The possible values for `z_bound` are "outflow", "zerofixed", and "mixed".
2. Depending on the value of `z_bound`, the code applies different boundary conditions. For "zerofixed" and "mixed" conditions, the code sets the boundary values using a series of loops and conditions. For "outflow", the code doesn't set any specific boundary conditions and doesn't write anything to the array.
3. The code uses OpenMP to parallelize the boundary condition applicatio

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
I see that you've provided a code snippet now. This code appears to be related to implementing boundary conditions for a 3D grid in a Fortran program. The specific boundary condition being implemented here is not clear without additional context, but the code seems to be setting the edge values of a complex array `ew` based on the value of the parameter `z_bound`.

The `z_bound` parameter can take one of three values: "zerofixed", "mixed", or "outflow". If `z_bound` is set to "zerofixed" or "mixed", then the code sets the edge values of `ew` to zero or complex conjugate of coefficients from an array `ck`, respectively. If `z_bound` is set to "outflow", then an error message is printed and the program stops.

The code uses OpenMP for parallelization, with OpenMP directives `$OMP do` and `$OMP end do` for parallel loops and `$OMP master` for the master thread. The `iwomp_get_wsize()` function is used to get the number of threads.

Here's a breakdown of the code:


### old

In [5]:
questions = [
    "チェストドレインの使用上の注意事項を教えてください。",
    "5cmH2Oに設定したいときに、吸引部に滅菌水を何mL入れればいいですか？",
]

q_embs = torch.tensor(emb_model.encode(questions)).to(device)
q_embs = q_embs.reshape(q_embs.shape[0], q_embs.shape[1])
inf_embs = torch.load("embs0.pt").to(device)

relevance = torch.matmul(q_embs, inf_embs.T)

values, inf_ids = torch.topk(relevance, k=3, dim=1)

infs = [[] for _ in range(relevance.shape[0])]
rels = [[] for _ in range(relevance.shape[0])]

import json
json_file_path = "./chunks.json"
with open(json_file_path, 'r') as json_file:
    sentences_list = json.load(json_file)

for i in range(len(inf_ids)):
    for id in inf_ids[i]:
        infs[i].append(sentences_list[id])
    for rel in values[i]:
        rels[i].append(rel)



config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.04G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/585 [00:00<?, ?B/s]

TypeError: 'int' object is not iterable

In [16]:
def get_infs(question):
    q_embs = torch.tensor(emb_model.encode(question)).to(device)
    #q_embs = q_embs.reshape(q_embs.shape[0], q_embs.shape[1])
    inf_embs = torch.load("embs0.pt").to(device)

    
    relevance = torch.matmul(q_embs, inf_embs.T)
    
    values, inf_ids = torch.topk(relevance, k=3, dim=0)
    
    infs = []
    
    import json
    json_file_path = "./chunks.json"
    with open(json_file_path, 'r') as json_file:
        sentences_list = json.load(json_file)
    
    for id in inf_ids:
        infs.append(sentences_list[id])

    return infs

def get_prompt(q, inf_list):
    prompt = q + "\n必要であれば以下の情報を参照してください\n```"
    for inf in inf_list:
        prompt += inf + "\n"
    prompt += "```"
    return prompt




In [19]:
question = """
チェストドレインの使用上の注意事項を教えてください。
"""

infs = get_infs(question)
prompt = get_prompt(question, infs)

print("--------")
print(prompt)
print("--------")

token_ids = tokenizer.encode(prompt, return_tensors="pt")
output_ids = model.generate(
    input_ids=token_ids.to(model.device),
    max_new_tokens=300,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)

--------

チェストドレインの使用上の注意事項を教えてください。

必要であれば以下の情報を参照してください
```
　・患者が移動する際は排液チューブをクランプしない
　　突然咳き込んだりした際に胸腔からでたエアの逃げ道がなくなります
　　クランプすると胸腔内に逆流し、肺虚脱を再発します

　気密性の確認
　　１、水封部・吸引部への注水後、今日食うドレーン接続チューブをクランプします
　　２、黄色ラベルの貼られた吸引装置接続チューブのコネクタと吸引装置を接続します
　　３、吸引のスイッチをオンにし、徐々に吸引量を上げると水封部（青色）、吸引部（黄色）の順番に気泡が発生します
　　４、吸引部の気泡が確認できたら、吸引装置接続コネ

チェスト・ドレーン・バッグ（住友ベークライト）の使い方：
　本体使用時の管理
/
　・水封部に規定通りの水量が注入されているか定期的に確認する
　　　水封部の水がなくなるとウォーターシールの効果がなくなります
　　　水封部に注水されていないと、外科医と胸腔内が直接繋がり、陰圧の空気が逆流して肺の虚脱が起こったり、換気が妨げられるので、注意が必要です
/
```
--------

ASSISTANT: チェストドレインを使用する際の注意事項として、以下のことが挙げられます。

1. 患者が移動する際は、排液チューブをクランプしないこと。突然咳き込んだりした際に胸腔からでたエアの逃げ道がなくなります。クランプすると胸腔内に逆流して、肺虚脱を再発します。
2. 気密性の確認が必要です。水封部・吸引部への注水後、吸引器接続チューブをクランプします。黄色ラベルの貼られた吸引装置接続チューブのコネクタと吸引装置を接続し、吸引のスイッチをオンにし、徐々に吸引量を上げると、水封部（青色）、吸引部（黄色）の順番に気泡が発生します。吸引部の気泡が確認できたら、吸引装置接続コネクタと吸引装置を接続します。吸引のスイッチをオンにし、徐々に吸引量を上げると、水封部（青色）、吸引部（黄色）の順番に気泡が発生します。吸引部の気泡が確認できたら、吸引装置接続コネクタと吸引装置を接続します。吸引のスイッチをオンにし、徐々に吸引量を上げると、水封部（青色）、吸引部（黄色）の順番に気泡が発生します。吸引部の気泡が確認できたら、吸引装置接続コネクタと吸引装置を接続しま

In [19]:
from sentence_transformers import SentenceTransformer, util
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

In [21]:
import torch

questions = [
    "チェストドレインの使用上の注意事項を教えてください。",
    "5cmH2Oに設定したいときに、吸引部に滅菌水を何mL入れればいいですか？",
]

q_embs = torch.tensor(emb_model.encode(questions)).to(device)
q_embs = q_embs.reshape(q_embs.shape[0], q_embs.shape[1])
inf_embs = torch.load("embs0.pt").to(device)

q_embs.shape, inf_embs.shape

(torch.Size([2, 1024]), torch.Size([10, 1024]))

In [22]:
relevance = torch.matmul(q_embs, inf_embs.T)

In [26]:
values, inf_ids = torch.topk(relevance, k=3, dim=1)

infs = [[] for _ in range(relevance.shape[0])]
rels = [[] for _ in range(relevance.shape[0])]

import json
json_file_path = "./chunks.json"
with open(json_file_path, 'r') as json_file:
    sentences_list = json.load(json_file)

for i in range(len(inf_ids)):
    for id in inf_ids[i]:
        infs[i].append(sentences_list[id])
    for rel in values[i]:
        rels[i].append(rel)


for i in range(relevance.shape[0]):
    print()
    print(f"-----------informations for question{i} -----------")
    print(f"question : {questions[i]}")
    for j in range(len(infs[i])):
        print(f"--relevance {rels[i][j]}")
        print(f"--information {j}--")
        print(infs[i][j])
        print("---------")


-----------informations for question0 -----------
question : チェストドレインの使用上の注意事項を教えてください。
--relevance 265.30877685546875
--information 0--

　・患者が移動する際は排液チューブをクランプしない
　　突然咳き込んだりした際に胸腔からでたエアの逃げ道がなくなります
　　クランプすると胸腔内に逆流し、肺虚脱を再発します
---------
--relevance 265.2657470703125
--information 1--

　気密性の確認
　　１、水封部・吸引部への注水後、今日食うドレーン接続チューブをクランプします
　　２、黄色ラベルの貼られた吸引装置接続チューブのコネクタと吸引装置を接続します
　　３、吸引のスイッチをオンにし、徐々に吸引量を上げると水封部（青色）、吸引部（黄色）の順番に気泡が発生します
　　４、吸引部の気泡が確認できたら、吸引装置接続コネ
---------
--relevance 260.50201416015625
--information 2--

チェスト・ドレーン・バッグ（住友ベークライト）の使い方：
　本体使用時の管理
/
　・水封部に規定通りの水量が注入されているか定期的に確認する
　　　水封部の水がなくなるとウォーターシールの効果がなくなります
　　　水封部に注水されていないと、外科医と胸腔内が直接繋がり、陰圧の空気が逆流して肺の虚脱が起こったり、換気が妨げられるので、注意が必要です
/
---------

-----------informations for question1 -----------
question : 5cmH2Oに設定したいときに、吸引部に滅菌水を何mL入れればいいですか？
--relevance 276.834228515625
--information 0--

　気密性の確認
　　１、水封部・吸引部への注水後、今日食うドレーン接続チューブをクランプします
　　２、黄色ラベルの貼られた吸引装置接続チューブのコネクタと吸引装置を接続します
　　３、吸引のスイッチをオンにし、徐々に吸引量を上げると水封部（青色）、吸引部（黄色）の順番に気泡が発生します
　　

In [None]:
# It's quite interesting fact that the instruction should be short.

## Chat 2 (Test for special tokens and repeatable system)

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/112k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [2]:
import os

def get_infs(question):
    q_embs = torch.tensor(emb_model.encode(question)).to(device)


    import json
    
    with open("database.json") as json_file:
        database = json.load(json_file)
    
    top_directory = "db_embs"
    inf_embs_dict = {} # it's not used now
    inf_embs = torch.tensor([])
    sentences_list = []
    
    # Traverse through all directories and files recursively
    for root, dirs, files in os.walk(top_directory):
        for file in files:
            if file.endswith(".pt"):
                file_path = os.path.join(root, file)
                file_name = os.path.basename(file_path)[:-3]
                inf_embs_dict[file_name] = torch.load(file_path)
                inf_embs = torch.cat((inf_embs, inf_embs_dict[file_name]), dim = 0)
                sentences_list += database[file_name]

    inf_embs = inf_embs.to(device)
    
    relevance = torch.matmul(q_embs, inf_embs.T)
    
    values, inf_ids = torch.topk(relevance, k=1, dim=0)
    
    infs = []
    
    for id in inf_ids:
        infs.append(sentences_list[id])

    return infs

def get_prompt(q, inf_list):
    prompt = q + "\nCode:"
    for inf in inf_list:
        prompt += "\n```" + inf + "```"
        
    return prompt

def new_prompt(system, environment, question, answer):
    if "[TAKE]" in answer:
        infs = get_infs(question + answer)  #ここにenvironmentも入れてseatで行ければかなりでかい
        return system + "\nEnvironment:\n" + environment + "\nUSER: " + get_prompt(question, infs) + "[/INST]"

    elif "[END]" in answer:
        return None

    else:
        return system + "\nEnvironment:\n" + environment + "\nUSER: " + question + "\nCode:```\n```[/INST]"


In [3]:
system = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code and prompt [END] at last. If not, prompt [TAKE] again until you get code that is adequate for answering the question. Therefore, you must include two things in your answer. First, you should summarize all the information you get from the code so far and tell what else you need to answer the question. Second, you must add [TAKE] or [END] at the end to decide whether or not you extract more information from the external memory.
"""

environment = """
GKV code which means gyro kinetic vlasov simulation code is given as the external memory. Question bellow is about this.
"""

question = """
Explain the entire structure of the code.
"""

answer = ""

for i in range(5):
    prompt = new_prompt(system, environment, question, answer)

    if prompt == None:
        print("---- END ----")
        break
    
    print()
    print("------- PROMPT --------")
    print(prompt)
    
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.8,
        #streamer=streamer,
    )
    
    answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])

    if answer != "": environment = answer
    
    print()
    print("------- OUTPUT --------")
    print(answer)

    


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- PROMPT --------

<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. There is an external memory that you can take code from. You can do so by prompting [TAKE]. If the code you receive is adequate for answering the question, you answer the question while explaining the code and prompt [END] at last. If not, prompt [TAKE] again until you get code that is adequate for answering the question. Therefore, you must include two things in your answer. First, you should summarize all the information you get from the code so far and tell what else you need to answer the question. Second, you must add [TAKE] or [END] at the end to decide whether or not you extract more information from the external memory.

Environment:

GKV code which means gyro kinetic vlasov simulation code is given as the external memory. Question bellow is about this.

USER: 
Explain the entire structure of the code.

Code:```
```[/INST]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
I'm glad you're interested in the GKV (Gyro Kinetic Vlasov) simulation code! However, your question is quite broad as the entire structure of a code can involve many aspects such as its directory structure, file organization, data types, algorithms, and dependencies. I'll do my best to give you a high-level overview based on the provided code.

The given code seems to be written in Fortran90, starting with an `program main` statement which is the entry point of the code. It includes several subroutines and functions defined within the program. Let me introduce some of the key components based on the provided code:

1. `initialize()`: This subroutine sets initial conditions for the particles and fields. It calls other subroutines to initialize the particle distributions and fields.
2. `advance_time()`: This subroutine updates the particle positions and velocities based on the electric and magnetic fields, and then updates the fields based on the particle distrib

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
Based on the provided code snippet, the entire structure of the code can be described as follows:

The given code is a Fortran90 subroutine named `advnc_rkgsteps_rev`. It appears to perform a time integration of the GKV (Gyro Kinetic Vlasov) equations for a plasma in an external magnetic field using the relativistic formulation.

The subroutine begins with several allocations for arrays that will store the particle distribution functions, charge density, electric and magnetic fields, and other related quantities. The variables are allocated with multi-dimensional arrays for the spatial dimensions and two velocity dimensions.

The main computational loop starts with a `do` loop that runs for four steps. In each step, the code checks some conditions related to the shear flow and the collision model, and then calls several subroutines and functions to update the particle distribution functions, electric and magnetic fields, and other quantities.

1. `caldlt_rev()`

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
Based on the provided code snippet, the entire structure of the code can be described as follows:

The given code is a Fortran90 subroutine named `advnc_rkgsteps_rev`. It appears to perform a time integration of the GKV (Gyro Kinetic Vlasov) equations for a plasma in an external magnetic field using the relativistic formulation.

The subroutine begins with several allocations for arrays that will store the particle distribution functions, charge density, electric and magnetic fields, and other related quantities. The variables are allocated with multi-dimensional arrays for the spatial dimensions and two velocity dimensions.

The main computational loop starts with a `do` loop that runs for four steps. In each step, the code checks some conditions related to the shear flow and the collision model, and then calls several subroutines and functions to update the particle distribution functions, electric and magnetic fields, and other quantities.

1. `caldlt_rev()`

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
Based on the provided code snippet, the entire structure of the code can be described as follows:

The given code is a Fortran90 subroutine named `advnc_rkgsteps_rev`. It is a time integration routine for the GKV (Gyro Kinetic Vlasov) equations for a plasma in an external magnetic field using the relativistic formulation.

The subroutine begins by allocating memory for the particle distribution functions, charge density, electric and magnetic fields, and other related quantities in multi-dimensional arrays for spatial dimensions and two velocity dimensions.

The main computational loop runs for four steps. In each step, the code checks certain conditions related to the shear flow and the collision model, and then calls several subroutines and functions to update the particle distribution functions, electric and magnetic fields, and other quantities.

1. `caldlt_rev()`: This subroutine updates the particle distributions using the relativistic collision term, whi

## Chat 3 (Summarize all code)

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/112k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
import json

file_path = "database.json"

with open(file_path) as json_file:
    database = json.load(json_file)


for key, chunks in database.items():
    print()
    print("-------------" + key + "---------------")
    print()
    
    system = """
    <s>[INST]You are an assistant who helps users understand a certain large code base. You should explain structure of entire code like what each function does and how all functions are connected. The goven code is too long for you to summarize all at once, so you are given part of the code and summarize them all step by step. 
    
    You are given two elements to do the task. One is a code snippet you are trying to summarize from now. The other is summarization of all code you have summarized so far. Note that you must output aummarization of the code snippet including the given summarization so far. You must not lose the information about the code summarization so far.
    """
    
    summary = """
    The code snippet bellow is the first snippet of the code
    """
    
    for i in range(len(chunks)):
        code = "\nCode:```\n" + chunks[i] + "```"
        prompt = system + "\nSummary so far: " + summary + code + "[/INST]"
        
        print()
        print("------- PROMPT --------")
        print(prompt)
        
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        
        answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
        summary = answer
        
        print()
        print("--------- OUTPUT --------")
        print(answer)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



-------------gkvp_advanc.f90 is a code for preparation of gkv code---------------


------- PROMPT --------

    <s>[INST]You are an assistant who helps users understand a certain large code base. You should explain structure of entire code like what each function does and how all functions are connected. The goven code is too long for you to summarize all at once, so you are given part of the code and summarize them all step by step. 
    
    You are given two elements to do the task. One is a code snippet you are trying to summarize from now. The other is summarization of all code you have summarized so far. Note that you must output aummarization of the code snippet including the given summarization so far. You must not lose the information about the code summarization so far.
    
Summary so far: 
    The code snippet bellow is the first snippet of the code
    
Code:```

MODULE GKV_advnc
!-------------------------------------------------------------------------------
!
!    Calc

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet is a part of a Fortran module named "GKV\_advnc." The module is designed to calculate the derivative of a quantity with respect to time using the Runge-Kutta-Gill method. It also includes updates to the history of the file "gkvp\_advnc.f90."

The code begins by declaring the necessary modules and variables that will be used throughout the module. Some of these modules include "GKV\_header," "GKV\_mpienv," and "GKV\_fld," which are likely to be important for handling headers, managing parallel environments, and working with electric and magnetic fields, respectively. Other modules, such as "GKV\_colli" and "GKV\_colliimp," are related to collision modeling.

The code also includes some declarations specific to this module, such as the integer variables "nchunk\_zv," "nchunk\_yzv," and "nchunk\_yz," and the public subroutines "advnc\_rkgsteps\_rev" and "caldlt\_rev."

The module also contains some history comments about the changes made to the f

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet is a part of the Fortran module "GKV\_advnc" and is responsible for implementing time integration of the Grad-Shafranov equation using the Runge-Kutta-Gill method. It takes input parameters "colliflag," "ff," "phi," "Al," and "hh," which represent collision flags, electric and magnetic fields, potentials, and magnetic fields, respectively. The output is stored in the arrays "qh," "dh," "cf," and "ef."

The routine starts by initializing the array "qh" for storing intermediate results, as well as the arrays "dh," "cf," and "ef" which will be used in the Runge-Kutta-Gill method. It also initializes some integer variables, such as "mx," "my," "iz," "iv," "im," "istep," and "iflg." "iflg" is a save variable used to check if it has been initialized before.

The code then uses OpenMP parallelization to divide the workload into multiple threads for better performance. The master thread calculates the chunk sizes based on the total number of threads a

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet is a continuation of the "advnc\_rkgsteps\_rev" subroutine from the Fortran module "GKV\_advnc." The code is responsible for implementing the time integration of the Grad-Shafranov equation using the Runge-Kutta-Gill method. The routine performs the calculations for all components of the magnetic field.

Following the summary provided so far, this snippet first initializes the arrays "dh," "cf," and "ef" for storing intermediate results. It then enters a loop named "istep," which iterates through the steps of the Runge-Kutta-Gill method.

Before calculating the RKG steps, the code checks for specific conditions related to the shearflow rotating flux tube model. If these conditions are met, it increments the time and sets up parameters for the collision subroutines.

The main RKG calculations begin with a call to the subroutine "caldlt\_rev," which calculates the derivatives of the magnetic fields with respect to spatial and velocity coordinate

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code snippet is a part of the "rkg" subroutine within the "GKV\_advnc" Fortran module. It continues the implementation of the time integration of the Grad-Shafranov equation using the Runge-Kutta-Gill (RKG) method. The RKG calculations are performed for each component of the magnetic field.

The subroutine uses input arrays "hh" and "qh" representing magnetic field and quantum potential, respectively. It also accepts an integer array "istep" that specifies the current step in the RKG method. The output array "dh" stores intermediate results.

The subroutine begins by setting the coefficients 'c1', 'c2', 'cq', and 'c0' based on the step number 'istep' in the RKG method. Then, it enters a parallel loop for calculating the updated values for 'hh' and 'qh' using OpenMP parallelization. The loop iterates over indices 'mx', 'my', 'iz', 'iv', and 'im'.

The subroutine does not seem to have any calls to the collision subroutines or electric/magnetic field calculat

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
In the given code snippet, we have a Fortran subroutine named "caldlt\_rev". This subroutine calculates the increment of delta-f within a time step for a given magnetic field, quantum potential, and other input arrays. The subroutine uses the Runge-Kutta-Gill (RKG) method for time integration of the Grad-Shafranov equation, but without any calls to collision or electric/magnetic field calculation subroutines.

The subroutine accepts several input arrays:

1. `colliflag`: A character array indicating whether the calculations are collisional or collisionless.
2. `ff`: A complex array representing the magnetic field.
3. `phi`: A complex array representing the magnetic potential.
4. `Al`: A complex array representing the quantum potential.
5. `hh`: A complex array representing the magnetic field at the current step.
6. `dh`: A complex array for storing intermediate results.
7. `cf` and `ef`: Complex arrays for storing the updated magnetic field values after the tim

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
In the given code snippet, we have an conditional structure surrounding the call to the "caldlt\_rev" subroutine. The condition checks the value of the 'colliflag' array, which indicates the type of calculations to be performed. If the calculations are collisionless (colliflag = "collisionless"), the code enters a parallel region using OpenMP for calculating the updated values for the magnetic field 'cf' using the "caldlt\_rev" subroutine. The parallel region is initiated with the directive '!!$OMP parallel workshare'.

Inside the parallel region, an assignment statement initializes 'cf' to (0._DP, 0._DP) which is likely zeros for all components. This step is likely done to ensure that the output arrays are properly initialized for all the threads in parallel region.

Apart from this, the given code snippet does not contain new information about the "caldlt\_rev" subroutine or its role in the larger context. So, the summarization remains the same as previously 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
In this new part of the code, we have an if statement that checks the value of 'colliflag'. If it is equal to "collisional", the code enters a parallel region using OpenMP for performing collision calculations. These calculations are different from the collisionless calculations performed in the previous summary.

Inside the parallel region, three nested loops iterate over the grid indices for 'dh' (a multidimensional array representing the electric field), 'cf' (magnetic field), and 'ef' (electric field). The collision terms are added to the electric field, and the updated values of 'dh' are calculated by adding the collision term, the magnetic field, and subtracting the electric field. This is done in parallel for all threads, and the 'nowait' clause indicates that the threads do not wait for each other to finish before moving on to the next iteration.

If 'colliflag' is not equal to "collisional", the code does not enter the parallel region, and the 'dh' cal

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
The given code snippet continues with the check for the 'colliflag' being "collisionless". If this condition is met, the program continues with the same parallel region as before, but without the collision term calculations. The nested loops for 'dh', 'cf' and 'ef' iterations remain the same, with the calculation of the updated electric field 'dh'. The rest of the code after this parallel region, such as the 'zfilter' call and the deallocation of 'psi' and 'chi', also remains unchanged.

Therefore, the updated summarization with the given code snippet included is:

The given code performs calculations based on the value of 'colliflag'. When 'colliflag' is "collisional", collision terms are added to the electric field, magnetic field, and density in a parallel region using OpenMP. The collision calculations are different from the collisionless calculations. When 'colliflag' is "collisionless", the code skips the parallel region and the electric field calculation

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
The given code snippet, `caldlt_linear`, is a subroutine that calculates the increment of delta-f within a time step in a plasma simulation. It takes four input/output complex arrays: `ff`, `psi`, `chi`, and `dh`, with dimensions based on the grid size and number of velocity components.

The subroutine begins by allocating several 2D complex arrays: `zb1be`, `zb1te`, `zb2be`, `zb2te`, `zb1bo`, `zb1to`, `zb2bo`, `zb2to`, `vb1e`, `vb2e`, `vb1o`, and `vb2o`. These arrays are used for intermediate calculations.

The subroutine then calls the `bndry_bound_e` subroutine for boundary treatment of the `psi` array. After that, the `literm_k_rev` subroutine is called to perform some calculations using the input arrays `ff`, `psi`, `chi`, and output array `dh`.

Since this subroutine is used within the collisionless region of the code, it doesn't include any collision term calculations. The non-collision part of the code remains the same, with the calculation of the updat

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
The given code is a modified version of the `caldlt_linear` subroutine, which performs calculations for collisionless and collisional plasma simulations based on the value of `colliflag`. When `colliflag` is set to "collisional," the code executes a parallel region using OpenMP, where collision terms are added to the electric field, magnetic field, and density arrays. The parallel region includes the calls to the `bndry_zv_sendrecv` and `bndry_zv_buffin/out` subroutines, as well as the `literm_zv` subroutine, to perform the collision calculations. The calculations are different from the collisionless calculations.

However, when `colliflag` is set to "collisionless," the code skips the parallel region, and the electric field calculation does not include the collision terms. In this case, the `caldlt_linear` subroutine is used for the calculations, and the arrays `zb1be`, `zb1te`, `zb2be`, `zb2te`, `zb1bo`, `zb1to`, `zb2bo`, `zb2to`, `vb1e`, `vb2e`, `vb1o`, and 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code segment comes after the completion of the parallel region in the `caldlt_linear` subroutine. It begins by ending the OpenMP parallel region using `$OMP end parallel`, indicating that the parallel calculations have finished. Following that, the code deallocates the memory allocated for various intermediate arrays (`zb1be`, `zb1te`, `zb2be`, `zb2te`, `zb1bo`, `zb1to`, `zb2bo`, `zb2to`, `vb1e`, `vb2e`, `vb1o`, and `vb2o`) that were used during the parallel region calculations, specifically, in the case of collisional plasma simulations. These arrays were not needed in the collisionless case, and their deallocation ensures that memory is freed up for other uses as required by the program.

The provided summary so far mentioned that, when `colliflag` is set to "collisionless", the code skips the parallel region and uses the `caldlt_linear` subroutine for calculations. Additionally, it was explained that, when `colliflag` is set to "collisional", the code e

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
This code segment contains the definition of the `literm_k_rev` subroutine, which calculates the z-derivative of the array `ff`. The subroutine takes three complex arrays as inputs: `ff`, `psi`, and `chi`, and outputs the calculated result in the array `lf`.

When executed, the subroutine first initializes several local variables, including `cs1` and `cs2` which are used for parallelization, and `kvd` and `kvs` which are temporary variables used in the calculation. It also sets the OpenMP parallel region with the directive `$OMP parallel do collapse(3) private(kvd,kvs)`.

Within the parallel region, the code iterates through the indices of the arrays `im`, `iv`, `iz`, `my`, and `mx`, performing the calculation for each index using the given `ff`, `psi`, and `chi` arrays. The calculation involves using the `kvd` and `kvs` variables to determine the contribution of the grid point to the z-derivative, and then updating the `lf` array accordingly.

After the parall

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
The given code segment is a part of the `literm_zv` subroutine, which calculates the z-derivative of the array `ff` in a collisional plasma simulation when `colliflag` is set to "collisional". The subroutine takes three complex arrays as inputs: `ff`, `psi`, and `im`, and outputs the calculated result in the array `lf`.

The subroutine starts by initializing several local variables and setting the OpenMP parallel region with the directive `$OMP parallel do collapse(3) private(kvd,kvs)`. Within the parallel region, the code iterates through the indices of the arrays `im`, `iv`, `iz`, `my`, and `mx` and performs the calculation for each index using the given `ff`, `psi`, and `im` arrays. The calculations are done using the temporary variables `kvd` and `kvs`, and the updated result is stored in the `lf` array.

The calculated derivatives are influenced by several factors, including the grid points, the velocity, and the magnetic field. The `cefz` and `cefz2` arra

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
In the given code segment, we have an additional nested OpenMP parallel region for calculating the z-derivative of the array `ff` when `z_calc` equals "up5". This calculation is specific to a shear flow rotating flux tube model.

Within this parallel region, the code iterates through the indices `iv`, `iz`, `my`, and `mx`. For each index, it updates the corresponding entry in the `lf` array with the sum of several terms.

The first term is derived from the `rotating_cf4` and `cefz2` arrays, which represent the contributions of the grid points and the magnetic field to the z-derivative for the shear flow rotating flux tube model. The second term is derived from the `cefv` variable, which represents the contribution of the velocity. The third term is derived from the `mir` function, the magnetic field, and the `cs1` variable, which incorporates magnetic mirroring effects. The fourth term is derived from the `psi` array, and the fifth term is derived from the `cs1

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



------- OUTPUT --------
In the given code segment, we have an additional nested OpenMP parallel region for calculating the z-derivative of the array `ff` when `z_calc` equals "up5" and `colliflag` is set to "collisional". This calculation is specific to the shear flow rotating flux tube model.

Within this parallel region, the code iterates through the indices `iv`, `iz`, `my`, and `mx`. For each index, it updates the corresponding entry in the `lf` array with the sum of several terms, including the contributions from `rotating_cf4` and `cefz2` arrays, `cefv` variable, the `mir` function, `psi` array, and the product of the velocity and magnetic field, as well as magnetic mirroring effects represented by the `cs1` variable.

The `cs1` variable is updated within the parallel region based on the magnetic mirroring effects, and the `art_diff` variable is used to apply artificial diffusion.

This code segment builds upon the previously summarized `literm_zv` subroutine, which calculates t

In [None]:
# this result shows that the summarization should be in 2 or 3 codes, or 2 or 3 code snippets is the limitation.
# Basically, the code shown just now is considered to 

## Chat 4 (Code editting)

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/112k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
import json

file_path = "database.json"

with open(file_path) as json_file:
    database = json.load(json_file)


for key, chunks in database.items():
    print()
    print("-------------" + key + "---------------")
    print()
    
    system = """
<s>[INST]You are an assistant who helps users understand a certain large code base. Please edit the code bellow and add comment so users can understand what the code is doing.
    """
    
    for i in range(len(chunks)):
        code = "\nCode:```\n" + chunks[i] + "```"
        prompt = system + code + "[/INST]"
        
        print()
        print("--------- PROMPT --------")
        print(prompt)
        
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        output_ids = model.generate(
            **input_ids,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.8,
            #streamer=streamer,
        )
        
        answer = tokenizer.decode(output_ids[0][len(input_ids[0]):])
    
        summary = answer
        
        print()
        print("--------- OUTPUT --------")
        print(answer)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



-------------gkvp_advanc.f90 is a code for preparation of gkv code---------------


--------- PROMPT --------

    <s>[INST]You are an assistant who helps users understand a certain large code base. Please edit the code bellow and add comment so users can understand what the code is doing.
    
Code:```

MODULE GKV_advnc
!-------------------------------------------------------------------------------
!
!    Calculate df/dt and time advance by Runge-Kutta-Gill method
!
!    Update history of gkvp_advnc.f90
!    --------------
!      gkvp_f0.62 (S. Maeyama, Mar 2023)
!        - Time-dependent metrics for rotating flux-tube model is implemented.
!          See lines at "!%%% For shearflow rotating flux tube model %%%".
!      gkvp_f0.57 (S. Maeyama, Oct 2020)
!        - Version number f0.57 is removed from filename.
!        - Unitialized access for padding iend_y<my is removed.
!
!-------------------------------------------------------------------------------

  use GKV_header
  use GKV

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
!-------------------------------------------------------------------------------
!
!   Module GKV_advnc: Calculate df/dt and time advance by Runge-Kutta-Gill method
!
!-------------------------------------------------------------------------------

 ! This module contains the subroutine for time integration using the
 ! Runge-Kutta-Gill method. The subroutine updates the electric and magnetic
 ! fields, electrostatic potential, ion density, and temperature. The method
 ! is described in detail in the paper "A new fully implicit time-explicit
 ! (FITE) Runge-Kutta time integration method for plasma simulations" by S. Maeyama.

 use GKV_header          ! GKV header file
 use GKV_mpienv           ! MPI environment
 use GKV_fld             ! Electric and magnetic fields
 use GKV_exb             ! Exchange and correlation energy terms
 use GKV_colli            ! Collision terms
 use GKV_colliimp         ! Implicit collision operator
 use GKV_bndry            ! Bou

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
```fortran!--------------------------------------
  ! Subroutine to perform time integration of the GK equation using RKG method
  SUBROUTINE advnc_rkgsteps_rev( colliflag, ff, phi, Al, hh )
!--------------------------------------

 ! Charactuer input flag indicating collision or collisionless regime
 character(15), intent(in) :: colliflag ! = "collisional" or "collisionless"

 ! Complex input and output arrays for field variables, potentials and auxiliary arrays
 complex(kind=DP), intent(inout), &
   dimension(-nx:nx,0:ny,-nz:nz-1,1-nvb:2*nv+nvb,0-nvb:nm+nvb) :: ff
 complex(kind=DP), intent(inout), &
   dimension(-nx:nx,0:ny,-nz:nz-1) :: phi, Al
 complex(kind=DP), intent(inout), &
   dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: hh

 ! Complex array for temporary storage
 complex(kind=DP), save, &
   dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: qh
 complex(kind=DP), dimension(:,:,:,:,:), allocatable :: dh, cf, ef

 ! Local indices
 integer :: mx, my, iz, 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
Below is the edited version of the provided Fortran code with added comments to help users understand the code:

```fortran
! Subroutine for advancing the RKG time steps in a reversed order for a shear flow rotating flux tube model
Subroutine advnc_rkgsteps_rev

   Implicit None

  ! Local variables
   integer, parameter :: nx, ny, nz, nv, nm, nsteps = 4
   real(kind=8), dimension(nx:-nx,0:ny,:,-nz:nz-1,1:2*nv,0:nm) :: hh, Al
   real(kind=8), dimension(hh)                             :: dh, cf, ef
   real(kind=8), dimension(nx:-nx,0:ny,:,-nz:nz-1,1:2*nv,0:nm) :: ff, phi
   real(kind=8)                                            :: gamma_e,beta, dt, trim : real*8
   character(len=12)                                       :: col_type
   integer                                                 :: istep, colliflag

  ! Allocate memories for dh, cf, and ef
   allocate( dh(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) )
   allocate( cf(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) )
   all

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
!--------------------------------------
 ! Subroutine rkg: Runge-Kutta-Gill method for time integration of complex variables.
 !--------------------------------------

 ! Inputs:
 ! hh - current time-dependent solution (complex, 3D array of size nx x ny x nz x 2*(number of variables) x number of species)
 ! dh - time step for the current integration step (complex, 3D array of size nx x ny x nz x 2*(number of variables) x number of species)
 ! istep - order of the Runge-Kutta-Gill method (integer, default is 1, can be 1, 2, 3 or 4)

 ! Outputs:
 ! hh, qh - updated time-dependent solution and its corresponding time-step (complex, same size as input hh)

 complex(kind=DP), intent(inout), &
   dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: hh, qh  % Input/Output arrays of complex variables
 complex(kind=DP), intent(in), &
   dimension(-nx:nx,0:ny,-nz:nz-1,1:2*nv,0:nm) :: dh  % Input array of complex time steps
 integer, intent(in) :: istep % Input: order of the R

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
!--------------------------------------
 ! SUBROUTINE caldlt_rev: Performs the revision of the Delta-F calculation for collisional or collisionless cases.
 !--------------------------------------

 ! -- Input parameters --
 ! colliflag: A character string indicating the type of calculation to be performed, either "collisional" or "collisionless".
 ! ff: A complex, 3D, multi-dimensional array of size (nx x ny x (nz-nzb : nz-1 + nzb) x 1-nvb : 2*nv+nvb x 0-nvb : nm+nvb) that stores the electric field.
 ! phi: A complex, 3D, multi-dimensional array of size (nx x ny x (nz-nzb : nz-1)) that stores the scalar potential.
 ! Al: A complex, 3D, multi-dimensional array of size (nx x ny x (nz-nzb : nz-1) x 1:2*nv x 0:nm) that stores the vector potential.

 ! -- Output parameters --
 ! dh, cf, ef: Complex, 3D, multi-dimensional arrays of size (nx x ny x (nz-nzb : nz-1) x 1:2*nv x 0:nm) that store the updated delta-H, delta-C, and delta-E fields, respectively.

 ! -- Loca

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
I have added comments to the code to make it more readable and understandable for users. I assumed that the variables have been defined before this code snippet and that "ff," "psi," "chi," "dh," "phi," "cf," "colliflag," "col_type," "olog," and "ierr_mpi" are arrays or scalars.

```fortran
! This section initializes the clock for the statistics.
call clock_sta(13)

! Uncomment this line if you want to start the fixed-point solver 'literm'.
!% Uncomment the line below instead of the comment above to use it.
! call fapp_start("literm",13,1)

!% Linear collisionless term
call caldlt_linear( ff, psi, chi, dh )

! If collision flag is set to 'collisional', the following collision terms will be executed.
if ( trim(colliflag) == "collisional" ) then

  ! If the collision type is 'LB', execute LB collision term.
  if ( trim(col_type) == "LB" ) then
    call colli_LB( ff, phi, cf )
  ! If the collision type is 'full' or 'lorentz', execute full collision term and impu

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
```vbnet++-- This code segment is designed for computing the time evolution of a plasma using a particle-in-cell (PIC) simulation method. The commented sections below provide an explanation for each part of the code.

 !%%% Stop the clock for the linear term calculation (fapp_stop function call) and start the clock for the next step (nlterm) %%%
 ! call fapp_stop("literm",13,1)
 ! This line stops the clock associated with the linear term calculation and saves its elapsed time.
 ! The first argument is the name of the function being stopped, "literm" in this case.
 ! The second argument is the clock ID, 13.
 ! The third argument is a flag indicating the start or end of the function call, 1 for end.
 ! The next line, call clock_end(13), stops the timer for the linear term calculation and saves its elapsed time.

 ! Call the nonlinear term calculation function and start its clock %%%
 ! call clock_sta(14)
 ! This line starts a new clock with ID 14 for the nonlin

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
```fortran
! Caldlt_rev is a subroutine that performs a reversed-order calculation of the density and potential functions in a 3D grid.
! It checks certain flags to determine which calculation to perform: collisionless or collisional.

      subroutine caldlt_rev( nx, ny, nz, nm, nv, ef, dh, trim, ist_y, iend_y, z_filt, clock_sta, clock_end )

        implicit none

        integer, intent(in) :: nx, ny, nz, nm, nv, ist_y, iend_y, z_filt
        character(len=9), intent(in) :: trim
        real, intent(inout) :: ef(nx, ny, nz, 2*nv, nm)
        real, intent(out) :: dh(nx, ny, nz, 2*nv, nm)
        callable :: clock_sta, clock_end

        character(len=12) :: colliflag
        colliflag = "collisionless"
        if ( trim(colliflag) == colliflag ) then ! if the trimmed string is equal to "collisionless"
            ! Perform the collisionless calculation in parallel using OpenMP
            ! The index variables: im, iv, iz
            ! im: index for the ene

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--------- OUTPUT --------
!--------------------------------------
 ! Subroutine to calculate the increment of delta-f within a time step
 !--------------------------------------

 ! Subroutine for linear part of the calculation of delta-f

 subroutine caldlt_linear( ff, psi, chi, dh )
 !--------------------------------------

 !Arguments:
 ! ff: Complex 2D, 3D, 4D array of size (-nx:nx, 0:ny, -nz:nz-1+nzb, 1-nvb:2*nv+nvb, 0-nvb:nm+nvb) containing the current field values
 ! psi: Complex 2D, 3D array of size (-nx:nx, 0:ny, 0:nm) containing the boundary condition values
 ! chi: Complex 2D, 3D array of size (-nx:nx, 0:ny, 0:nm) containing the charge distribution
 ! dh: Complex 2D, 3D array of size (-nx:nx, 0:ny, nz:nz-1, 1:2*nv, 0:nm) to store the calculated increment of delta-f

 !------------------------------------------------------------------

 ! Local variables:
 complex, allocatable :: zb1be(:,:,:,:), zb1te(:,:,:,:), zb2be(:,:,:,:), zb2te(:,:,:,:), &
                       zb1bo(:

In [None]:
# code editting sometimes make a mistake

## Chat 5 (with call graph)

In [2]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/113k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [4]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. YOU MUST NOT ANSWER ANYTHING IRRELEVANT TO THE QUESTION.

Here's a part of the code you start analyzing with:
```
# this class is only considering one layer
class SEAT4(nn.Module):
    
    def __init__(self, embed_dim, thres, num_layers):
        super(SEAT4, self).__init__()
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.thres = thres.float().to(device)

        self.q_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.k_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.v_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)

        self.up_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        self.down_projs = nn.ModuleList([nn.Linear(embed_dim, embed_dim) for _ in range(num_layers)]).to(device)
        
        for layer in range(num_layers):
            self.q_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.q_projs[layer].bias.data.fill_(0.0).to(device)
            self.k_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.k_projs[layer].bias.data.fill_(0.0).to(device)
            self.v_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.v_projs[layer].bias.data.fill_(0.0).to(device)
            self.up_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.up_projs[layer].bias.data.fill_(0.0).to(device)
            self.down_projs[layer].weight.data = torch.eye(embed_dim).to(device)
            self.down_projs[layer].bias.data.fill_(0.0).to(device)
```

Question:
What does this neural network do?
[/INST]

To answer the question, I need to know what's the input and output of the code.
"""

input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Based on the provided code snippet, we have a custom PyTorch module named `SEAT4`. This class is defining a single layer of a neural network for a self-attention model. The layers in this class are used for the following purposes:

1. `q_projs`, `k_projs`, and `v_projs`: These are sets of Linear layers implementing the Query, Key, and Value projections, respectively, used in the self-attention mechanism.

2. `up_projs` and `down_projs`: These are sets of Linear layers used for the attention update and attention output, respectively, in the self-attention mechanism.

The code initializes these Linear layers with the specified `embed_dim` and moves them to the device (e.g., GPU). Also, the weights of the Linear layers are set to identity matrices (eye matrices) and biases are set to zero for each layer. This process is done for all the layers in the given number (`num_layers`).

This single layer of the neural network is part of a self-attention mechanism, which is used to calculate the

In [6]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. YOU MUST NOT ANSWER ANYTHING IRRELEVANT TO THE QUESTION.

Here's a part of the code you start analyzing with:
```
# set hyper parameters
num_epoch = 100
thres = torch.tensor([2, 10])
embed_dim = 1024
num_layers = 4
batch_size = 100  #adjust based on gpu memory
lr = 5e-6

# initializing SEAT
se_model = SEAT4(embed_dim, thres, num_layers)

optimizer = optim.SGD(se_model.parameters(), lr=lr)

loss_list = []
acc_list = []

# Preparing dataset
q_embs_train = torch.load("./squad_q_embs0.pt")
inf_embs_train = torch.load("./squad_inf_embs0.pt")
q_embs_test = torch.load("./squad_q_embs1.pt").to(device)
inf_embs_test = torch.load("./squad_inf_embs1.pt").to(device)

num_dataset_train = q_embs_train.shape[0]
num_dataset_test = q_embs_test.shape[0]

inf_one_zero_train = torch.eye(num_dataset_train).to(device)
inf_one_zero_test = torch.eye(num_dataset_test).to(device)

print(q_embs_train.shape, inf_embs_train.shape)

print(f"num_epoch:{num_epoch}")
print(f"num_dataset_train:{num_dataset_train}, num_dataset_test:{num_dataset_test}")


# scheduler
def set_lr(epoch, optimizer):
    global lr
    if epoch > 30:
        # Update learning rate to a new value
        lr = np.exp(-0.02) * lr
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    return optimizer
```

Question:
What does this neural network do?
[/INST]

To answer the question, I need to know what's the input and output of the code.
"""

input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Based on the given code snippet, we can see that this part of the code initializes a neural network model called SEAT4 with given hyperparameters, loads data, and prepares it for training. The model takes in query and infobox embeddings as inputs and processes them to determine the answer similarity between the query and the infobox. The output is not explicitly shown in the given code.

However, the code does not contain any information about the input shape, loss function, or output of the `SEAT4` model.

So, without that information, it is difficult to give a definitive answer to what exactly the neural network does based on the given code alone.

In summary, the code sets up the environment for training a neural network named `SEAT4` on a given dataset comprised of query and infobox embeddings, but we don't have enough information to determine the specific function or behavior of the neural network.


In [7]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. YOU MUST NOT ANSWER ANYTHING IRRELEVANT TO THE QUESTION.

Here's a part of the code you start analyzing with:
```
import torch

device = "cuda" if torch.cuda.is_available else "cpu"
model.to(device)

'''
def get_cat_list(series):
    cat_list = []
    num_rows = 0
    for text_list in series:
        num_rows += 1
        cat_list += text_list
    return cat_list, num_rows
'''

def save_outputs(text_series, batch_size, file_path):
    num_batch = 0

    for i in range(0, len(text_series), batch_size):
        batch_text_series = text_series[i:i+batch_size]
        num_rows = len(batch_text_series)
        #inputs = tokenizer(batch_text_series, padding="max_length", max_length=max_length, truncation=True, return_tensors="pt")
        #inputs.to(device)
        
        # Get model outputs
        with torch.no_grad():
            outputs = torch.tensor(model.encode(batch_text_series))
            #outputs = outputs.reshape(num_rows, max_length, -1)
            print("outputs.shape : ", outputs.shape)
            torch.save(outputs.detach().cpu(), file_path+str(num_batch)+".pt")
            num_batch += 1

            del outputs
        
        print(100*(i+batch_size)/len(text_series), "% finished")

#batch_size_questions = 100  # Adjust based on your GPU memory
batch_size = 20000

#max_length_questions = 10  # Set all token length same

# Adjust considering cpu memory. torch.save and torch.load takes much time,
# so output should be accumulated in cpu memory as much as possible.
# ex. total size of data is around 400GB and cpu memory is around 58GB,
# therefore, around 10% of data can be accumulated in cpu memory. 
#save_rate = 1

# here each list in df["chunks"] must have same length
save_outputs(df["sentences"], batch_size, "embs")
```
"""

input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


This code is used for saving model outputs in a batch. The model is first moved to GPU if it's available, then the function `save_outputs()` is used to save outputs from the model. This function processes the text data in batches and saves the model outputs as .pt files. The `get_cat_list()` function is not related to the `save_outputs()` and is used for getting the number of rows in a list.

The code does not contain anything related to questions, therefore it can't answer any question related to them. It's only saving model outputs for a given text data in batches.


In [None]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question.

Here's a part of the code you start analyzing with:
'''
class SEAT4:
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
        

    def get_inf_one_zero(self, attention):
        return torch.where(attention > self.thres[1], 1, 0)
        

    def get_path_one_zero(self, attention):
        return torch.where(attention > self.thres[0], 1, 0)
'''

Call Graphs:
attention = self(q_embs, inf_embs) is connected to SEAT4 class, forward(self, q_embs, inf_embs) function

Question:
What does this neural network do?
[/INST]
"""

input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


To answer the question, we need more context about the overall structure of the neural network and the purpose of the `SEAT4` class. Based on the provided code, we can see that `SEAT4` is a class that defines a method `evaluate`. This method takes in query embeddings `q_embs` and information embeddings `inf_embs`, computes an attention score, and then uses this attention score to determine the `inf_one_zero` vector, which is then compared to the actual `inf_one_zero` vector to compute loss and accuracy. The `get_inf_one_zero` and `get_path_one_zero` methods are used to extract specific parts of the attention score.

However, without knowing the dimensions of `q_embs` and `inf_embs`, the dimensions of `attention`, 

In [3]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. 

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""

input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


To answer the first question, the loss function of the `SEAT4` class is not directly visible from the given code snippet. However, we know that the `evaluate` function returns the loss, so it can be inferred that the loss is computed during the evaluation process.

As for the second question, based on the given code snippet, this neural network `SEAT4` appears to be a model used for training and evaluation in a reinforcement learning scenario. It seems to learn relations between query embeddings `q_embs` and infomax embeddings `inf_embs` based on their input `inf_one_zero_train` and `inf_embs_test`. The model is optimized using an optimizer and the learning rate is decayed during training. The model is evaluated every 10% of the training dataset, and the evaluation results are then stored in lists. The model is saved to a file named 'SEAT4-1.pth' after each epoch.


In [2]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""


input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


I cannot directly answer the question about the loss function of the class without additional information. The code snippet provided shows that the `SEAT4` model's `step` and `evaluate` methods are being called, but it does not reveal the actual loss function used in the code. To understand the loss function, you would need to check the implementation of the `SEAT4` class, specifically the methods related to training and loss computation.

Regarding the second question, based on the code snippet, the `SEAT4` neural network appears to be a model used for learning and evaluation in a given dataset. It is trained with `q_embs_train` and `inf_embs_train` data in batches, and the learning rate is updated at each epoch using `set_lr` function. The model's weights are saved after each epoch. The performance of the model is evaluated using `q_embs_test`, `inf_embs_test`, and `inf_one_zero_test` datasets. The model's training and evaluation are done on CUDA GPU using DataLoader with a batch siz

In [None]:
# 質問に対してこの情報からはわからないということがしっかりわかっている

In [3]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
```
    def step(self, q_embs, inf_embs, inf_one_zero, optimizer):
        
        self.train()
        mse_loss = nn.MSELoss()
        optimizer.zero_grad()
        #print(self(q_embs, inf_embs).shape, self.target(inf_one_zero).shape)
        loss = mse_loss(self(q_embs, inf_embs), self.target(inf_one_zero))
        loss.backward()
        optimizer.step()
```

`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function
```
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
```

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""


input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


To answer the first question, the loss function of the `SEAT4` class is mean squared error (MSE) as indicated by the usage of `nn.MSELoss()` in both the `step` and `evaluate` functions.

As for the second question, this neural network, `SEAT4`, appears to be a model for a question-answering system. It takes question and information embeddings as inputs and outputs attention scores and predicted one-hot encoding of information indices. The model is then trained using mean squared error loss between the predicted one-hot encoding and the actual one-hot encoding of the correct information indices. The purpose of the model is to learn the association between questions and information in order to answer questions by identifying the correct information. The evaluation of the model includes calculating the accuracy and mean number of outputs, which can be interpreted as the average number of information items selected per question.


In [4]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
```
    def step(self, q_embs, inf_embs, inf_one_zero, optimizer):
        
        self.train()
        mse_loss = nn.MSELoss()
        optimizer.zero_grad()
        #print(self(q_embs, inf_embs).shape, self.target(inf_one_zero).shape)
        loss = mse_loss(self(q_embs, inf_embs), self.target(inf_one_zero))
        loss.backward()
        optimizer.step()
```

`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function
```
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
```

Extra Information:
```
q_embs is 2 dimensional torch.tensor which has sentence embeddings of all question sentences
inf_embs is 2 dimensional torch.tensor which has sentence embeddings of all information snetences
```

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""


input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


To answer the first question, the loss function of the `SEAT4` class appears to be Mean Squared Error (MSE) based on the code snippet provided and the call graph. The `nn.MSELoss()` loss function is used in both the `step` and `evaluate` functions.

Regarding the second question, it's a bit hard to answer definitively without knowing the complete context of the neural network, but from the given code snippets, it seems like `SEAT4` is a PyTorch model for learning and evaluating the relationship between question and information sentences in some context. The model takes question and information sentence embeddings as inputs and performs an attention mechanism to predict the one-hot encoding of the corresponding information sentence for each question sentence. The attention mechanism is modeled using the `step` function, where the model's forward pass is performed, and the `evaluate` function is used to compute the evaluation metrics such as loss, accuracy, and mean number of output.

To

## Time calculation

In [4]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
```
    def step(self, q_embs, inf_embs, inf_one_zero, optimizer):
        
        self.train()
        mse_loss = nn.MSELoss()
        optimizer.zero_grad()
        #print(self(q_embs, inf_embs).shape, self.target(inf_one_zero).shape)
        loss = mse_loss(self(q_embs, inf_embs), self.target(inf_one_zero))
        loss.backward()
        optimizer.step()
```

`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function
```
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
```

Extra Information:
```
q_embs is 2 dimensional torch.tensor which has sentence embeddings of all question sentences
inf_embs is 2 dimensional torch.tensor which has sentence embeddings of all information snetences
```

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""


input_ids = tokenizer(text, return_tensors="pt").to(device)

import time

start = time.time()

output = model.generate(**input_ids, max_length = 2000)


end = time.time()


print("input token num: ", len(input_ids[0]))
print("output token num: ", len(input_ids[0]) - len(output[0]))
print("calcualation time: ", end - start)

print()

print(tokenizer.decode(output[0]))



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


input token num:  1206
output token num:  -228
calcualation time:  15.441757917404175


<s> [INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
    

## Chat 6 (fix error)

In [None]:
# model load
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

device = "cuda" if torch.cuda.is_available else "cpu"
emb_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1").to(device)

"""
# Model load for japanese
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

assert transformers.__version__ >= "4.34.1"

model = AutoModelForCausalLM.from_pretrained("cyberagent/calm2-7b-chat", device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("cyberagent/calm2-7b-chat")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
"""

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id, 
    padding_side="left",
    add_eos_token=False,
    add_bos_token=False,)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

In [None]:
text = """
<s>[INST]You are an assistant who helps users understand a certain code base. Users will ask you something regarding the code. You can't see the whole code but you are given a part of the entire code. If the code is not enough to answer the question, you should answer what information else is needed to answer the question. Call graphs are given bellow the code. If you need the information about the function in the call graphs, prompt [TAKE].

Here's a part of the code you start analyzing with:
```
# Learning and Evaluation
for epoch in range(num_epoch):

    optimizer = set_lr(epoch, optimizer)

    # train
    last_log_step = 0
    for i in range(0, num_dataset_train, batch_size):        
        # in order to avoid cuda oom
        batch_q_embs_train = q_embs_train[i:i+batch_size,:].clone().to(device)
        batch_inf_embs_train = inf_embs_train[i:i+batch_size,:].clone().to(device)
        
        se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)
        
        del batch_q_embs_train, batch_inf_embs_train
        
        if i/num_dataset_train > last_log_step + 0.1:
            print(f"epoch{epoch} {100*(i+1)/num_dataset_train} % finished")
            last_log_step += 0.1

    # evaluation
    loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)
    loss_list.append(loss)
    acc_list.append(acc)
    
    print(f"epoch:{epoch+1},  lr:{lr},  eval_loss:{loss},  eval_acc:{acc},  mean_num_out:{mean_num_out}")

torch.save(se_model, 'SEAT4-1.pth')
```

Call Graphs:
`se_model.step(batch_q_embs_train, batch_inf_embs_train, inf_one_zero_train[i:i+batch_size, i:i+batch_size], optimizer)` is connected to `SEAT4 class, step(self, q_embs, inf_embs)` function
```
    def step(self, q_embs, inf_embs, inf_one_zero, optimizer):
        
        self.train()
        mse_loss = nn.MSELoss()
        optimizer.zero_grad()
        #print(self(q_embs, inf_embs).shape, self.target(inf_one_zero).shape)
        loss = mse_loss(self(q_embs, inf_embs), self.target(inf_one_zero))
        loss.backward()
        optimizer.step()
```

`loss, acc, mean_num_out = se_model.evaluate(q_embs_test, inf_embs_test, inf_one_zero_test)` is connected to `SEAT4 class, evaluation(self, q_embs, inf_embs)` function
```
    def evaluate(self, q_embs, inf_embs, inf_one_zero):
        mse_loss = nn.MSELoss()
        attention = self(q_embs, inf_embs)
        print(f"attention:{attention}")  #(num_questions, num_information)
        predict_inf_one_zero = self.get_inf_one_zero(attention)
        #print(attention)
        
        loss = mse_loss(predict_inf_one_zero, inf_one_zero) * inf_one_zero.shape[0]
        
        acc_vec = torch.sum(torch.square(inf_one_zero - predict_inf_one_zero), dim=(0,))
        acc = torch.sum(torch.where(acc_vec == 0,1,0), dim=(0,)) / acc_vec.shape[0]

        mean_num_out = torch.sum(predict_inf_one_zero, dim=(0,1)) / predict_inf_one_zero.shape[0]

        return loss.detach().cpu(), acc.detach().cpu(), mean_num_out.detach().cpu()
```

Extra Information:
```
q_embs is 2 dimensional torch.tensor which has sentence embeddings of all question sentences
inf_embs is 2 dimensional torch.tensor which has sentence embeddings of all information sentences
```

Questions:
What is the loss function of this class?
What's the purpose of this neural network?
[/INST]"""


input_ids = tokenizer(text, return_tensors="pt").to(device)

output_ids = model.generate(
    **input_ids,
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


## Chat 7 (rag system for open3d and exactly point out the actual problem)