In [1]:
import google.generativeai as genai
import os

# The API key is read from the environment instead of being embedded in the
# notebook. A key that was ever committed to source must be treated as leaked
# and rotated. A missing variable fails here with KeyError rather than later
# inside an API call.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Safety settings passed to the model: blocking is switched off ("BLOCK_NONE")
# for each of the four harm categories listed here.
safe = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# System instruction for the model. This must be a plain string: a trailing
# comma after the literal would silently turn it into a 1-tuple.
system = "You are an ambitious AI researcher who is looking to publish a paper that will contribute significantly to the field."

# Shared model handle used by the rest of the notebook.
model = genai.GenerativeModel(
    "gemini-1.5-flash",
    system_instruction=system,
    safety_settings=safe,
    generation_config=genai.types.GenerationConfig(
        # Only one candidate for now.
        candidate_count=1,
        #stop_sequences=["x"],
        #max_output_tokens=20,
        #temperature=1.0,
    ),
)


In [2]:
import json
def extract_json_between_markers(llm_output):
    """Decode the first ```json fenced block contained in ``llm_output``.

    Args:
        llm_output: Text that may contain a block opened by "```json" and
            closed by the next "```".

    Returns:
        The value produced by ``json.loads`` for the text between the fences,
        or None when the opening fence is missing, the closing fence is
        missing, or the enclosed text is not valid JSON.
    """
    opening_fence = "```json"
    closing_fence = "```"

    # Everything after the first opening fence; `found_open` is empty when absent.
    _, found_open, remainder = llm_output.partition(opening_fence)
    if not found_open:
        return None  # JSON markers not found

    # The payload runs up to the first closing fence that follows.
    payload, found_close, _ = remainder.partition(closing_fence)
    if not found_close:
        return None  # End marker not found

    try:
        return json.loads(payload.strip())
    except json.JSONDecodeError:
        return None

In [80]:
# JSON-formatted list of ideas that were already generated. The text is
# substituted verbatim into the {prev_ideas_string} placeholder of
# idea_first_prompt. Note that these seed entries carry no "abstract" field,
# although the prompt asks the model to include one in new ideas.
prev_ideas_string = """
[
  {
    "Name": "adaptive block size",
    "Title": "Adaptive Block Size: Dynamic Context Window Adjustment for Efficient Training",
    "Experiment": "Modify the model to dynamically adjust its block size during training, starting with a smaller block size and gradually increasing it. This could potentially lead to faster initial training and better long-range dependency learning.",
    "Interestingness": 6,
    "Feasibility": 4,
    "Novelty": 4
  },
  {
    "Name": "layerwise learning rates",
    "Title": "Layer-wise Learning Rate Adaptation: Optimizing Training Dynamics in Transformer Models",
    "Experiment": "Implement layer-wise learning rates, where each transformer layer has its own learning rate. Modify the configure_optimizers function to assign different learning rates to different layers, with deeper layers having lower learning rates. Compare the training dynamics, convergence speed, and final performance with the baseline model.",
    "Interestingness": 4,
    "Feasibility": 6,
    "Novelty": 2
  }
]

"""

# Prompt template for generating one new research idea. It is rendered with
# str.format and has exactly two placeholders: {task_description} and
# {prev_ideas_string}. The reply is expected to contain a ```json fenced block,
# which the notebook parses with extract_json_between_markers; later cells read
# the "Name", "Title" and "abstract" keys of that JSON ("abstract" is
# lower-case, unlike the other requested fields).
idea_first_prompt = """{task_description}

Here are the ideas that you have already generated:

'''
{prev_ideas_string}
'''

Come up with the next impactful and creative idea for research experiments.

Respond in the following format:

THOUGHT:
<THOUGHT>

NEW IDEA JSON:
```json
<JSON>
```

In <THOUGHT>, first briefly discuss your intuitions and motivations for the idea. Detail your high-level plan, necessary design choices and ideal outcomes of the experiments. Justify how the idea is different from the existing ones.

In <JSON>, provide the new idea in JSON format with the following fields:
- "Name": Search keywords used for searching similar ideas.
- "Title": A title for the idea, will be used for the report writing.
- "Experiment": An outline of the implementation. E.g. which functions need to be added or modified, how results will be obtained, ...
- "abstract": An outline of idea, why it is useful. what problems it solves, ....
- "Interestingness": A rating from 1 to 10 (lowest to highest).
- "Feasibility": A rating from 1 to 10 (lowest to highest).
- "Novelty": A rating from 1 to 10 (lowest to highest).

Be cautious and realistic on your ratings.
This JSON will be automatically parsed, so ensure the format is precise.
"""
#You will have {num_reflections} rounds to iterate on the idea, but do not need to use them all.

# One-sentence description of the task; fills the {task_description} placeholder.
task_description = "You are given the following file to work with, which trains multiple small language models on multiple datasets of text at the character level."

# Render the idea-generation prompt. Only the two placeholders that exist in
# the template are supplied; the commented-out arguments are not used by it.
msg = idea_first_prompt.format(
    prev_ideas_string=prev_ideas_string,
    task_description=task_description,
    # code=code,
    # num_reflections=num_reflections,
)


In [81]:
def get_response_from_llm(user="", system=""):
    """Send ``user`` to the module-level ``model`` and return the reply text.

    Args:
        user: Text sent as the single user turn of the request.
        system: Accepted for call compatibility but currently ignored; nothing
            in this function forwards it to the model.

    Returns:
        The ``text`` attribute of the object returned by
        ``model.generate_content``.
    """
    conversation = [{"role": "user", "parts": user}]
    reply = model.generate_content(conversation)
    return reply.text
    
    


In [82]:
import requests
import time

def search_for_papers_semantic_scholar(query, result_limit=10):
    """Search the Semantic Scholar Graph API for papers matching ``query``.

    This backend has its own name because the notebook also defines a DBLP
    based ``search_for_papers`` further down in the same cell. When both shared
    one name, this definition was silently replaced and could never be called.
    Code that calls ``search_for_papers`` keeps getting the DBLP backend.

    Args:
        query: Free-text search string. A falsy value short-circuits to None
            without any network access.
        result_limit: Maximum number of papers requested from the API.

    Returns:
        The list stored under the response's "data" key, or None when the
        query is empty or the response reports no matches.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    if not query:
        return None

    rsp = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        #headers={"X-API-KEY": S2_API_KEY},
        params={
            "query": query,
            "limit": result_limit,
            "fields": "title,authors,venue,year,abstract,citationStyles,citationCount",
        },
        # Without a timeout a stalled connection would block the kernel forever.
        timeout=30,
    )
    print(f"Response Status Code: {rsp.status_code}")
    # Print the first 500 characters of the response content
    print(f"Response Content: {rsp.text[:500]}")
    rsp.raise_for_status()
    results = rsp.json()

    # Pause after every successful call (presumably to stay within the rate
    # limit of unauthenticated access; confirm before removing).
    time.sleep(1.0)

    # A missing or zero "total" both mean there is nothing to return.
    if not results.get("total"):
        return None
    return results["data"]



def search_for_papers(query, result_limit=10):
    """Search the DBLP publication API for entries matching ``query``.

    Args:
        query: Free-text search string. A falsy value short-circuits to None
            without any network access.
        result_limit: Maximum number of hits requested (sent as DBLP's ``h``
            parameter).

    Returns:
        The list found at ``result -> hits -> hit`` in the JSON response, an
        empty list when the response has no such entry, or None when the
        query is empty.

    Raises:
        requests.HTTPError: If DBLP answers with an error status.
        requests.Timeout: If DBLP does not answer within the timeout.
    """
    if not query:
        return None

    rsp = requests.get(
        "https://dblp.org/search/publ/api",
        params={
            "q": query,
            "format": "json",
            "h": result_limit,
        },
        # Without a timeout a stalled connection would block the kernel forever.
        timeout=30,
    )
    print(f"Response Status Code: {rsp.status_code}")

    rsp.raise_for_status()
    results = rsp.json()

    try:
        return results["result"]["hits"]["hit"]
    except (KeyError, TypeError):
        # Only a response that lacks the expected nesting counts as "no hits".
        # Unrelated errors (including KeyboardInterrupt) are no longer
        # swallowed as they were by a bare `except:`.
        return []


In [110]:
# Ask the model for one new idea and parse the JSON block out of its reply.
user_prompt = idea_first_prompt.format(
    task_description=task_description,
    prev_ideas_string=prev_ideas_string,
)
idea_string = get_response_from_llm(user_prompt)
print(idea_string)
idea_json = extract_json_between_markers(idea_string)

# extract_json_between_markers returns None when the reply has no parseable
# ```json block. Fail here with a clear message instead of letting a later
# cell die on `None['Name']` or on a missing key.
required_idea_keys = ("Name", "Title", "abstract")
if not isinstance(idea_json, dict):
    raise ValueError(
        "The model reply did not contain a parseable ```json object; re-run this cell."
    )
missing_idea_keys = [key for key in required_idea_keys if key not in idea_json]
if missing_idea_keys:
    raise ValueError(f"The idea JSON is missing required fields: {missing_idea_keys}")



THOUGHT:
I'm intrigued by the idea of exploring how different character-level encodings might affect the performance and generalization capabilities of the model. It's commonly accepted that tokenization greatly impacts language models, and while character-level encoding seems like a natural choice, it might be possible to design more effective representations.  I want to investigate if alternative encoding schemes can lead to more efficient training and improved performance, especially in scenarios where the dataset contains a high number of unique characters or specific domain-specific vocabulary.

The experiment will focus on comparing the performance of models trained with different character-level encoding strategies. I'll introduce novel encoding schemes, like using character bigrams or trigrams or even incorporating a "meta-character" that represents rare or unknown characters. Each encoding will be evaluated based on its impact on training time, model size, and performance on d

In [113]:
# Prompt template for the novelty check. It is rendered with str.format and
# has four placeholders: {idea}, {abstract}, {last_query} and
# {last_query_results}. The reply is expected to contain a ```json fenced
# block with a boolean "Novelty" and a "Query" string; the novelty loop reads
# both keys from the parsed JSON.
novelty_prompt = '''You have this idea:
"""
{idea}
{abstract}
"""

The last searched queries are:
"""
{last_query}
"""

These ideas are the search results which means they are already explored:
"""
{last_query_results}
"""

Respond in the following format:

THOUGHT:
<THOUGHT>

RESPONSE:
```json
<JSON>
```

In <THOUGHT>, first briefly reason over the novelty of the idea based on provided explored ideas. if there is similarity between ideas provide the exact title that idea.
If you have made your decision, add "Decision made: novel." or "Decision made: not novel." to your thoughts and JSON. Be harsh in your decision.

In <JSON>, respond in JSON format with ONLY the following field:
- "Novelty" : true or false based on your reasoning.
- "Query": two small words to search the literature. The query must be different from all the last searched queries.

This JSON will be automatically parsed, so ensure the format is precise.'''


In [114]:
# Iterative novelty check: search the literature with the latest query, show
# the hit titles to the model, and let it propose the next query. The loop
# stops early once the model says "not novel" while there were search results
# to back that verdict up.
MAX_NOVELTY_ROUNDS = 5   # upper bound on search/judge iterations
PAPERS_PER_QUERY = 30    # hits requested per literature search

kw = [idea_json['Name']]  # every query issued so far, newest last
print(idea_json['Title'])
for i in range(MAX_NOVELTY_ROUNDS):
    print(i)
    print(kw[-1])

    # search_for_papers returns None for an empty query; treat that as "no hits".
    search_result = search_for_papers(kw[-1], PAPERS_PER_QUERY) or []
    titles = [hit['info']['title'] for hit in search_result]
    display(titles)

    # Not named `np`, to avoid shadowing the conventional numpy alias.
    novelty_request = novelty_prompt.format(
        idea=idea_json['Title'],
        abstract=idea_json['abstract'],
        last_query=kw,
        last_query_results=titles,
    )
    r = get_response_from_llm(novelty_request)
    # Printed before parsing so the raw reply is visible even if parsing fails.
    print(r)

    j = extract_json_between_markers(r)
    if not isinstance(j, dict) or 'Novelty' not in j or 'Query' not in j:
        raise ValueError(
            'The novelty reply did not contain a JSON object with "Novelty" and "Query".'
        )
    kw.append(j['Query'])

    # A "not novel" verdict only counts when the search returned something.
    if j['Novelty'] is False and search_result:
        print('NOT NEW!!!!!')
        break

    print('---------------------------------------------------')


Beyond Basic Characters: Exploring Novel Encoding Strategies for Character-level Language Models
0
character encoding variations
Response Status Code: 200


[]

THOUGHT:
While the idea of exploring character-level encoding schemes is interesting, it appears to be a well-explored area. The specific variations mentioned, such as character bigrams and trigrams, are commonly used techniques in character-level language modeling.  The idea of a 'meta-character' for rare characters could be a slight novelty, but it's likely that similar approaches for handling rare characters have been investigated before.  

Decision made: not novel. 

RESPONSE:
```json
{
"Novelty": false,
"Query": "rare character handling"
}
``` 

---------------------------------------------------
1
rare character handling
Response Status Code: 200


[]

THOUGHT:
The idea is not entirely novel. Character bigrams and trigrams are common techniques used in character-level language models, and handling rare characters is a well-studied problem. The "meta-character" concept for rare characters could be considered an interesting approach, but it might be viewed as a slight variation on existing methods for rare character handling.  Decision made: not novel.

RESPONSE:
```json
{
"Novelty": false,
"Query": "rare character compression"
}
``` 

---------------------------------------------------
2
rare character compression
Response Status Code: 200


[]

THOUGHT:
The idea seems interesting, but the search queries reveal a lack of novelty. The concept of exploring different character-level encoding schemes has already been investigated, as seen in the "character encoding variations" search query. The "rare character handling" and "rare character compression" queries further suggest that using bigrams, trigrams, and meta-characters to handle rare characters is not a new concept.  

Decision made: not novel. 

RESPONSE:
```json
{
"Novelty": false,
"Query": "encoding impact"
}
``` 

---------------------------------------------------
3
encoding impact
Response Status Code: 200


['Impact of a Computer System and the Encoding Staff Organization on the Encoding Stays and on Health Institution Financial Production in France.',
 'Transparent Risks: The Impact of the Specificity and Visual Encoding of Uncertainty on Decision Making.',
 'The impact of video encoding parameters on QoE of simulated FPV drone control.',
 'FAW: Flag aligned word-based encoding for in-place integers compression.',
 'BERLib: A Basic Encoding Rules implementation.',
 'A Comparative Study on the Impact of Categorical Encoding on Black Box Model Interpretability.',
 'An Empirical Study on the Impact of Positional Encoding in Transformer-Based Monaural Speech Enhancement.',
 'Impact of Input Encoding and ADC Resolution on Matrix-Vector Multiplication Accuracy.',
 'An Empirical Study on the Impact of Positional Encoding in Transformer-based Monaural Speech Enhancement.',
 'Dissociating the Impact of Memorability on Electrophysiological Correlates of Memory Encoding Success.',
 'Impact of Featu

THOUGHT:
The provided search results focus on various aspects of encoding, but none directly address the specific combination of character bigrams, trigrams, and a 'meta-character' for rare characters within the context of character-level language models. While the idea of character-level encoding and rare character handling is explored, the proposed combination and its impact on model efficiency and performance are not covered.  

Decision made: novel.

RESPONSE:
```json
{
"Novelty": true,
"Query": "character bigrams model" 
}
``` 

---------------------------------------------------
4
character bigrams model
Response Status Code: 200


['Japanese language model based on bigrams and its application to on-line character recognition.']

THOUGHT:
The idea of using character bigrams, trigrams, and a "meta-character" for rare characters has been explored before. The provided search result "Japanese language model based on bigrams and its application to on-line character recognition" is a clear indication that this approach has been investigated in the past.  Therefore, the core idea of encoding variations is not novel. However, the specific focus on small language models and the analysis of trade-offs between encoding complexity and model performance could potentially offer a unique angle. 

Decision made: not novel. 

RESPONSE:
```json
{
"Novelty": false,
"Query": "encoding trade-offs"
}
``` 

NOT NEW!!!!!


In [39]:
# Debug cell: display the most recently parsed novelty reply (`j` is assigned
# inside the novelty loop).
# NOTE(review): the saved output of this cell does not match the last reply
# printed by the loop, so it appears to come from an earlier run. Re-run or
# remove this cell.
j

{'Novelty': False,
 'Query': 'Character-level attention dynamics small language models'}

In [None]:
# Debug cell: display the raw hits of the most recent literature search
# (`search_result` is assigned inside the novelty loop).
# NOTE(review): this cell has no execution count or output in the saved
# notebook. It is likely a leftover; remove it if no longer needed.
search_result