# Sparks
## Multi-modal, multi-agent AI model system capable of independently conducting research by formulating hypotheses, performing experiments, and adapting its strategy
A. Ghafarollahi, M.J. Buehler*

Massachusetts Institute of Technology

*mbuehler@MIT.EDU

# OpenAI API Key

In [None]:
# Imported here because this cell runs before the cell that imports `os`;
# without it a fresh top-to-bottom run stops with a NameError.
import os

# Placeholder value: replace 'sk-' with a real OpenAI API key before running.
# Avoid committing the notebook with a real key filled in.
your_openai_api_key = 'sk-'
# Publish the key through the process environment.
# NOTE(review): presumably the OpenAI client used by Sparks_functions reads
# OPENAI_API_KEY from the environment - confirm.
os.environ['OPENAI_API_KEY'] = your_openai_api_key

# Query, Tools, Constraints

In [1]:
import json
import os


# Folder created in the current working directory for this run.
# NOTE(review): presumably Sparks_functions writes its conversation logs
# here - confirm against that module.
base_dir = 'conversations'

# Create the folder. An already-existing path is the only expected failure;
# any other OSError (permissions, read-only filesystem, ...) now propagates
# instead of being misreported as "Directory exists.".
try:
    os.mkdir(base_dir)
except FileExistsError:
    print('Directory exists.')

# Catalogue of the six tools offered to the system, written as free text in a
# single triple-quoted string. It is handed unchanged to generate_idea,
# test_idea, create_methods and create_outlook further down.
#
# The content only *looks* like a list of Python dicts: it is never evaluated
# in this file, it is not valid JSON (single quotes, parenthesised pieces),
# and the adjacent quoted fragments inside each 'description' are plain text
# here - Python does not concatenate them, so the consumer sees the quotes,
# parentheses and line breaks exactly as typed.
#
# NOTE(review): the text is left untouched because editing it changes what the
# downstream functions receive. Items worth confirming with the authors:
#   - several 'description' fragments end without a trailing space before the
#     next fragment ('...as a float.' / '...short sequences.' /
#     '...(string format)');
#   - 'streucture' in design_protein_from_CATH is a typo for 'structure';
#   - analyze_protein_structure says "8-class DSSP" but lists nine keys.
tools =  '''
{
  'type': 'Python function',
  'function': {
    'name': 'calculate_force_from_seq',
    'description': (
      'This function takes a protein sequence in one-letter amino acid code, '
      'performs the required analysis or simulation, and returns the '
      'non-dimensional protein force (defined as the maximum force along the protein unfolding force-extension curve) as a float.'
      'The function might lead to None results. So, the output should always be checked.'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'sequence': {
          'type': 'string',
          'description': (
            'Protein sequence represented using standard one-letter amino acid codes '
            '(e.g., "MKTFFV...").'
          )
        }
      },
      'required': ['sequence']
    }
  }
},
{
  'type': 'Python function',
  'function': {
    'name': 'calculate_energy_from_seq',
    'description': (
      'This function takes a protein sequence in one-letter amino acid code, '
      'performs the required analysis or simulation, and returns the '
      'non-dimensional protein energy (defined as the area under the protein unfolding force-extension curve) as a float.'
      'The function might lead to None results. So, the output should always be checked.'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'sequence': {
          'type': 'string',
          'description': (
            'Protein sequence represented using standard one-letter amino acid codes '
            '(e.g., "MKTFFV...").'
          )
        }
      },
      'required': ['sequence']
    }
  }
},
  {
  'type': 'Python function',
  'function': {
    'name': 'design_protein_from_length',
    'description': (
      'This function creates random protein sequences of a given length. '
      'It returns the amino acid sequence (string format) of the generated protein.'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'length': {
          'type': 'integer',
          'description': 'Desired number of amino acids in the protein sequence'
        }
      },
      'required': ['length']
    }
  }
},
{
  'type': 'Python function',
  'function': {
    'name': 'design_protein_from_CATH',
    'description': (
      'This function creates random alpha-helix, beta-sheet, or mixed protein sequences of a given length. '
      'It returns the amino acid sequence (string format) of the generated protein. '
      'The function can be used to create proteins biased toward alpha-helix (CATH=1), beta-sheet (CATH=2), or mixed structures (CATH=3). '
      'However, the output may not fully reflect the desired structural class, especially for short sequences.'
      'To ensure the proteins follow the desired structure, the secondary streucture of folded sequences should be analyzed.'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'length': {
          'type': 'integer',
          'description': 'Desired number of amino acids in the protein sequence'
        },
        'cath': {
          'type': 'string',
          'description': 'Structure bias: "1" for alpha-helix, "2" for beta-sheet, "3" for mixed'
        }
      },
      'required': ['length', 'cath']
    }
  }
},
{
  'type': 'Python function',
  'function': {
    'name': 'analyze_protein_structure',
    'description': (
      'This function returns the secondary structure percentage content of a protein from a PDB file. '
      'The output is a dictionary with 8-class DSSP secondary structure elements as keys: '
      '{"H", "B", "E", "G", "I", "T", "S", "P", "-"}. Each key maps to the corresponding percentage value (float).'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'protein_struct': {
          'type': 'string',
          'description': 'Protein structure file in PDB format'
        }
      },
      'required': ['protein_struct']
    }
  }
},
{
  'type': 'Python function',
  'function': {
    'name': 'fold_protein',
    'description': (
      'This function takes a protein amino acid sequence, folds it and creates the protein PDB file. Returns the PDB file name (string format)'
      'of the resulting 3D folded structure. The sequence should be in FASTA format and contain only valid '
      'amino acid characters (no special symbols like "-").'
    ),
    'parameters': {
      'type': 'object',
      'properties': {
        'sequence': {
          'type': 'string',
          'description': 'Protein sequence in FASTA format (only amino acid characters, no special symbols).'
        },
        'name': {
          'type': 'string',
          'description': 'Name to be used for saving the folded protein structure.'
        }
      },
      'required': ['sequence', 'name']
    }
  }
}

     '''

# Research constraints, written as numbered free text and then JSON-encoded.
# Because the input is a plain str, json.dumps returns one double-quoted JSON
# string literal with the newlines escaped as \n; `indent` has no effect on a
# scalar and is kept only so the call matches the original.
constraints = json.dumps(
    '''
1- During the research, assume protein lengths to be equal or between 30 and 80.
2- Consider protein sequences in intervals of 10.
3- For computational efficiency, restrict the total number of samples per protein length to 10.
''',
    indent=4,
)

# Research question passed to every pipeline stage that takes a query.
query = 'Discover a novel principle in short protein peptides.'
# Round count passed to refine_approach as its third argument.
total_followup_rounds = 3

Directory exists.


In [2]:
from Sparks_functions import *

In [None]:
# Full Sparks run, executed strictly in this order. Every function comes from
# the star import of Sparks_functions; `query`, `tools`, `constraints` and
# `total_followup_rounds` are the values defined in the setup cell.
# NOTE(review): only refine_approach returns a value that is used here, so the
# stages presumably hand results to each other through state or files managed
# inside Sparks_functions - confirm before reordering or re-running a single
# step in isolation.

# Idea generation
generate_idea(query, tools, constraints)
# Idea testing
test_idea(query, tools, constraints)
initial_implementation()
# Refinement: the return value is kept because the plotting and methods steps
# below take it as an argument.
last_exp_round = refine_approach(query, constraints, total_followup_rounds)
# Create plots
create_reason_plots(query, last_exp_round)
# Documentation: one call per section of the write-up, then the PDF step.
create_introduction(query)
create_methods(query, tools, constraints, last_exp_round)
create_results()
create_conclusion(query)
create_outlook(query, tools)
create_pdf()