# In [10]:
# 用于生成蛋白随机序列

import datetime
import random
import csv


def generate_random_peptide(length):
    """
    Build a random peptide sequence.

    :param length: number of residues to generate; must be positive
    :return: string of ``length`` one-letter residue codes
    :raises ValueError: if ``length`` is zero or negative
    """
    # Reject non-positive lengths before doing any work
    if length <= 0:
        raise ValueError("蛋白序列长度必须是正整数")

    # One-letter codes the residues are drawn from
    alphabet = 'ACDEFGHIKLMNPQRSTVWY'

    # Draw one residue per position, then join once at the end
    residues = []
    for _ in range(length):
        residues.append(random.choice(alphabet))

    return ''.join(residues)

# Function that writes one peptide record to a file
def write_peptide_to_file(peptide_name, peptide, length, time, path=None, filetype='csv'):
    """
    Append one peptide record to a CSV or FASTA file.

    :param peptide_name: record name (CSV ``name`` column / FASTA header line)
    :param peptide: peptide sequence string
    :param length: value written to the CSV ``length`` column (unused for FASTA)
    :param time: value written to the CSV ``time`` column (unused for FASTA)
    :param path: output file; defaults to ``./seq/random_peptide.csv`` or
        ``./seq/random_peptide.fasta`` depending on ``filetype``
    :param filetype: ``'csv'`` or ``'fasta'``
    :return: 1 once the record has been appended
    :raises ValueError: if ``filetype`` is neither ``'csv'`` nor ``'fasta'``
    """
    import os

    default_paths = {
        'csv': "./seq/random_peptide.csv",
        'fasta': "./seq/random_peptide.fasta",
    }

    # Fail loudly instead of reporting success without writing anything
    if filetype not in default_paths:
        raise ValueError("unsupported filetype: {!r}".format(filetype))

    if path is None:
        path = default_paths[filetype]

    # Opening in append mode does not create missing parent directories
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if filetype == 'csv':
        # CSV columns: name, sequence, length, time (no header row is written)
        fieldnames = ['name', 'sequence', 'length', 'time']

        # Append so that earlier records in the file are kept
        with open(path, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({
                'name': peptide_name,
                'sequence': peptide,
                'length': length,
                'time': time,
            })
    else:
        # FASTA record: '>' header line followed by the sequence line
        with open(path, 'a') as fastafile:
            fastafile.write('>' + peptide_name + '\n')
            fastafile.write(peptide + '\n')

    return 1

# test

# Single timestamp shared by every record written in this run
time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# 20 random peptides for each length from 41 to 59, written as FASTA records
for length in range(41, 60):
    for replicate in range(20):
        peptide = generate_random_peptide(length)
        peptide_name = "random_peptide_{}_{}".format(length, replicate)
        write_peptide_to_file(peptide_name, peptide, length, time, filetype='fasta')
    
    



# NOTE(review): a stray ":" stood here, presumably residue from the notebook export

# In [13]:
# 生成纯氨基酸序列
import csv


# Function that writes one peptide record to a file
def write_peptide_to_file(peptide_name, peptide, length, time, path=None):
    """
    Append one peptide record to a CSV file.

    :param peptide_name: value for the ``name`` column
    :param peptide: value for the ``sequence`` column
    :param length: value for the ``length`` column
    :param time: accepted but not written; this file has no ``time`` column
    :param path: output file; defaults to ``./seq/pure_peptide.csv``
    :return: 1 once the record has been appended
    """
    import os

    if path is None:
        path = "./seq/pure_peptide.csv"

    # Opening in append mode does not create missing parent directories
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # CSV columns: name, sequence, length (no header row is written)
    fieldnames = ['name', 'sequence', 'length']

    # Append so that earlier records in the file are kept
    with open(path, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'name': peptide_name,
            'sequence': peptide,
            'length': length,
        })

    return 1

# test


# for length in range(20, 41):
#     for acid in 'ACDEFGHIKLMNPQRSTVWY':
#         peptide = str(acid) * length
#         peptide_name = "pure_peptide_" + str(length) + "_" + acid
#         write_peptide_to_file(peptide_name, peptide, length,time)  

# For every residue code, write homopolymers of length 30 through 50.
# NOTE(review): `time` is not assigned in this cell; presumably it is left over
# from an earlier cell -- confirm before running this cell on its own.
for acid in 'ACDEFGHIKLMNPQRSTVWY':
    for length in range(30, 51):
        peptide = acid * length
        peptide_name = "pure_peptide_{}_{}".format(length, acid)
        write_peptide_to_file(peptide_name, peptide, length, time)
        
        
        
        
        
        
        
        

# In [22]:
# 用于生成无义随机序列

import datetime
import random
import csv


def generate_random_nonsense_peptide(length, para):
    """
    Build a random peptide made of runs of one repeated residue.

    Each run is a single randomly chosen residue repeated ``para`` times, and
    ``length // para`` runs are joined together.

    :param length: total sequence length; must be positive
    :param para: length of each run; must be positive
    :return: the sequence string, or the integer 0 when ``length`` is not a
        multiple of ``para`` (callers test for this sentinel)
    :raises ValueError: if ``length`` or ``para`` is zero or negative
    """
    # One-letter codes the residues are drawn from
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

    # Validate the requested length
    if length <= 0:
        raise ValueError("蛋白序列长度必须是正整数")

    # para == 0 would divide by zero; a negative para would yield an empty string
    if para <= 0:
        raise ValueError("para必须是正整数")

    # Sentinel result: the length cannot be split into whole runs
    if length % para != 0:
        return 0

    # Integer division avoids the float round-trip of int(length / para)
    run_count = length // para
    return ''.join(random.choice(amino_acids) * para for _ in range(run_count))

# Function that writes one peptide record to a file
def write_peptide_to_file(peptide_name, peptide, length, para, time, path=None):
    """
    Append one peptide record to a CSV file.

    :param peptide_name: value for the ``name`` column
    :param peptide: value for the ``sequence`` column
    :param length: value for the ``length`` column
    :param para: value for the ``para`` column
    :param time: value for the ``time`` column
    :param path: output file; defaults to ``./seq/nonsense_peptide.csv``
    :return: 1 once the record has been appended
    """
    import os

    if path is None:
        path = "./seq/nonsense_peptide.csv"

    # Opening in append mode does not create missing parent directories
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # CSV columns: name, sequence, length, para, time (no header row is written)
    fieldnames = ['name', 'sequence', 'length', 'para', 'time']

    # Append so that earlier records in the file are kept
    with open(path, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            'name': peptide_name,
            'sequence': peptide,
            'length': length,
            'para': para,
            'time': time,
        })

    return 1

# test

# Single timestamp shared by every record written in this run
time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

# Try run lengths 4, 5 and 6 for every total length from 21 to 59
for length in range(21, 60):
    for para in range(4, 7):
        peptide = generate_random_nonsense_peptide(length, para)
        peptide_name = "random_peptide_{}_{}".format(length, para)
        # The generator returns 0 when length is not a multiple of para
        if peptide == 0:
            continue
        write_peptide_to_file(peptide_name, peptide, length, para, time)