In [2]:
# 分词练习
from nltk.tokenize import word_tokenize,sent_tokenize,WordPunctTokenizer

text = """
"How was your day at the Canton Fair?" "I'm about to reach a deal with my Chinese supplier of iceboxes. Good quality." "I'm also looking for iceboxes. Can you share the name and the position of the booth?" "Sure."
The dialogue was heard on the metro, instead of the Canton Fair complex, between buyers from Canada and Nigeria attending the 137th Canton Fair, held in Guangzhou, South China's Guangdong Province.
It started after the buyers helped each other board the packed metro train at Xingangdong Station near the Canton Fair complex during peak exit hours on Thursday.
"""

# Tokenize the text into words
words = word_tokenize(text)
print(words)

# Tokenize the text into sentences
sentences = sent_tokenize(text)
print(sentences)

# Tokenize the text into words using WordPunctTokenizer
tokenizer = WordPunctTokenizer()
words = tokenizer.tokenize(text)
print(words)

["''", 'How', 'was', 'your', 'day', 'at', 'the', 'Canton', 'Fair', '?', "''", '``', 'I', "'m", 'about', 'to', 'reach', 'a', 'deal', 'with', 'my', 'Chinese', 'supplier', 'of', 'iceboxes', '.', 'Good', 'quality', '.', "''", '``', 'I', "'m", 'also', 'looking', 'for', 'iceboxes', '.', 'Can', 'you', 'share', 'the', 'name', 'and', 'the', 'position', 'of', 'the', 'booth', '?', "''", '``', 'Sure', '.', "''", 'The', 'dialogue', 'was', 'heard', 'on', 'the', 'metro', ',', 'instead', 'of', 'the', 'Canton', 'Fair', 'complex', ',', 'between', 'buyers', 'from', 'Canada', 'and', 'Nigeria', 'attending', 'the', '137th', 'Canton', 'Fair', ',', 'held', 'in', 'Guangzhou', ',', 'South', 'China', "'s", 'Guangdong', 'Province', '.', 'It', 'started', 'after', 'the', 'buyers', 'helped', 'each', 'other', 'board', 'the', 'packed', 'metro', 'train', 'at', 'Xingangdong', 'Station', 'near', 'the', 'Canton', 'Fair', 'complex', 'during', 'peak', 'exit', 'hours', 'on', 'Thursday', '.']
['\n"How was your day at the Cant

In [13]:
import random
import numpy as np

# 遗传算法
POP_SIZE = 100  # 种群数量
GEN_SIZE = 32  # 基因长度
CROSS_RATE = 0.8  # 交叉概率
MUTATE_RATE = 0.01  # 变异概率
N_ITER = 100  # 迭代次数
BOUND = [-10, 10]  # 变量取值范围


# 定义目标函数
def get_fitness(x):
    return -x ** 2 + 16 * x - 10

def translate_pos(pop):
    value = pop.dot(2**np.arange(GEN_SIZE)[::-1]) / float(2**GEN_SIZE-1) * (BOUND[1] - BOUND[0]) + BOUND[0]
    return value

def fitness_pop(pop):
    return get_fitness(translate_pos(pop))

# 初始化种群
def init_population(pop_size=POP_SIZE, gen_size=GEN_SIZE):
    pop = np.random.randint(2, size=(pop_size, gen_size))
    return pop

# 选择函数，轮盘赌选择
def  select(pops):
    fitness = fitness_pop(pops)
    fitness = fitness - fitness.min() + 1e-6  # 防止出现负值
    fitness = fitness / fitness.sum()
    idx = np.random.choice(np.arange(POP_SIZE), size=POP_SIZE, replace=True, p=fitness)
    return pops[idx]


# 交叉函数，单点交叉
def crossover(parent1, parent2):
    cross_points = np.random.randint(low=0, high=GEN_SIZE)
    child = parent1
    child[cross_points:] = parent2[cross_points:]
    return child

# 变异函数，随机变异
def mutate(individual):
    if np.random.rand() < MUTATE_RATE:
        mutate_point = np.random.randint(0,GEN_SIZE)
        individual[mutate_point] = individual[mutate_point] ^ 1

# 进化函数，迭代进化
def evolve(population):
    new_population = []
    for farther in population:
        child = farther
        if np.random.rand() < CROSS_RATE:
            mother = population[np.random.randint(POP_SIZE)]
            child = crossover(farther, mother)
        mutate(child)
        new_population.append(child)

    new_population = select(np.array(new_population))

    return new_population

# 主函数
def main():
    population = init_population()
    for i in range(N_ITER):
        population = evolve(population)
        fitness = fitness_pop(population)
        best_idx = np.argmax(fitness)
        best_pos = translate_pos(population[best_idx])
        print(f"第{i+1}代最优解：{best_pos}, 目标函数值：{fitness[best_idx]}")

In [14]:
main()

第1代最优解：-0.25394454837170066, 目标函数值：-14.127600607594918
第2代最优解：-0.8806017625333276, 目标函数值：-24.865087664710046
第3代最优解：-0.8820400784914479, 目标函数值：-24.890635955928367
第4代最优解：-0.8817935248096003, 目标函数值：-24.886256217349743
第5代最优解：-0.8109927435431157, 目标函数值：-23.63359312676944
第6代最优解：-0.6169793919234028, 目标函数值：-20.252333840832616
第7代最优解：-0.6169793872667899, 目标函数值：-20.25233376058074
第8代最优解：-0.6169794803990474, 目标函数值：-20.252335365618237
第9代最优解：-0.616979382610177, 目标函数值：-20.252333680328867
第10代最优解：-0.4984936980759951, 目标函数值：-18.224395136237405
第11代最优解：-0.4984909180781081, 目标函数值：-18.224347884656083
第12代最优解：-0.6169793919234028, 目标函数值：-20.252333840832616
第13代最优解：-0.05535769044779215, 目标函数值：-10.888787521056388
第14代最优解：-0.05535769044779215, 目标函数值：-10.888787521056388
第15代最优解：-0.05470805569894388, 目标函数值：-10.87832186254146
第16代最优解：-0.05196152954640887, 目标函数值：-10.834084473295345
第17代最优解：-0.03981402843255033, 目标函数值：-10.638609611780833
第18代最优解：-0.0312095205372227, 目标函数值：-10.500326362767726
第19代最优解：-0.039794