一、Python 程序设计：21 点（Blackjack）游戏

In [68]:
import random
import re


class Card:
    """A single playing card identified only by its rank string."""

    def __init__(self, rank):
        self.rank = rank

    def value(self):
        """Return the card's point value; an ace is counted as 11 here."""
        rank = self.rank
        # Face cards are all worth ten points.
        if rank in ["J", "Q", "K"]:
            return 10
        # Any remaining non-ace rank is converted from its numeric text.
        return 11 if rank == "A" else int(rank)

    def __str__(self):
        return self.rank

In [69]:


class Cards:
    """A deck holding four copies of each of the thirteen ranks."""

    def __init__(self):
        number_ranks = [str(pips) for pips in range(2, 11)]
        one_suit = ["A"] + number_ranks + ["J", "Q", "K"]
        # Suits are not tracked, so the deck is the rank list repeated 4 times.
        self.cards = one_suit * 4

    def shuffle(self):
        """Shuffle the remaining cards in place."""
        random.shuffle(self.cards)

    def get_card(self):
        """Remove the last rank from the deck and return it wrapped in a Card."""
        drawn_rank = self.cards.pop()
        return Card(drawn_rank)

In [70]:
class Hand:
    """The cards held by one participant, with blackjack scoring helpers."""

    def __init__(self):
        self.cards = []

    def add_card(self, card):
        """Append a card to the hand."""
        self.cards.append(card)

    def get_value(self):
        """Return the hand total, demoting aces from 11 to 1 while over 21."""
        total = 0
        soft_aces = 0
        for held in self.cards:
            total += held.value()
            if held.rank == "A":
                soft_aces += 1
        # Each demoted ace lowers the total by 10 (11 -> 1).
        while soft_aces > 0 and total > 21:
            total -= 10
            soft_aces -= 1
        return total

    def blackjack(self):
        """Return True when the hand is exactly two cards totalling 21."""
        if len(self.cards) != 2:
            return False
        return self.get_value() == 21

    def if_bust(self):
        """Return True when the hand total exceeds 21."""
        return self.get_value() > 21

    def __str__(self):
        return " ".join(map(str, self.cards))


In [71]:
class Player:
    """A named participant who draws from a shared deck."""

    def __init__(self, name, cards):
        self.cards = cards
        self.hand = Hand()
        self.name = name

    def hit(self):
        """Draw one card from the deck into the hand and print the hand."""
        drawn = self.cards.get_card()
        self.hand.add_card(drawn)
        print(f"{self.name} 的牌是：{self.hand}")

    def stand(self):
        """Print that this player stops drawing."""
        print(f"{self.name} 停牌")

In [72]:
class Computer:
    """The automated opponent, named "COM", drawing from a shared deck."""

    def __init__(self, cards):
        self.name = "COM"
        self.cards = cards
        self.hand = Hand()

    def turn(self):
        """Keep drawing until the hand total reaches at least 17."""
        while True:
            if self.hand.get_value() >= 17:
                break
            self.hit()

    def hit(self):
        """Draw one card from the deck into the hand and print the hand."""
        drawn = self.cards.get_card()
        self.hand.add_card(drawn)
        print(f"{self.name} 的牌是：{self.hand}")
        

In [74]:
# One shuffled deck is shared by the player and the computer.
cards = Cards()
cards.shuffle()
player = Player("玩家", cards)
computer = Computer(cards)

# Initial deal: two cards each, alternating, player first.
player.hit()
computer.hit()
player.hit()
computer.hit()

# Evaluate the two-card hands once so the branches below stay readable.
player_natural = player.hand.blackjack()
computer_natural = computer.hand.blackjack()

if player_natural and computer_natural:
    print("平局")
elif player_natural:
    print(f"{player.name} 21 点")
elif computer_natural:
    print(f"{computer.name} 21 点")
else:
    # Player's turn: it ends on request, or automatically once the hand is
    # bust, so the prompt is never shown for a hand that has already lost.
    while not player.hand.if_bust():
        choice = input("请选择：1. 停牌 2. 继续").strip()
        if choice == "1":
            break
        elif choice == "2":
            player.hit()
        else:
            print("输入错误")

    computer.turn()
    print(f"{player.name} 的牌是：{player.hand}")
    print(f"{computer.name} 的牌是：{computer.hand}")

    player_total = player.hand.get_value()
    computer_total = computer.hand.get_value()

    # A player bust is checked first, so it decides the round even if the
    # computer also went over 21.
    if player.hand.if_bust():
        print(f"{player.name} 爆牌")
    elif computer.hand.if_bust():
        print(f"{computer.name} 爆牌")
    elif player_total > computer_total:
        print(f"{player.name} 胜")
    elif player_total < computer_total:
        print(f"{computer.name} 胜")
    else:
        # Equal totals: previously this case printed nothing at all.
        print("平局")


玩家 的牌是：2
COM 的牌是：A
玩家 的牌是：2 4
COM 的牌是：A 3
玩家 的牌是：2 4 A
玩家 的牌是：2 4 A 5
玩家 的牌是：2 4 A 5 3
玩家 的牌是：2 4 A 5 3 J
COM 的牌是：A 3 6
玩家 的牌是：2 4 A 5 3 J
COM 的牌是：A 3 6
玩家 爆牌


二、词元化 + 文本序列预测

In [113]:
from collections import Counter


class WordTokenizer:
    """Whitespace word tokenizer with a frequency-capped vocabulary.

    Index 0 is reserved for ``<unk>`` and index 1 for ``<pad>``; the most
    frequent words seen by :meth:`fit` receive ids starting at 2. Newlines
    inside a text are turned into a separate ``<eos>`` word.
    """

    UNK_TOKEN = '<unk>'
    PAD_TOKEN = '<pad>'
    EOS_TOKEN = '<eos>'

    def __init__(self, max_tokens=2000):
        """Create an unfitted tokenizer.

        Args:
            max_tokens: Upper bound on the vocabulary size, including the
                two reserved tokens.
        """
        self.max_tokens = max_tokens
        self.word_to_idx = {}
        self.idx_to_word = {}
        self.vocab_size = 0

    def _split(self, text):
        """Split text into words, emitting ``<eos>`` for every newline."""
        # The surrounding spaces keep the marker from fusing with the words
        # on either side of the newline ("a\nb" -> ["a", "<eos>", "b"]).
        return text.replace("\n", f" {self.EOS_TOKEN} ").split()

    def fit(self, texts):
        """Build the vocabulary from an iterable of strings.

        Args:
            texts: Iterable of text strings to count words from.
        """
        word_count = Counter(
            word for text in texts for word in self._split(text)
        )
        # Literal "<unk>"/"<pad>" words in the corpus must not claim a
        # second id or overwrite the reserved ones.
        for reserved in (self.UNK_TOKEN, self.PAD_TOKEN):
            word_count.pop(reserved, None)

        capacity = max(self.max_tokens - 2, 0)
        self.word_to_idx = {self.UNK_TOKEN: 0, self.PAD_TOKEN: 1}
        self.idx_to_word = {0: self.UNK_TOKEN, 1: self.PAD_TOKEN}
        for offset, (word, _count) in enumerate(word_count.most_common(capacity)):
            self.word_to_idx[word] = offset + 2
            self.idx_to_word[offset + 2] = word
        self.vocab_size = len(self.word_to_idx)

    def encode(self, text):
        """Convert text to a list of token ids.

        Words that are not in the vocabulary map to the ``<unk>`` id
        instead of raising ``KeyError``.
        """
        unk_idx = self.word_to_idx.get(self.UNK_TOKEN, 0)
        return [self.word_to_idx.get(word, unk_idx) for word in self._split(text)]

    def decode(self, index):
        """Convert an iterable of token ids to a space-joined string.

        Ids without a vocabulary entry are rendered as ``<unk>``.
        """
        return ' '.join(
            self.idx_to_word.get(token_id, self.UNK_TOKEN) for token_id in index
        )

In [114]:
import tensorflow as tf
def to_sequence_dataset(sequence, length, shuffle=False, seed=None, batch_size=32):
    """Turn a sequence into batched (context, target) sliding windows.

    Args:
        sequence: Values sliced element-wise by ``from_tensor_slices``
            (the notebook passes an array of token ids).
        length: Number of leading elements of each window used as the input;
            the one element after them is the target.
        shuffle: When true, shuffle the windows before batching.
        seed: Seed forwarded to the shuffle step.
        batch_size: Number of windows per batch.

    Returns:
        A ``tf.data.Dataset`` of ``(inputs, target)`` pairs with prefetch(1).
    """
    window_size = length + 1
    windows = (
        tf.data.Dataset.from_tensor_slices(sequence)
        .window(window_size, shift=1, drop_remainder=True)
        # Each window is itself a dataset; batching it yields one tensor.
        .flat_map(lambda sub_ds: sub_ds.batch(window_size))
    )
    if shuffle:
        windows = windows.shuffle(buffer_size=100_000, seed=seed)

    def split_inputs_and_target(batch):
        # Everything but the last column is the input; the last is the label.
        return batch[:, :-1], batch[:, -1]

    return windows.batch(batch_size).map(split_inputs_and_target).prefetch(1)

In [115]:
import tensorflow as tf
def build_model(vocab_size, embedding_dim=100, sequence_length=5, lstm_units=128):
    """Build and compile an Embedding -> LSTM -> softmax classifier.

    Args:
        vocab_size: Size of the embedding table and of the output layer.
        embedding_dim: Width of each embedding vector.
        sequence_length: Passed to the embedding layer as ``input_length``.
        lstm_units: Number of units in the LSTM layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # NOTE(review): the default sequence_length is 5 while the notebook builds
    # its training windows with length=4 — confirm which one is intended.
    embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=sequence_length)
    recurrent = tf.keras.layers.LSTM(lstm_units, return_sequences=False)
    classifier = tf.keras.layers.Dense(vocab_size, activation='softmax')

    model = tf.keras.Sequential([embedding, recurrent, classifier])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

In [116]:
import numpy as np
# Read the whole corpus file into memory.
with open("corpus.txt", "r", encoding="utf-8") as corpus_file:
    texts = corpus_file.read()

# One training text per line of the file.
training_texts = texts.strip().split('\n')

tokenizer = WordTokenizer()
tokenizer.fit(training_texts)

# Concatenate the ids of every line into one flat sequence.
encoded_sequence = [
    token_id
    for line in training_texts
    for token_id in tokenizer.encode(line)
]

encoded_array = np.array(encoded_sequence)
train_ds = to_sequence_dataset(encoded_array, length=4, shuffle=True, seed=42)

In [123]:
# Build a fresh model sized to the fitted vocabulary.
model = build_model(vocab_size=tokenizer.vocab_size)
# Left as the cell's last expression, so the returned History object is shown.
model.fit(x=train_ds, epochs=100)

Epoch 1/100




4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - accuracy: 0.0000e+00 - loss: 4.3177
Epoch 2/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.1545 - loss: 4.3052
Epoch 3/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.2033 - loss: 4.2935
Epoch 4/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.1707 - loss: 4.2808
Epoch 5/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1789 - loss: 4.2642
Epoch 6/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1626 - loss: 4.2433
Epoch 7/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.1382 - loss: 4.2132




Epoch 8/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1301 - loss: 4.1692
Epoch 9/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.1220 - loss: 4.1040
Epoch 10/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.1138 - loss: 3.9907
Epoch 11/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1057 - loss: 3.8820
Epoch 12/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1057 - loss: 3.8204
Epoch 13/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1301 - loss: 3.7285
Epoch 14/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - accuracy: 0.1707 - loss: 3.6141
Epoch 15/100
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.1626 - loss: 3.5092
Epoch 16/100
4/4 ━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2035d7f7e60>

In [124]:
# Ask for a space-separated prefix and predict the single most likely next word.
prefix = input("请输入前缀：")
sequence_length = 4
encoded_prefix = tokenizer.encode(prefix)

# Left-pad with id 1 up to sequence_length, then keep only the last
# sequence_length ids (a non-positive repeat count yields an empty pad).
input_seq = ([1] * (sequence_length - len(encoded_prefix)) + encoded_prefix)[-sequence_length:]

# The model expects a batch dimension, so wrap the single sequence in a list.
input_array = np.array([input_seq])
predictions = model.predict(input_array, verbose=0)

predicted_idx = np.argmax(predictions[0])
next_word = tokenizer.idx_to_word.get(predicted_idx, '<unk>')
(prefix, next_word)

('深度 学习', '学习')

In [125]:
# Greedy generation: repeatedly predict one word and feed it back in.
num_words = 5
generated_text = prefix.split()
current_prefix = prefix

for step in range(num_words):
    encoded_current = tokenizer.encode(current_prefix)

    # Left-pad with id 1 when short, otherwise keep the trailing
    # sequence_length ids.
    input_seq = ([1] * (sequence_length - len(encoded_current)) + encoded_current)[-sequence_length:]

    input_array = np.array([input_seq])
    predictions = model.predict(input_array, verbose=0)
    predicted_idx = np.argmax(predictions[0])
    next_word = tokenizer.idx_to_word.get(predicted_idx, '<unk>')

    generated_text.append(next_word)
    # Only the most recent sequence_length words form the next context.
    current_prefix = ' '.join(generated_text[-sequence_length:])

final_generated_text = ' '.join(generated_text)
final_generated_text


'深度 学习 学习 改变 改变 世界 学习'