#**Common Task 1:** Dataset preprocessing

**Description:** Use **Sympy or Mathematica** to generate datasets of functions with their Taylor expansions up the fourth order. Tokenize the dataset.

In [25]:
import sympy as sp

x = sp.symbols('x')

functions = [sp.sin(x), sp.exp(x), sp.ln(1 + x), sp.cos(x), sp.tan(x)]

taylor_data = {}

for func in functions:
    taylor_series = sp.series(func, x, 0, 5).removeO()
    taylor_data[str(func)] = str(taylor_series)

for func, taylor in taylor_data.items():
    print(f"Function: {func}\nTaylor Expansion: {taylor}\n")

Function: sin(x)
Taylor Expansion: -x**3/6 + x

Function: exp(x)
Taylor Expansion: x**4/24 + x**3/6 + x**2/2 + x + 1

Function: log(x + 1)
Taylor Expansion: -x**4/4 + x**3/3 - x**2/2 + x

Function: cos(x)
Taylor Expansion: x**4/24 - x**2/2 + 1

Function: tan(x)
Taylor Expansion: x**3/3 + x



#**Common Task 2:** Use LSTM model

Please train an **LSTM model** to learn the Taylor expansion of each function.
You can use a deep learning algorithm of your choice (in Keras/TF or Pytorch).


In [27]:
import re
from tensorflow.keras.preprocessing.text import Tokenizer

def tokenize(expression):
    tokens = re.findall(r'\d+|\w+|[+\-*/^()]', expression)
    return tokens

tokenized_data = {func: tokenize(taylor) for func, taylor in taylor_data.items()}

text_sequences = [' '.join(tokens) for tokens in tokenized_data.values()]

tokenizer = Tokenizer(filters='', lower=False)
tokenizer.fit_on_texts(text_sequences)

sequences = tokenizer.texts_to_sequences(text_sequences)

In [28]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

max_seq_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

# X = np.array([seq[:-1] for seq in padded_sequences.values()])
# y = np.array([seq[1:] for seq in padded_sequences.values()])
X = np.array([seq[:-1] for seq in padded_sequences])
y = np.array([seq[1:] for seq in padded_sequences])


In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Bidirectional

vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=max_seq_length - 1),
    Bidirectional(LSTM(100, return_sequences=True)),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=250, verbose=1)


Epoch 1/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.0174 - loss: 2.4873
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.4435 - loss: 2.4617
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.4087 - loss: 2.4337
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.3913 - loss: 2.3966
Epoch 5/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.3739 - loss: 2.3437
Epoch 6/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.3565 - loss: 2.2672
Epoch 7/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.3565 - loss: 2.1578
Epoch 8/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.3565 - loss: 2.0124
Epoch 9/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7b64f836ead0>

In [30]:
def evaluate_sequence_accuracy(model, X, y, tokenizer):
    total_tokens = 0
    correct_tokens = 0

    preds = model.predict(X, verbose=0)
    pred_tokens = np.argmax(preds, axis=-1)

    for true_seq, pred_seq in zip(y, pred_tokens):
        for true_token, pred_token in zip(true_seq, pred_seq):
            if true_token != 0:  # skip padding
                total_tokens += 1
                if true_token == pred_token:
                    correct_tokens += 1

    accuracy = (correct_tokens / total_tokens) * 100 if total_tokens > 0 else 0
    return accuracy

In [31]:
accuracy = evaluate_sequence_accuracy(model, X, y, tokenizer)
print(f"Sequence Prediction Accuracy: {accuracy:.2f}%")


Sequence Prediction Accuracy: 100.00%


In [32]:
def print_predicted_sequences(model, X, tokenizer):
    preds = model.predict(X, verbose=0)
    pred_token_ids = np.argmax(preds, axis=-1)

    for i, pred_ids in enumerate(pred_token_ids):
        pred_tokens = [tokenizer.index_word.get(id, '') for id in pred_ids]
        true_tokens = [tokenizer.index_word.get(id, '') for id in y[i]]

        print(f"Example {i + 1}")
        print(f"Predicted: {' '.join(pred_tokens)}")
        print(f"Actual   : {' '.join(true_tokens)}")
        print("=" * 50)

print_predicted_sequences(model, X, tokenizer)


Example 1
Predicted: x * * 3 / 6 + x               
Actual   : x * * 3 / 6 + x               
Example 2
Predicted: * * 4 / 24 + x * * 3 / 6 + x * * 2 / 2 + x + 1
Actual   : * * 4 / 24 + x * * 3 / 6 + x * * 2 / 2 + x + 1
Example 3
Predicted: x * * 4 / 4 + x * * 3 / 3 - x * * 2 / 2 + x 
Actual   : x * * 4 / 4 + x * * 3 / 3 - x * * 2 / 2 + x 
Example 4
Predicted: * * 4 / 24 - x * * 2 / 2 + 1         
Actual   : * * 4 / 24 - x * * 2 / 2 + 1         
Example 5
Predicted: * * 3 / 3 + x                
Actual   : * * 3 / 3 + x                


#**Specific Task 3:** Use Transformer model
Please train a **Transformer**  model to learn the Taylor expansion of each function.


In [36]:
import torch
from torch.utils.data import DataLoader, TensorDataset
input_sequences = [torch.tensor(seq[:-1]) for seq in padded_sequences]
target_sequences = [torch.tensor(seq[1:]) for seq in padded_sequences]

max_seq_length = max(len(seq) for seq in input_sequences)

input_tensors = []
target_tensors = []
attention_masks = []

for inp_seq, tgt_seq in zip(input_sequences, target_sequences):
    inp_padded = torch.cat([inp_seq, torch.zeros(max_seq_length - len(inp_seq), dtype=torch.long)])
    tgt_padded = torch.cat([tgt_seq, torch.zeros(max_seq_length - len(tgt_seq), dtype=torch.long)])

    attention_mask = (inp_padded != 0).long()

    input_tensors.append(inp_padded)
    target_tensors.append(tgt_padded)
    attention_masks.append(attention_mask)

input_tensors = torch.stack(input_tensors)
target_tensors = torch.stack(target_tensors)
attention_masks = torch.stack(attention_masks)

dataset = TensorDataset(input_tensors, target_tensors, attention_masks)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)


In [37]:
import torch.nn as nn
import torch.optim as optim

class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model=64, nhead=8, num_layers=6, dim_feedforward=512, max_seq_length=50):
        super(TransformerModel, self).__init__()

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = nn.Parameter(torch.randn(max_seq_length, d_model))

        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )

        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt, src_mask=None, tgt_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None):
        src = self.embedding(src) + self.positional_encoding[:src.size(1), :]
        tgt = self.embedding(tgt) + self.positional_encoding[:tgt.size(1), :]

        output = self.transformer(
            src, tgt,
            src_mask=src_mask, tgt_mask=tgt_mask,
            src_key_padding_mask=src_key_padding_mask,
            tgt_key_padding_mask=tgt_key_padding_mask
        )

        return self.fc_out(output)

vocab_size = len(tokenizer.word_index) + 1
model = TransformerModel(vocab_size)

criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding tokens
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [44]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def train(model, dataloader, criterion, optimizer, num_epochs=150):
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0

        for src, tgt, attn_mask in dataloader:
            src, tgt, attn_mask = src.to(device), tgt.to(device), attn_mask.to(device)

            optimizer.zero_grad()
            output = model(src, tgt[:, :-1])
            loss = criterion(output.view(-1, vocab_size), tgt[:, 1:].reshape(-1))

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}")

train(model, dataloader, criterion, optimizer)

Epoch 1, Loss: 0.011403001844882965
Epoch 2, Loss: 0.010902094654738903
Epoch 3, Loss: 0.011685309931635857
Epoch 4, Loss: 0.010882451198995113
Epoch 5, Loss: 0.010884811170399189
Epoch 6, Loss: 0.010684220120310783
Epoch 7, Loss: 0.010694213211536407
Epoch 8, Loss: 0.010368735529482365
Epoch 9, Loss: 0.010426792316138744
Epoch 10, Loss: 0.010088512673974037
Epoch 11, Loss: 0.010420485399663448
Epoch 12, Loss: 0.009783769957721233
Epoch 13, Loss: 0.009991672821342945
Epoch 14, Loss: 0.011203566566109657
Epoch 15, Loss: 0.010007910430431366
Epoch 16, Loss: 0.009841850027441978
Epoch 17, Loss: 0.009415388107299805
Epoch 18, Loss: 0.00949324294924736
Epoch 19, Loss: 0.009430560283362865
Epoch 20, Loss: 0.009648923762142658
Epoch 21, Loss: 0.00951122585684061
Epoch 22, Loss: 0.009432496502995491
Epoch 23, Loss: 0.009446363896131516
Epoch 24, Loss: 0.009465762414038181
Epoch 25, Loss: 0.00936500821262598
Epoch 26, Loss: 0.009392991662025452
Epoch 27, Loss: 0.009122245945036411
Epoch 28, Los

In [52]:
def evaluate_sequence_accuracy(model, dataloader, tokenizer):
    model.eval()
    total_tokens = 0
    correct_tokens = 0

    with torch.no_grad():
        for src, tgt, attn_mask in dataloader:
            src, tgt = src.to(device), tgt.to(device)
            output = model(src, tgt[:, :-1])
            predictions = output.argmax(dim=-1)

            true_seq = tgt[:, 1:]
            mask = true_seq != 0
            correct = (predictions == true_seq) & mask
            correct_tokens += correct.sum().item()
            total_tokens += mask.sum().item()

    accuracy = (correct_tokens / total_tokens) * 100 if total_tokens > 0 else 0
    return accuracy

accuracy = evaluate_sequence_accuracy(model, dataloader, tokenizer)
print(f"\n Sequence Prediction Accuracy: {accuracy:.2f}%")



 Sequence Prediction Accuracy: 100.00%
