# Импорт библиотек и загрузка файлов

In [1]:
import argparse
import requests
import json
import time
import requests

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import (mean_squared_error, mean_absolute_error, confusion_matrix,
                            precision_score, recall_score, f1_score, fbeta_score)
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout, BatchNormalization, Input, Concatenate
from tensorflow.keras.optimizers import Adam

from modules.utils import download_data, txt_to_df
from modules.data_transformation import *
from modules.db_manager import (
    connect_db,
    load_df_to_db,
    table_to_dataframe,
    close_db,
    create_clean_table,
    insert_clean_data
)

In [2]:
pio.renderers.default = "notebook"

### Сейчас не нужно

In [19]:
# df = pd.read_csv("data/K562.txt", sep=",", engine="python",
#                  names=['target', 'potential_off_target', 'is_off_target']
#                  )

# df['encoded_7channels'] = df.apply(
#     lambda row: encode_7channels(
#         row['target'],
#         row['potential_off_target'],
#         pam_location="last",
#         pam_length=3
#     ).flatten(),
#     axis=1
# )

# df['mismatch_count'] = df.apply(
#     lambda row: count_mismatches(row['target'], row['potential_off_target']),
#     axis=1
# )

# df['gc_target'] = df['target'].apply(calc_gc_content)
# df['gc_off_target'] = df['potential_off_target'].apply(calc_gc_content)

In [None]:
# # Объединяем данные из эксель файла в один датафрейм, добавляя столбец cell_line (название листа в файле)
# file_path="data/4_cell_lines.xlsx"
# sheets = pd.ExcelFile(file_path).sheet_names

# df_list = []

# for sheet in sheets:
#     df = pd.read_excel(file_path, sheet_name=sheet)
#     df["cell_line"] = sheet
#     df_list.append(df)

# final_df = pd.concat(df_list, ignore_index=True)

# print(final_df.head())

# final_df.to_csv("data/4_cell_lines.csv", index=False, header=True)

In [3]:
# df = pd.read_csv("data/Table_S8_machine_learning_input.csv", sep="\t", engine="python")

In [14]:
# df1 = pd.read_csv("data/HEK293t.txt", sep=",", engine="python")
# df1.head()

### Начало

In [19]:
df4 = pd.read_csv(r"data/II4.txt", sep=",", engine="python", names=['target', 'potential_off_target', 'is_off_target'])  

In [43]:
df = pd.read_csv(r"data/K562_with_extra_features.csv", sep=",", engine="python")
df.drop(columns=df.columns[0], axis=1, inplace=True)

In [13]:
df.mismatch_count.value_counts()

mismatch_count
6    16070
5     3547
4      608
3       76
2       13
1        4
7        1
Name: count, dtype: int64

In [11]:
df[df.is_off_target == 1].mismatch_count.value_counts()

mismatch_count
4    71
3    32
2    12
1     4
7     1
Name: count, dtype: int64

In [8]:
df1 = pd.read_csv(r"data/4_cell_lines.csv")

## CNN

### embeddings

In [None]:
# # Генерируем эмбеддинги для target
# target_embeddings = generate_embeddings_v3(df, sequence_column='target', polymer_type='DNA', encoding_strategy='aptamer')
# target_embeddings.to_csv(r'data/target_embeddings.csv', header=True)

# # Генерируем эмбеддинги для potential_off_target
# off_target_embeddings = generate_embeddings_v3(df, sequence_column='potential_off_target',
#                                                polymer_type='DNA', encoding_strategy='aptamer')
# off_target_embeddings.to_csv(r'data/off_target_embeddings.csv', header=True)

# # Объединяем с исходным DataFrame
# df = df.join(target_embeddings.add_prefix('target_'), how='left')
# df = df.join(off_target_embeddings.add_prefix('off_target_'), how='left')

In [44]:
target_embeddings = pd.read_csv("data/target_embeddings.csv", index_col=0)
off_target_embeddings = pd.read_csv("data/off_target_embeddings.csv", index_col=0)

df = df.join(target_embeddings.add_prefix('target_'), how='left')
df = df.join(off_target_embeddings.add_prefix('off_target_'), how='left')

In [69]:
# Удаляем строки, где нет хотя бы одного эмбеддинга
df_cleaned = df.dropna(subset=[f'target_feature_{i}' for i in range(43)] + 
                               [f'off_target_feature_{i}' for i in range(43)]).reset_index(drop=True)

print(f"Размер после очистки: {df_cleaned.shape}")  # Должно быть (20319, 93), если всё норм

target_features = [f'target_feature_{i}' for i in range(43)]
off_target_features = [f'off_target_feature_{i}' for i in range(43)]

X_target = df_cleaned[target_features].to_numpy()
X_off_target = df_cleaned[off_target_features].to_numpy()
y = df_cleaned['is_off_target'].fillna(0).to_numpy()

# Разделяем данные
X_target_train, X_target_test, X_off_train, X_off_test, y_train, y_test = train_test_split(
    X_target, X_off_target, y, test_size=0.2, random_state=42, stratify=y
)

print(f"X_target_train shape: {X_target_train.shape}")
print(f"X_off_train shape: {X_off_train.shape}")
print(f"y_train shape: {y_train.shape}")

Размер после очистки: (20319, 93)
X_target_train shape: (16255, 43)
X_off_train shape: (16255, 43)
y_train shape: (16255,)


In [70]:
# Вход для target
input_target = Input(shape=(43,), name='target_input')

# Вход для potential_off_target
input_off_target = Input(shape=(43,), name='off_target_input')

# Полносвязные слои для target
x1 = Dense(64, activation='relu')(input_target)
x1 = Dropout(0.3)(x1)

# Полносвязные слои для potential_off_target
x2 = Dense(64, activation='relu')(input_off_target)
x2 = Dropout(0.3)(x2)

# Объединяем два входа
merged = Concatenate()([x1, x2])
x = Dense(128, activation='relu')(merged)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)

# Выходной слой (теперь он правильно связан с моделью)
output = Dense(1, activation='sigmoid', name='output')(x)

# Определяем модель
model = Model(inputs=[input_target, input_off_target], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Вывод структуры модели
model.summary()

In [74]:
# Преобразуем список классов в numpy массив
class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=y_train)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

print(f"Class Weights: {class_weight_dict}")  # Посмотрим, какие веса

# Обучаем с учетом весов
history = model.fit(
    [X_target_train, X_off_train], y_train, 
    epochs=20, batch_size=32, validation_split=0.2, 
    class_weight=class_weight_dict, verbose=1
)

Class Weights: {0: np.float64(0.502970480846587), 1: np.float64(84.66145833333333)}
Epoch 1/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.7464 - loss: 5.6945 - val_accuracy: 0.0920 - val_loss: 0.8014
Epoch 2/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.5471 - loss: 1.1978 - val_accuracy: 0.9945 - val_loss: 0.3656
Epoch 3/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.5855 - loss: 1.1828 - val_accuracy: 0.0584 - val_loss: 0.8825
Epoch 4/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2962 - loss: 0.7089 - val_accuracy: 0.0821 - val_loss: 0.7433
Epoch 5/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3202 - loss: 0.8664 - val_accuracy: 0.9923 - val_loss: 0.6414
Epoch 6/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.3

In [75]:
results_df, annotated_cm = evaluate_model(model, [X_target_test, X_off_test], y_test, beta=1, threshold=0.4)

[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0813 - loss: 0.7434
Test Loss: 0.74396
Test Accuracy: 0.07899
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step

Metrics:
Precision: 0.00635
Recall: 1.00000
F1-score: 0.01263
F-beta (1): 0.01263
Threshold used: 0.4

Results DataFrame (first 5 rows):
   y_test  y_pred_proba  y_pred prediction_is_true
0       0      0.551689       1                 No
1       0      0.551689       1                 No
2       0      0.551689       1                 No
3       0      0.551689       1                 No
4       0      0.551689       1                 No

Annotated Confusion Matrix:
                       Predicted No (0)          Predicted Yes (1)
Actual No (0)   TN (True Negative): 287  FP (False Positive): 3753
Actual Yes (1)   FN (False Negative): 0     TP (True Positive): 24


### encoded_7channels + 3 extra

In [58]:
# Входы
sequence_input = Input(shape=(23, 7), name='sequence_input')  # N = длина последовательности
additional_input = Input(shape=(3,), name='additional_input')  # gc_content, mismatch_count

# CNN для последовательностей
x = Conv1D(32, kernel_size=3, activation='relu')(sequence_input)
x = Flatten()(x)

# Полносвязный слой для дополнительных признаков
w = Dense(16, activation='relu')(additional_input)

# Объединяем
combined = Concatenate()([x, w])
z = Dense(64, activation='relu')(combined)
output = Dense(1, activation='sigmoid')(z)

# Финальная модель
model = tf.keras.Model(inputs=[sequence_input, additional_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [64]:
X_seq_train, X_seq_test, X_add_train, X_add_test, y_train, y_test = train_test_split(
    X_sequences, X_additional, y, test_size=0.2, random_state=42
)

# Обучение модели на тренировочных данных
model.fit(
    [X_seq_train, X_add_train],
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

Epoch 1/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9993 - loss: 0.0044 - val_accuracy: 0.9975 - val_loss: 0.0131
Epoch 2/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9996 - loss: 0.0011 - val_accuracy: 0.9982 - val_loss: 0.0054
Epoch 3/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 1.4825e-04 - val_accuracy: 0.9988 - val_loss: 0.0073
Epoch 4/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 7.2149e-05 - val_accuracy: 0.9978 - val_loss: 0.0092
Epoch 5/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 5.4437e-05 - val_accuracy: 0.9978 - val_loss: 0.0090
Epoch 6/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 1.0000 - loss: 2.3947e-05 - val_accuracy: 0.9978 - val_loss: 0.0100
Epoch 7/20


In [66]:
# Оценка модели на тестовых данных
results_df, annotated_cm = evaluate_model(model, [X_seq_test, X_add_test], y_test)

[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9988 - loss: 0.0038
Test Loss: 0.00235
Test Accuracy: 0.99926
[1m127/127[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

Results DataFrame (first 5 rows):
   y_test  y_pred prediction_is_true
0       0       0                Yes
1       0       0                Yes
2       0       0                Yes
3       0       0                Yes
4       0       0                Yes

Annotated Confusion Matrix:
                        Predicted No (0)       Predicted Yes (1)
Actual No (0)   TN (True Negative): 4040  FP (False Positive): 0
Actual Yes (1)    FN (False Negative): 3  TP (True Positive): 21


### Only encoded_7channels

In [7]:
# Преобразуем данные
X = np.array([np.reshape(ch, (7, -1)).T for ch in df['encoded_7channels']])  # Преобразуем flattened массивы обратно в матрицы 7xN
y = np.array(df['is_off_target'])  # Бинарная целевая переменная

# Разделение на тренировочный и тестовый наборы
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Определяем параметры входа
input_shape = X_train.shape[1:]  # (длина последовательности, 7 каналов)

In [14]:
unique, frequency = np.unique(y_test, return_counts = True)

print("Unique Values:", unique)
print("Frequency Values:", frequency)

Unique Values: [0 1]
Frequency Values: [4040   24]


In [12]:
# Создаем модель CNN
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape),
    BatchNormalization(),
    Dropout(0.3),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Для бинарной классификации
])

# Компилируем модель
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Обучаем модель
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9826 - loss: 0.0606 - val_accuracy: 0.9938 - val_loss: 0.0313
Epoch 2/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9953 - loss: 0.0146 - val_accuracy: 0.9945 - val_loss: 0.0147
Epoch 3/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9962 - loss: 0.0099 - val_accuracy: 0.9942 - val_loss: 0.0174
Epoch 4/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9971 - loss: 0.0082 - val_accuracy: 0.9945 - val_loss: 0.0149
Epoch 5/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9973 - loss: 0.0065 - val_accuracy: 0.9945 - val_loss: 0.0142
Epoch 6/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9975 - loss: 0.0068 - val_accuracy: 0.9938 - val_loss: 0.0148
Epoch 7/20
[1m407/407[0m 

In [23]:
results_df, annotated_cm = evaluate_model(model, [X_seq_test, X_add_test], y_test)

Unnamed: 0,Predicted No (0),Predicted Yes (1)
Actual No (0),TN (True Negative): 4035,FP (False Positive): 5
Actual Yes (1),FN (False Negative): 5,TP (True Positive): 19


Нужно свести к минимуму FN

## Функции

In [5]:
def generate_embeddings_v3(df, sequence_column, polymer_type='DNA', encoding_strategy='aptamer', batch_size=80):
    """
    Генерирует эмбеддинги для последовательностей из указанного столбца DataFrame.
    Сохраняет индексы для последующего объединения.

    :param df: Исходный DataFrame с последовательностями.
    :param sequence_column: Название столбца с последовательностями.
    :param polymer_type: Тип полимера ('DNA' для последовательностей ATGC).
    :param encoding_strategy: Стратегия кодирования ('aptamer').
    :param batch_size: Количество последовательностей в одном запросе.
    :return: DataFrame с эмбеддингами, индексами, совпадающими с исходным DataFrame.
    """
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
    }

    # Исходные последовательности + сохранение их индексов
    sequences = df[sequence_column].tolist()
    indices = df.index.tolist()  # Сохраняем индексы для правильного присоединения

    # Разбиваем на батчи по 80 последовательностей
    several_id_lists = np.array_split(np.asarray(sequences), int(len(sequences) / batch_size) + 1)
    index_splits = np.array_split(np.asarray(indices), int(len(sequences) / batch_size) + 1)

    embeddings = {}

    for i, (batch, index_batch) in enumerate(zip(several_id_lists, index_splits)):
        print(f"Обрабатываем батч {i + 1} из {len(several_id_lists)}...")
        params = {
            'sequences': ', '.join(list(batch)),
            'polymer_type': polymer_type,
            'encoding_strategy': encoding_strategy,
            'skip_unprocessable': 'true',
        }
        try:
            # Отправляем запрос
            response = requests.post('https://ai-chemistry.itmo.ru/api/encode_sequence', params=params, headers=headers)
            response.raise_for_status()

            # Преобразуем ответ в JSON
            data = json.loads(response.content)
            
            # Записываем эмбеддинги в словарь с сохранением индексов
            for seq, idx in zip(batch, index_batch):
                if seq in data:
                    embeddings[idx] = data[seq]
                else:
                    embeddings[idx] = None  # Если последовательность не обработалась, ставим None

        except requests.exceptions.RequestException as e:
            print(f"Ошибка при обработке батча {i + 1}: {e}")

        # Задержка для предотвращения перегрузки API
        time.sleep(4)

    # Преобразуем словарь в DataFrame
    embeddings_df = pd.DataFrame.from_dict(embeddings, orient='index')

    # Добавляем имена столбцов
    embeddings_df.columns = [f"feature_{i}" for i in range(embeddings_df.shape[1])]

    # Убеждаемся, что индексы соответствуют исходному DataFrame
    embeddings_df = embeddings_df.reindex(df.index)

    return embeddings_df

In [6]:
def evaluate_model(model, X_test, y_test, beta=2, threshold=0.5):
    """
    Оценивает Keras модель и выводит результаты: accuracy, confusion matrix, precision, recall, F1-score и F-beta score.

    :param model: Keras Model
    :param X_test: Тестовые данные (массив или список массивов для многовходных моделей)
    :param y_test: Истинные значения
    :param beta: Значение beta для F-beta score (по умолчанию 2)
    :param threshold: Порог классификации (по умолчанию 0.5)
    :return: Tuple (results_df, annotated_cm)
    """
    # Проверка размерностей данных
    if isinstance(X_test, list):
        for i, x in enumerate(X_test):
            if len(x) != len(y_test):
                raise ValueError(
                    f"Размер входного массива X_test[{i}] ({len(x)}) не совпадает с размером y_test ({len(y_test)})."
                )
    else:
        if len(X_test) != len(y_test):
            raise ValueError(
                f"Размер входного массива X_test ({len(X_test)}) не совпадает с размером y_test ({len(y_test)})."
            )

    # Оцениваем модель
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Loss: {test_loss:.5f}")
    print(f"Test Accuracy: {test_accuracy:.5f}")

    # Предсказания вероятностей
    y_pred_proba = model.predict(X_test)

    # Применяем порог классификации
    y_pred = (y_pred_proba > threshold).astype(int).flatten()

    # Вычисление метрик
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    f_beta = fbeta_score(y_test, y_pred, beta=beta, zero_division=0)

    print("\nMetrics:")
    print(f"Precision: {precision:.5f}")
    print(f"Recall: {recall:.5f}")
    print(f"F1-score: {f1:.5f}")
    print(f"F-beta ({beta}): {f_beta:.5f}")
    print(f"Threshold used: {threshold}")

    # Создаем DataFrame с результатами
    results_df = pd.DataFrame({
        'y_test': y_test,
        'y_pred_proba': y_pred_proba.flatten(),  # Добавляем вероятности
        'y_pred': y_pred,
    })
    results_df['prediction_is_true'] = results_df.apply(
        lambda row: 'Yes' if row['y_test'] == row['y_pred'] else 'No',
        axis=1
    )

    # Выводим первые строки DataFrame
    print("\nResults DataFrame (first 5 rows):")
    print(results_df.head())

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    cm_df = pd.DataFrame(
        cm, 
        index=['Actual No (0)', 'Actual Yes (1)'], 
        columns=['Predicted No (0)', 'Predicted Yes (1)']
    )

    # Аннотированная confusion matrix
    annotations = [
        ['TN (True Negative)', 'FP (False Positive)'],
        ['FN (False Negative)', 'TP (True Positive)']
    ]
    annotated_cm = cm_df.astype(str)
    for i, row in enumerate(cm_df.index):
        for j, col in enumerate(cm_df.columns):
            annotated_cm.loc[row, col] = f"{annotations[i][j]}: {cm[i, j]}"

    # Выводим annotated confusion matrix
    print("\nAnnotated Confusion Matrix:")
    print(annotated_cm)

    return results_df, annotated_cm

In [7]:
def train_and_evaluate_model(df: pd.DataFrame, encoding_function, model) -> None:
    """
    Обучает модель с использованием заданной функции кодирования и выводит R^2, MSE, предсказания и тестовые значения.

    :param df: Датафрейм с колонками genome input, sgRNA input и mean relative gamma
    :param encoding_function: Функция кодирования для использования
    """
    # Кодирование данных
    df['encoded'] = df.apply(
        lambda row: encoding_function(row['genome input'], row['sgRNA input']).flatten(),
        axis=1
    )

    # Подготовка данных для обучения
    X = np.vstack(df['encoded'].values)  # Преобразование списка массивов в 2D массив
    y = df['mean relative gamma']

    # Разделяем данные на обучающую и тестовую выборки
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Обучение модели
    model.fit(X_train, y_train)

    # Предсказания
    y_pred = model.predict(X_test)

    # Вычисление метрик
    r2_score = model.score(X_test, y_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    # Вывод результатов
    print(f"R^2 Score: {r2_score:.4f}")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print("Predictions vs Actual:")
    for pred, actual in zip(y_pred, y_test):
        print(f"Predicted: {pred:.4f}, Actual: {actual:.4f}")