<a href="https://colab.research.google.com/github/nvinogradskaya/DL_HW1/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm.auto import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Concatenate, LayerNormalization, Attention
from tensorflow.keras.callbacks import EarlyStopping
from geopy.distance import geodesic
import matplotlib.pyplot as plt

In [None]:
# --- Experiment configuration -------------------------------------------
MAX_USERS = 3        # user folders read by the loader; also the user-embedding vocabulary size
SEQ_LENGTH = 10      # number of consecutive stay points in one input window
LSTM_UNITS = 128     # hidden size of the LSTM encoder
BATCH_SIZE = 256
EPOCHS = 5
TEST_SIZE = 0.3      # fraction of windows held out by train_test_split
EMBEDDING_DIM = 32   # width of the user and hour-of-day embeddings

# Seed the global NumPy and TensorFlow generators so that weight
# initialisation is repeatable across "Restart & Run All". (GPU kernels may
# still introduce small run-to-run differences.)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
def load_data_with_stay_points(data_path):
    """Read per-user ``.plt`` trajectories and reduce them to scaled stay points.

    ``data_path`` must contain one folder per user, each holding a
    ``Trajectory`` sub-folder with ``.plt`` files. At most ``MAX_USERS`` user
    folders (in sorted name order) are read; every file is converted to stay
    points with ``detect_stay_points``.

    Args:
        data_path: root directory of the dataset.

    Returns:
        tuple: ``(df_all, user_map, scaler)`` where ``df_all`` is the
        concatenated stay-point table with ``lat``/``lon``/``alt`` min-max
        scaled, ``user_map`` maps folder name -> integer user id, and
        ``scaler`` is the fitted ``MinMaxScaler`` (fitted on the columns
        ``[lat, lon, alt]`` in that order).

    Raises:
        ValueError: if no trajectory file produced any stay point.
    """
    # Only entries that actually contain a Trajectory folder count as users.
    # Stray files next to the user folders would otherwise take one of the
    # MAX_USERS slots and make os.listdir() below fail.
    user_dirs = [
        name for name in sorted(os.listdir(data_path))
        if os.path.isdir(os.path.join(data_path, name, 'Trajectory'))
    ][:MAX_USERS]

    data = []
    user_map = {}
    for idx, user in enumerate(tqdm(user_dirs, desc="Users")):
        user_map[user] = idx
        traj_dir = os.path.join(data_path, user, 'Trajectory')
        # Sorted so the row order of the result does not depend on the filesystem.
        for file in sorted(os.listdir(traj_dir)):
            if not file.endswith('.plt'):
                continue
            # The first six lines are skipped as a header; columns 0, 1, 3, 5, 6
            # are read as latitude, longitude, altitude, date and time.
            df = pd.read_csv(os.path.join(traj_dir, file), skiprows=6, header=None,
                             usecols=[0, 1, 3, 5, 6],
                             names=['lat', 'lon', 'alt', 'date', 'time'])
            df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])
            df['user_id'] = idx
            df = detect_stay_points(df)
            if df is not None:
                data.append(df)

    if not data:
        # pd.concat([]) would raise the same exception type with a far less
        # helpful message ("No objects to concatenate").
        raise ValueError(f"No stay points found under {data_path!r}")

    df_all = pd.concat(data, ignore_index=True)
    # Drop rows with a zero latitude or longitude, then forward-fill any gaps.
    df_all = df_all[(df_all.lat != 0) & (df_all.lon != 0)].ffill()
    # NOTE(review): the scaler is fitted on the full table, i.e. before any
    # train/test split happens downstream.
    scaler = MinMaxScaler()
    df_all[['lat', 'lon', 'alt']] = scaler.fit_transform(df_all[['lat', 'lon', 'alt']])
    return df_all, user_map, scaler

In [None]:
def detect_stay_points(df, dist_thresh=200, time_thresh=300):
    """Collapse one trajectory into its stay points.

    Starting from an anchor fix ``i`` the scan advances until a fix ``j`` lies
    more than ``dist_thresh`` metres (geodesic) from the anchor. If at least
    ``time_thresh`` seconds elapsed between fix ``i`` and fix ``j``, the fixes
    ``i .. j-1`` are summarised as one stay point. The scan then restarts with
    ``j`` as the new anchor. A cluster that is still open when the trajectory
    ends is emitted as well when it lasted at least ``time_thresh`` seconds
    (measured from the anchor to the last fix).

    Args:
        df: trajectory with ``lat``, ``lon``, ``alt``, ``datetime`` and
            ``user_id`` columns. Rows are processed in the given order; the
            function does not sort them.
        dist_thresh: distance in metres beyond which a fix leaves the cluster.
        time_thresh: minimum duration in seconds for a cluster to count as a stay.

    Returns:
        A DataFrame with one row per stay (mean ``lat``/``lon``/``alt``, the
        ``datetime`` of the first fix of the stay, and ``user_id``), or
        ``None`` when ``df`` has fewer than two rows or no stay was found.
    """
    n = len(df)
    if n < 2:
        return None

    # Extract the columns once: per-row ``df.iloc[k]`` lookups inside the
    # pairwise loop would otherwise dominate the runtime.
    lats = df['lat'].to_numpy()
    lons = df['lon'].to_numpy()
    stamps = list(df['datetime'])

    def _stay_record(start, stop):
        # Summarise fixes start .. stop-1 as a single stay point.
        window = df.iloc[start:stop]
        return {
            'lat': window['lat'].mean(), 'lon': window['lon'].mean(),
            'alt': window['alt'].mean(),
            'datetime': window['datetime'].iloc[0],
            'user_id': df['user_id'].iloc[start],
        }

    stays = []
    i = 0
    while i < n - 1:
        anchor = (lats[i], lons[i])
        j = i + 1
        while j < n and geodesic(anchor, (lats[j], lons[j])).meters <= dist_thresh:
            j += 1

        # j < n: fix j is the first one outside the radius.
        # j == n: the trajectory ended inside the radius; measure the dwell
        # time up to the last fix so a final stay is not lost.
        end = j if j < n else n - 1
        dwell = (stamps[end] - stamps[i]).total_seconds()
        if dwell >= time_thresh:
            stays.append(_stay_record(i, j))
        i = j
    return pd.DataFrame(stays) if stays else None

In [None]:
def create_sequences(df, seq_length=SEQ_LENGTH):
    """Turn the stay-point table into sliding-window training samples.

    For every user the rows are ordered by ``datetime`` and a window of
    ``seq_length`` consecutive rows is slid over them with stride 1. The row
    that follows a window is its prediction target. Windows never span two
    users.

    Args:
        df: table with ``user_id``, ``datetime``, ``lat``, ``lon`` and ``alt``.
        seq_length: number of rows per input window.

    Returns:
        tuple of four NumPy arrays with one entry per window:
        ``sequences`` of shape ``(n, seq_length, 3)`` holding
        ``[lat, lon, alt]``, ``next_coords`` of shape ``(n, 2)`` holding the
        target ``[lat, lon]``, ``user_ids`` of shape ``(n,)`` and ``times`` of
        shape ``(n,)`` holding the hour of day of the target row.
    """
    sequences = []
    next_coords = []
    user_ids = []
    times = []

    ordered = df.sort_values(['user_id', 'datetime'])
    for uid, user_df in ordered.groupby('user_id'):
        # Work on plain float arrays: slicing a DataFrame per window is slow,
        # and taking the target from a mixed-dtype row yields object arrays.
        feats = user_df[['lat', 'lon', 'alt']].to_numpy(dtype=float)
        hours = pd.to_datetime(user_df['datetime']).dt.hour.to_numpy()
        for start in range(len(user_df) - seq_length):
            stop = start + seq_length
            sequences.append(feats[start:stop])
            next_coords.append(feats[stop, :2])
            user_ids.append(uid)
            times.append(hours[stop])

    if not sequences:
        # Keep the documented ranks even when no user has more than
        # seq_length rows, so downstream shape handling does not break.
        return (np.empty((0, seq_length, 3)), np.empty((0, 2)),
                np.empty((0,), dtype=int), np.empty((0,), dtype=int))

    return np.array(sequences), np.array(next_coords), np.array(user_ids), np.array(times)

In [None]:
from tensorflow.keras.layers import Lambda

def build_deepmove_model():
    """Build and compile the next-location regression model.

    Inputs (by name):
        coord_input: float tensor of shape ``(SEQ_LENGTH, 3)`` per sample.
        user_input:  scalar int32 user index, embedded with a vocabulary of
                     ``MAX_USERS`` entries.
        time_input:  scalar int32 index embedded with a vocabulary of 24
                     entries (the notebook feeds the hour of day).

    The coordinate sequence is encoded by an LSTM, passed through dot-product
    self-attention and averaged over the time axis. The result is concatenated
    with both embeddings and mapped through a ReLU layer to two outputs.

    Returns:
        A compiled ``Model`` (Adam optimiser, MSE loss, MAE metric).
    """
    coord_input = Input(shape=(SEQ_LENGTH, 3), name='coord_input')
    user_input = Input(shape=(), dtype='int32', name='user_input')
    time_input = Input(shape=(), dtype='int32', name='time_input')

    user_emb = Embedding(input_dim=MAX_USERS, output_dim=EMBEDDING_DIM)(user_input)
    time_emb = Embedding(input_dim=24, output_dim=EMBEDDING_DIM)(time_input)

    lstm_out = LSTM(LSTM_UNITS, return_sequences=True)(coord_input)
    # Self-attention: the LSTM outputs serve as both query and value.
    attention = Attention()([lstm_out, lstm_out])
    # Mean over the time axis. The built-in pooling layer computes the same
    # value as a Lambda around tf.reduce_mean(axis=1), but can be saved and
    # reloaded without custom-object handling.
    attn_mean = tf.keras.layers.GlobalAveragePooling1D()(attention)

    x = Concatenate()([attn_mean, user_emb, time_emb])
    x = Dense(128, activation='relu')(x)
    output = Dense(2)(x)

    model = Model(inputs=[coord_input, user_input, time_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [None]:
def evaluate_metrics(y_true, y_pred, scaler=None):
    """Print and return geodesic error metrics for predicted locations.

    Args:
        y_true: array of shape ``(n, 2)`` with ``(lat, lon)`` per row.
        y_pred: array of the same length with the predicted ``(lat, lon)``.
        scaler: optional fitted ``MinMaxScaler`` whose first two features are
            latitude and longitude. When given, both arrays are mapped back
            to degrees before distances are measured. When omitted, the inputs
            must already be in degrees; min-max scaled values would produce
            meaningless distances.

    Returns:
        dict with ``'ade'`` (mean error in metres), ``'fde'`` (error of the
        last row in metres) and ``'pct_100m'`` (percentage of rows with an
        error below 100 m).

    Raises:
        ValueError: if the inputs are empty or differ in length.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if len(y_true) == 0:
        raise ValueError("evaluate_metrics requires at least one sample")
    if len(y_true) != len(y_pred):
        raise ValueError("y_true and y_pred must have the same number of rows")

    if scaler is not None:
        # MinMaxScaler applies X * scale_ + min_ per feature; invert that for
        # the first two features (lat, lon).
        offset, scale = scaler.min_[:2], scaler.scale_[:2]
        y_true = (y_true - offset) / scale
        y_pred = (y_pred - offset) / scale

    lat1, lon1 = y_true[:, 0], y_true[:, 1]
    # geopy rejects latitudes outside [-90, 90]; clamp predictions so one
    # wild output does not abort the whole evaluation.
    lat2 = np.clip(y_pred[:, 0], -90.0, 90.0)
    lon2 = y_pred[:, 1]

    errors = np.array([geodesic((a, b), (c, d)).meters
                       for a, b, c, d in zip(lat1, lon1, lat2, lon2)])
    ade = float(np.mean(errors))
    # NOTE(review): this is simply the error of the last row of the arrays.
    fde = float(errors[-1])
    pct_100m = float(np.mean(errors < 100) * 100)
    print(f"\nADE: {ade:.2f} m | FDE: {fde:.2f} m | % < 100m: {pct_100m:.2f}%")
    return {'ade': ade, 'fde': fde, 'pct_100m': pct_100m}

In [None]:
# Mount Google Drive (Colab-only import) so the dataset folder is reachable
# from the runtime.
from google.colab import drive
drive.mount('/content/drive')
# Dataset root handed to load_data_with_stay_points, which reads one
# sub-folder per user from it.
data_path = "/content/drive/My Drive/Colab Notebooks/Data/"

In [None]:
# Load the trajectories and reduce them to min-max scaled stay points.
# Also returns the folder-name -> user-id mapping and the fitted MinMaxScaler.
df, user_map, scaler = load_data_with_stay_points(data_path)

In [None]:
# Sliding windows per user: X = input windows of [lat, lon, alt],
# y = the next (lat, lon), users = user id per window, hours = hour of day of the target.
X, y, users, hours = create_sequences(df)

In [None]:
# Random hold-out split; all four arrays are split with the same row permutation.
# NOTE(review): the windows come from a stride-1 sliding window, so neighbouring
# windows share most of their rows. A random split therefore puts near-duplicate
# samples into both train and test; a chronological split may be more honest.
X_train, X_test, y_train, y_test, u_train, u_test, t_train, t_test = train_test_split(
    X, y, users, hours, test_size=TEST_SIZE, random_state=42)

In [None]:
# Build the compiled model and print its layer overview.
model = build_deepmove_model()
model.summary()

In [None]:
# Cast every split to a fixed dtype before it reaches the model:
# float32 for the coordinate arrays, int32 for the user / hour indices
# (the index inputs of the model are declared as int32).
X_train, y_train, X_test, y_test = (
    arr.astype(np.float32) for arr in (X_train, y_train, X_test, y_test)
)
u_train, t_train, u_test, t_test = (
    arr.astype(np.int32) for arr in (u_train, t_train, u_test, t_test)
)


In [None]:
# Train on the training split. Inputs are passed by the names given to the
# Input layers; 10% of the training samples are held out for validation.
# Early stopping waits 3 epochs without improvement and then restores the
# best weights seen.
model.fit(
    {'coord_input': X_train, 'user_input': u_train, 'time_input': t_train},
    y_train,
    validation_split=0.1,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],
    verbose=2
)

In [None]:
y_pred = model.predict({'coord_input': X_test, 'user_input': u_test, 'time_input': t_test})

# Targets and predictions live in min-max scaled space (the loader scales
# lat/lon/alt), but geodesic distances need real degrees. The scaler was
# fitted on [lat, lon, alt] and applies X * scale_ + min_, so the first two
# entries of min_/scale_ invert the lat/lon columns.
lat_lon_min, lat_lon_scale = scaler.min_[:2], scaler.scale_[:2]
y_test_deg = (y_test - lat_lon_min) / lat_lon_scale
y_pred_deg = (y_pred - lat_lon_min) / lat_lon_scale
evaluate_metrics(y_test_deg, y_pred_deg)