In [16]:
import os
import random
from datetime import datetime, timedelta

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.models import Sequential


In [17]:
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, GlobalAveragePooling1D, Reshape, Add
from tensorflow.keras.models import Model


In [18]:
# Load the dataset
# The path is relative, so the CSV must sit in the notebook's working directory.
# 'timestamp' is still a plain string column here; it is parsed to datetime in a later cell.
df = pd.read_csv('netflix_single_user_balanced_pattern_v5.csv')


In [19]:
# Preview the first two raw rows to check the column layout
df.head(2)

Unnamed: 0,user,timestamp,mac_address,service_group,service_name,device_type,usage_minutes,usage_percentage,signal_strength,packet_loss_rate,latency,jitter_ms,traffic_spike,bandwidth_speed_per_sec_mbps,buffer_occupancy
0,user1,2023-01-01 00:00:00,02:00:00:3e:60:f7,Social Media,LinkedIn,Mobile,6,10.0,-46.89,0.9104,19.78,7.3,0,6.15,0.4301
1,user1,2023-01-01 00:15:00,02:00:00:ea:f9:7c,Streaming,Prime Video,TV,26,43.333333,-45.01,0.298,47.07,2.19,1,10.49,0.3375


In [20]:
# Disable pandas' display truncation: show every column and every row.
# NOTE(review): with max_rows=None, displaying a full frame prints all of its rows;
# keep using .head()/.sample() when inspecting large frames.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

def getWorkHours(hour, is_weekend):
    """Return whether `hour` lies in 9..17 (inclusive) on a non-weekend day."""
    within_office_window = 9 <= hour <= 17
    return within_office_window and not is_weekend

def getTimeBlock(hour):
    """
    Map an hour of the day to a coarse block label.

    [0, 12) -> "morning", [12, 17) -> "afternoon", [17, 24) -> "night".
    Any value outside [0, 24) yields None.
    """
    if 0 <= hour < 12:
        return "morning"
    if 12 <= hour < 17:
        return "afternoon"
    if 17 <= hour < 24:
        return "night"
    return None

def getMeetingHours(hour, is_weekend):
    """Return whether `hour` is one of the meeting slots (9, 10, 14, 15) on a non-weekend day."""
    meeting_slots = (9, 10, 14, 15)
    in_meeting_slot = hour in meeting_slots
    return in_meeting_slot and not is_weekend

def getLunchHours(hour):
    """Return whether `hour` is a lunch hour (12 or 13)."""
    lunch_slots = (12, 13)
    return hour in lunch_slots

def getEveningHour(hour):
    """Return whether `hour` lies in the evening window 18..22 (inclusive)."""
    first_evening_hour, last_evening_hour = 18, 22
    return first_evening_hour <= hour <= last_evening_hour

def formatServiceGroup(service):
    """
    Convert a display-style service group name to its snake_case identifier.

    Only the five known groups are translated; any other value is returned
    unchanged.
    """
    known_groups = (
        ("Gaming", "gaming"),
        ("Social Media", "social_media"),
        ("Software", "software"),
        ("Shopping", "shopping"),
        ("Streaming", "streaming"),
    )
    for display_name, identifier in known_groups:
        if service == display_name:
            return identifier
    return service


In [21]:
# # Time-based features
# df['month'] = df['timestamp'].dt.month
# df['day_of_week'] = df['timestamp'].dt.dayofweek.astype('float32')
# df['hour'] = df['timestamp'].dt.hour.astype('float32')
# df['is_weekend'] = df['timestamp'].dt.dayofweek >= 5
# df['is_business_hours'] = (df['timestamp'].dt.hour >= 9) & (df['timestamp'].dt.hour <= 17)
# df['week_number'] = df['timestamp'].dt.isocalendar().week
# # df['time_block'] = df['hour'].apply(getTimeBlock)
# df['is_work_hour'] = df.apply(lambda row: getWorkHours(row['hour'], row['is_weekend']), axis=1)
# df['is_meeting_hour'] = df.apply(lambda row: getMeetingHours(row['hour'], row['is_weekend']), axis=1)
# df['is_lunch_hour'] = df['hour'].apply(getLunchHours)
# df['is_evening'] = df['hour'].apply(getEveningHour)
# df['service_group'] = df['service_group'].apply(formatServiceGroup)

# # Ensure timestamp is sorted
# df = df.sort_values('timestamp').reset_index(drop=True)
# df.set_index('timestamp', inplace=True)

# df_pivot = pd.pivot_table(df, values='usage_minutes', index=['timestamp', 'device_type', 'hour','day_of_week','is_weekend', 'is_work_hour', 'is_meeting_hour', 'is_lunch_hour', 'is_evening'], 
#                 columns='service_group', fill_value=0).reset_index()

# dominant_service = df['service_group'].unique()
# df_pivot['dominant_service'] = df[dominant_service].idxmax(axis=1)

# df_pivot.rename(columns={'gaming': 'gaming_usage'}, inplace=True)
# df_pivot.rename(columns={'shopping': 'shopping_usage'}, inplace=True)
# df_pivot.rename(columns={'social_media': 'social_media_usage'}, inplace=True)
# df_pivot.rename(columns={'streaming': 'streaming_usage'}, inplace=True)
# df_pivot.rename(columns={'software': 'software_usage'}, inplace=True)

# df_pivot.head(5)


In [22]:
import pandas as pd

# Parse timestamps; values that cannot be parsed become NaT
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

# Remove rows whose timestamp could not be parsed
if df['timestamp'].isna().any():
    print("Invalid timestamps detected. Dropping rows with invalid timestamps.")
    df = df.dropna(subset=['timestamp'])

# Calendar features derived from the timestamp
df['month'] = df['timestamp'].dt.month
df['day_of_week'] = df['timestamp'].dt.dayofweek.astype('float32')
df['hour'] = df['timestamp'].dt.hour.astype('float32')
df['is_weekend'] = df['timestamp'].dt.dayofweek.ge(5)  # Saturday=5, Sunday=6
df['is_business_hours'] = df['timestamp'].dt.hour.between(9, 17)  # inclusive on both ends
df['week_number'] = df['timestamp'].dt.isocalendar().week

# Behavioural flags from the helper functions defined earlier in the notebook
df['is_work_hour'] = df.apply(
    lambda row: getWorkHours(row['hour'], row['is_weekend']),
    axis=1,
)
df['is_meeting_hour'] = df.apply(
    lambda row: getMeetingHours(row['hour'], row['is_weekend']),
    axis=1,
)
df['is_lunch_hour'] = df['hour'].map(getLunchHours)
df['is_evening'] = df['hour'].map(getEveningHour)

# Normalise the service group labels to snake_case identifiers
if 'service_group' not in df.columns:
    raise ValueError("The 'service_group' column is missing from the DataFrame.")
df['service_group'] = df['service_group'].apply(formatServiceGroup)

# Order rows chronologically and make the timestamp the index
df = (
    df.sort_values('timestamp')
      .reset_index(drop=True)
      .set_index('timestamp')
)

# One row per (timestamp, device, time flags); one usage column per service group.
# Combinations with no usage for a service are filled with 0.
df_pivot = pd.pivot_table(
    df,
    values='usage_minutes',
    index=[
        'timestamp', 'device_type', 'hour', 'day_of_week', 'is_weekend',
        'is_work_hour', 'is_meeting_hour', 'is_lunch_hour', 'is_evening',
    ],
    columns='service_group',
    fill_value=0,
).reset_index()

# The dominant service of a row is the service column holding the largest usage value
if 'service_group' not in df.columns:
    raise ValueError("The 'service_group' column is missing or incorrectly processed.")
unique_services = df['service_group'].unique()
df_pivot['dominant_service'] = df_pivot[unique_services].idxmax(axis=1)

# Suffix the per-service columns with '_usage'
df_pivot = df_pivot.rename(columns={
    service: f'{service}_usage'
    for service in ('gaming', 'shopping', 'social_media', 'streaming', 'software')
})

# Display the first 5 rows
print(df_pivot.head(5))


service_group           timestamp device_type  hour  day_of_week  is_weekend  \
0             2023-01-01 00:00:00      Mobile   0.0          6.0        True   
1             2023-01-01 00:15:00          TV   0.0          6.0        True   
2             2023-01-01 00:30:00          TV   0.0          6.0        True   
3             2023-01-01 00:45:00      Tablet   0.0          6.0        True   
4             2023-01-01 01:00:00      Tablet   1.0          6.0        True   

service_group  is_work_hour  is_meeting_hour  is_lunch_hour  is_evening  \
0                     False            False          False       False   
1                     False            False          False       False   
2                     False            False          False       False   
3                     False            False          False       False   
4                     False            False          False       False   

service_group  gaming_usage  shopping_usage  social_media_usage  \
0

In [23]:
# Inspect df after feature engineering; 'timestamp' is now the index
df.head(4)

Unnamed: 0_level_0,user,mac_address,service_group,service_name,device_type,usage_minutes,usage_percentage,signal_strength,packet_loss_rate,latency,jitter_ms,traffic_spike,bandwidth_speed_per_sec_mbps,buffer_occupancy,month,day_of_week,hour,is_weekend,is_business_hours,week_number,is_work_hour,is_meeting_hour,is_lunch_hour,is_evening
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2023-01-01 00:00:00,user1,02:00:00:3e:60:f7,social_media,LinkedIn,Mobile,6,10.0,-46.89,0.9104,19.78,7.3,0,6.15,0.4301,1,6.0,0.0,True,False,52,False,False,False,False
2023-01-01 00:15:00,user1,02:00:00:ea:f9:7c,streaming,Prime Video,TV,26,43.333333,-45.01,0.298,47.07,2.19,1,10.49,0.3375,1,6.0,0.0,True,False,52,False,False,False,False
2023-01-01 00:30:00,user1,02:00:00:9c:e3:5f,streaming,Netflix,TV,50,83.333333,-41.17,0.7898,13.61,0.58,0,8.75,0.505,1,6.0,0.0,True,False,52,False,False,False,False
2023-01-01 00:45:00,user1,02:00:00:b2:f7:66,social_media,Instagram,Tablet,5,8.333333,-58.64,0.5335,38.34,3.95,1,12.19,0.3344,1,6.0,0.0,True,False,52,False,False,False,False


In [24]:
# Non-null count per column (quick check for missing values)
df.count()

user                            67105
mac_address                     67105
service_group                   67105
service_name                    67105
device_type                     67105
usage_minutes                   67105
usage_percentage                67105
signal_strength                 67105
packet_loss_rate                67105
latency                         67105
jitter_ms                       67105
traffic_spike                   67105
bandwidth_speed_per_sec_mbps    67105
buffer_occupancy                67105
month                           67105
day_of_week                     67105
hour                            67105
is_weekend                      67105
is_business_hours               67105
week_number                     67105
is_work_hour                    67105
is_meeting_hour                 67105
is_lunch_hour                   67105
is_evening                      67105
dtype: int64

In [25]:
# from sklearn.preprocessing import MinMaxScaler
# import joblib

# def add_rolling_features(df):
#     """
#     Add rolling statistics features based on usage minutes
#     """
#     # Sort by timestamp to ensure correct rolling calculations
#     df = df.sort_values('timestamp')

#     # Number of 15-min intervals for different periods
#     intervals = {
#         'day': 96,  # 24 * 4 (15-min intervals)
#         'week': 672,  # 7 * 24 * 4
#         'two_weeks': 1344  # 14 * 24 * 4
#     }

#     # Calculate rolling statistics for each service
#     services = ["shopping", "gaming", 
#                 "social_media", "streaming", "software"]

#     print("services..... rolling calculation....")
#     for service in services:
#         # print(f"service ******************************** {service} ********************************")
#         usage_col = f'{service}_usage'
#         # usage_col = f'{service}'

#         # Rolling sums
#         df[f'{service}_day_sum'] = df[usage_col].rolling(intervals['day'], min_periods=1).sum()
#         df[f'{service}_week_sum'] = df[usage_col].rolling(intervals['week'], min_periods=1).sum()
#         df[f'{service}_2week_sum'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).sum()

#         # print(f"services_day_sum", df[f'{service}_day_sum'])
#         # print(f"services_2week_sum", df[f'{service}_2week_sum'])

#         # Rolling means
#         df[f'{service}_day_mean'] = df[usage_col].rolling(intervals['day'], min_periods=1).mean()
#         df[f'{service}_week_mean'] = df[usage_col].rolling(intervals['week'], min_periods=1).mean()
#         df[f'{service}_2week_mean'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).mean()

#         # Rolling max
#         df[f'{service}_day_max'] = df[usage_col].rolling(intervals['day'], min_periods=1).max()
#         df[f'{service}_week_max'] = df[usage_col].rolling(intervals['week'], min_periods=1).max()
        
        
#         # Time since last peak usage (defined as usage > 75th percentile)
#         peak_threshold = df[usage_col].quantile(0.75)
#         df[f'{service}_since_peak'] = (df[usage_col] > peak_threshold).astype(int)
#         df[f'{service}_intervals_since_peak'] = df[f'{service}_since_peak'].cumsum()

#         # Calculate relative usage proportions
#         total_usage = df[[f'{s}_usage' for s in services]].sum(axis=1)
#         # total_usage = df[[f'{s}' for s in services]].sum(axis=1)
#         for service in services:
#             df[f'{service}_usage_ratio'] = df[f'{service}_usage'] / total_usage

#         # Fill NaN values with 0
#         df = df.fillna(0)
#         # df.head(5)

#         return df


        

In [26]:
# def add_rolling_features(df):
#     """
#     Add rolling statistics features based on usage minutes.
#     """
#     # Sort by timestamp to ensure proper rolling calculations
#     df = df.sort_values('timestamp')

#     # Define rolling intervals
#     intervals = {
#         'day': 96,  # 24 * 4 (15-min intervals)
#         'week': 672,  # 7 * 24 * 4
#         'two_weeks': 1344  # 14 * 24 * 4
#     }

#     # List of services for rolling calculations
#     services = ["shopping", "gaming", "social_media", "streaming", "software"]

#     print("Adding rolling features for services...")
#     for service in services:
#         usage_col = f'{service}_usage'

#         # Skip if base usage column does not exist
#         if usage_col not in df.columns:
#             print(f"Skipping {service}: '{usage_col}' not found in DataFrame.")
#             continue

#         # Add rolling sum features
#         df[f'{service}_day_sum'] = df[usage_col].rolling(intervals['day'], min_periods=1).sum()
#         df[f'{service}_week_sum'] = df[usage_col].rolling(intervals['week'], min_periods=1).sum()
#         df[f'{service}_2week_sum'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).sum()

#         # Add rolling mean features
#         df[f'{service}_day_mean'] = df[usage_col].rolling(intervals['day'], min_periods=1).mean()
#         df[f'{service}_week_mean'] = df[usage_col].rolling(intervals['week'], min_periods=1).mean()
#         df[f'{service}_2week_mean'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).mean()

#         # Add rolling max features
#         df[f'{service}_day_max'] = df[usage_col].rolling(intervals['day'], min_periods=1).max()
#         df[f'{service}_week_max'] = df[usage_col].rolling(intervals['week'], min_periods=1).max()

#         # Add time since last peak
#         peak_threshold = df[usage_col].quantile(0.75)
#         df[f'{service}_since_peak'] = (df[usage_col] > peak_threshold).astype(int)
#         df[f'{service}_intervals_since_peak'] = df[f'{service}_since_peak'].cumsum()

#     # Calculate relative usage proportions (outside the loop)
#     print("Calculating usage ratios...")
#     total_usage = df[[f'{s}_usage' for s in services if f'{s}_usage' in df.columns]].sum(axis=1)
#     for service in services:
#         usage_col = f'{service}_usage'
#         if usage_col in df.columns:
#             df[f'{service}_usage_ratio'] = df[usage_col] / total_usage

#     # Fill NaN values with 0
#     df = df.fillna(0)

#     print("Rolling features added for all services.")
#     return df


In [27]:
def add_rolling_features(df):
    """
    Add rolling statistics features based on usage minutes.

    For every service whose '<service>_usage' column is present, the following
    columns are added (windows are counted in rows, assuming 15-minute rows):

    * '<service>_day_sum' / '_week_sum' / '_2week_sum'    - rolling sums
    * '<service>_day_mean' / '_week_mean' / '_2week_mean' - rolling means
    * '<service>_day_max' / '_week_max'                   - rolling maxima
    * '<service>_since_peak'           - 1 where usage exceeds the column's
                                         75th percentile, else 0
    * '<service>_intervals_since_peak' - number of rows since the most recent
                                         peak row (0 on a peak row); before the
                                         first peak it counts rows since the
                                         start of the data
    * '<service>_usage_ratio'          - the service's share of the summed usage
                                         of all present services in that row

    Services whose usage column is missing are skipped everywhere, including
    the ratio computation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must be sortable by 'timestamp'. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy sorted by 'timestamp' with the feature columns added and every
        NaN replaced by 0.
    """
    # sort_values returns a new frame, so the caller's DataFrame is left untouched
    df = df.sort_values('timestamp')

    # Window sizes expressed in number of 15-minute rows
    intervals = {
        'day': 96,  # 24 * 4 (15-min intervals)
        'week': 672,  # 7 * 24 * 4
        'two_weeks': 1344  # 14 * 24 * 4
    }

    # List of services for rolling calculations
    services = ["shopping", "gaming", "social_media", "streaming", "software"]

    print("Adding rolling features for services...")
    present_services = []
    for service in services:
        usage_col = f'{service}_usage'

        # Skip if base usage column does not exist
        if usage_col not in df.columns:
            print(f"Skipping {service}: '{usage_col}' not found in DataFrame.")
            continue
        present_services.append(service)

        # Add rolling sum features
        df[f'{service}_day_sum'] = df[usage_col].rolling(intervals['day'], min_periods=1).sum()
        df[f'{service}_week_sum'] = df[usage_col].rolling(intervals['week'], min_periods=1).sum()
        df[f'{service}_2week_sum'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).sum()

        # Add rolling mean features
        df[f'{service}_day_mean'] = df[usage_col].rolling(intervals['day'], min_periods=1).mean()
        df[f'{service}_week_mean'] = df[usage_col].rolling(intervals['week'], min_periods=1).mean()
        df[f'{service}_2week_mean'] = df[usage_col].rolling(intervals['two_weeks'], min_periods=1).mean()

        # Add rolling max features
        df[f'{service}_day_max'] = df[usage_col].rolling(intervals['day'], min_periods=1).max()
        df[f'{service}_week_max'] = df[usage_col].rolling(intervals['week'], min_periods=1).max()

        # Peak indicator: usage above the column-wide 75th percentile
        peak_threshold = df[usage_col].quantile(0.75)
        is_peak = df[usage_col] > peak_threshold
        df[f'{service}_since_peak'] = is_peak.astype(int)

        # Rows since the last peak: the running peak count labels the stretch
        # that starts at each peak, and cumcount() numbers the rows inside each
        # stretch from 0. (A plain cumsum of the indicator would only count
        # how many peaks have occurred so far.)
        stretch_id = is_peak.cumsum().to_numpy()
        df[f'{service}_intervals_since_peak'] = is_peak.groupby(stretch_id).cumcount().to_numpy()

    # Calculate relative usage proportions over the services that are present,
    # consistent with the skip logic above
    print("Calculating usage ratios...")
    total_usage = df[[f'{s}_usage' for s in present_services]].sum(axis=1)
    for service in present_services:
        df[f'{service}_usage_ratio'] = df[f'{service}_usage'] / total_usage

    # Fill NaN values with 0 (e.g. 0/0 ratios for rows with no usage at all)
    df = df.fillna(0)

    print("Rolling features added for all services.")
    return df


In [28]:
# Replace df_pivot with the feature-enriched frame and list the resulting columns
# as a sanity check.
df_pivot = add_rolling_features(df_pivot)
print("Columns in df_pivot after rolling features:", df_pivot.columns)


Adding rolling features for services...
Calculating usage ratios...
Rolling features added for all services.
Columns in df_pivot after rolling features: Index(['timestamp', 'device_type', 'hour', 'day_of_week', 'is_weekend',
       'is_work_hour', 'is_meeting_hour', 'is_lunch_hour', 'is_evening',
       'gaming_usage', 'shopping_usage', 'social_media_usage',
       'software_usage', 'streaming_usage', 'dominant_service',
       'shopping_day_sum', 'shopping_week_sum', 'shopping_2week_sum',
       'shopping_day_mean', 'shopping_week_mean', 'shopping_2week_mean',
       'shopping_day_max', 'shopping_week_max', 'shopping_since_peak',
       'shopping_intervals_since_peak', 'gaming_day_sum', 'gaming_week_sum',
       'gaming_2week_sum', 'gaming_day_mean', 'gaming_week_mean',
       'gaming_2week_mean', 'gaming_day_max', 'gaming_week_max',
       'gaming_since_peak', 'gaming_intervals_since_peak',
       'social_media_day_sum', 'social_media_week_sum',
       'social_media_2week_sum', 'soci

In [29]:
def prepare_sequences(df, seq_length=672, pred_length=672):
    """
    Build (input window, future label window) pairs for sequence classification.

    Steps:
      1. add sine/cosine encodings of 'hour' (period 24) and 'day_of_week'
         (period 7);
      2. standardise all model features with a StandardScaler;
      3. label-encode 'dominant_service' and one-hot encode it;
      4. cut windows: each sample uses `seq_length` consecutive rows as input
         and the following `pred_length` rows' one-hot labels as target.
         Window starts advance by `seq_length` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding 'hour', 'day_of_week', 'dominant_service' and every
        column listed in the feature lists below. Rows are used in their
        existing order. The frame is NOT modified (the previous implementation
        added columns to it and overwrote the features with scaled values).
    seq_length : int
        Number of rows per input window.
    pred_length : int
        Number of rows per target window.

    Returns
    -------
    tuple
        (X, y, le, scaler, features) where X has shape
        (n_windows, seq_length, n_features), y has shape
        (n_windows, pred_length, n_classes), `le` is the fitted LabelEncoder,
        `scaler` the fitted StandardScaler and `features` the ordered list of
        feature column names.

    Raises
    ------
    KeyError
        If a required column is missing from `df`.

    Side effects
    ------------
    Writes 'service_scaler.pkl' and 'label_encoders.pkl' to the working
    directory.

    Notes
    -----
    The scaler is fitted on every row passed in, i.e. before the caller splits
    the windows into train/validation, so validation rows influence the
    scaling statistics.
    """
    # assign() returns a new frame, so the caller's DataFrame is left untouched
    df = df.assign(
        hour_sin=np.sin(2 * np.pi * df['hour'] / 24),
        hour_cos=np.cos(2 * np.pi * df['hour'] / 24),
        day_sin=np.sin(2 * np.pi * df['day_of_week'] / 7),
        day_cos=np.cos(2 * np.pi * df['day_of_week'] / 7),
    )

    # Define time-based features to use
    base_features = [
        'hour_sin', 'hour_cos', 'day_sin', 'day_cos',
        'is_weekend', 'is_work_hour', 'is_meeting_hour', 'is_lunch_hour',
        'is_evening'
    ]

    usage_features = [
        'software_usage', 'streaming_usage',
        'social_media_usage', 'shopping_usage', 'gaming_usage',
        'software_day_sum', 'software_week_sum', 'software_2week_sum',
        'streaming_day_sum', 'streaming_week_sum', 'streaming_2week_sum',
        'social_media_day_sum', 'social_media_week_sum', 'social_media_2week_sum',
        'gaming_day_sum', 'gaming_week_sum', 'gaming_2week_sum',
        'shopping_day_sum', 'shopping_week_sum', 'shopping_2week_sum',
        'software_usage_ratio', 'streaming_usage_ratio', 'social_media_usage_ratio',
        'gaming_usage_ratio', 'shopping_usage_ratio',
        'software_day_mean', 'streaming_day_mean', 'social_media_day_mean',
        'gaming_day_mean', 'shopping_day_mean',
        'software_intervals_since_peak', 'streaming_intervals_since_peak',
        'social_media_intervals_since_peak', 'gaming_intervals_since_peak',
        'shopping_intervals_since_peak'
    ]

    features = base_features + usage_features

    # Fail early with one message naming every missing column
    missing_columns = [
        name for name in features + ['dominant_service'] if name not in df.columns
    ]
    if missing_columns:
        raise KeyError(f"The following columns are missing in the DataFrame: {missing_columns}")

    # Scale features. Boolean flags are cast to float explicitly; the scaled
    # values are kept in a separate array rather than written back to the frame.
    scaler = StandardScaler()
    feature_matrix = scaler.fit_transform(df[features].astype('float64'))

    # Encode services and one-hot encode every row once, instead of per window
    le = LabelEncoder()
    encoded_labels = le.fit_transform(df['dominant_service'])
    num_classes = len(le.classes_)
    one_hot_labels = tf.keras.utils.to_categorical(encoded_labels, num_classes=num_classes)

    # Save Preprocessors
    joblib.dump(scaler, 'service_scaler.pkl')
    joblib.dump(le, 'label_encoders.pkl')

    X, y = [], []
    step = seq_length  # window starts are seq_length rows apart

    for start in range(0, len(df) - seq_length - pred_length + 1, step):
        target_start = start + seq_length
        X.append(feature_matrix[start:target_start])
        y.append(one_hot_labels[target_start:target_start + pred_length])

    return np.array(X), np.array(y), le, scaler, features


In [30]:
# def prepare_sequences(df, seq_length=672, pred_length=672):
#     features = [
#         # Ensure these features are present
#         'software_usage', 'streaming_usage', 'social_media_usage', 'gaming_usage', 'shopping_usage',
#         'software_day_sum', 'software_week_sum', 'software_2week_sum',
#         'streaming_day_sum', 'streaming_week_sum', 'streaming_2week_sum',
#         'social_media_day_sum', 'social_media_week_sum', 'social_media_2week_sum',
#         'gaming_day_sum', 'gaming_week_sum', 'gaming_2week_sum',
#         'software_day_mean', 'streaming_day_mean', 'social_media_day_mean', 'gaming_day_mean',
#         'software_intervals_since_peak', 'streaming_intervals_since_peak',
#         'social_media_intervals_since_peak', 'gaming_intervals_since_peak',
#     ]
    
#     # Verify all features are in DataFrame
#     missing_features = [f for f in features if f not in df.columns]
#     if missing_features:
#         raise KeyError(f"The following features are missing in the DataFrame: {missing_features}")
    
#     # Proceed with scaling
#     scaler = StandardScaler()
#     df[features] = scaler.fit_transform(df[features])
    
    
#     # Encode services
#     le = LabelEncoder()
#     df['service_encoded'] = le.fit_transform(df['dominant_service'])
#     num_classes = len(le.classes_)

#     # Save Preprocessors
#     joblib.dump(scaler, 'service_scaler.pkl')
#     joblib.dump(le, 'label_encoders.pkl')

#     X, y = [], []
#     step = seq_length

#     for i in range(0, len(df) - seq_length - pred_length + 1, step):
#         X.append(df[features].iloc[i:i + seq_length].values)
#         y_seq = df['service_encoded'].iloc[i + seq_length:i + seq_length + pred_length].values
#         y.append(tf.keras.utils.to_categorical(y_seq, num_classes=num_classes))

#     return np.array(X), np.array(y), le, scaler, features


In [31]:
# # Custom attention layer compatible with TFLite
# class TFLiteCompatibleAttention(tf.keras.layers.Layer):
    
#     def __init__(self, num_heads, key_dim, dropout=0.0, **kwargs):
#         super().__init__(**kwargs)
#         self.num_heads = num_heads
#         self.key_dim = key_dim
#         self.dropout_rate = dropout

#         # Initialize dense layers for Q, K, V
#         self.query_dense = Dense(num_heads * key_dim)
#         self.key_dense = Dense(num_heads * key_dim)
#         self.value_dense = Dense(num_heads * key_dim)
#         self.combine_heads = Dense(key_dim * num_heads)

#     def call(self, inputs):
#         # Reshape inputs
#         batch_size = tf.shape(inputs)[0]
#         seq_len = tf.shape(inputs)[1]

#         # Linear transformations
#         query = self.query_dense(inputs)
#         key = self.key_dense(inputs)
#         value = self.value_dense(inputs)



In [32]:
# # Reshape to (batch_size, seq_len, num_heads, key_dim)
# def reshape_for_attention(x):
#     return tf.reshape([x, batch_size, seq_len, self.num_heads, self.key_dim])

# # # Assume query, key, value, batch_size, seq_len, num_heads, key_dim are already defined
# # batch_size = tf.shape(query)[0]
# # seq_len = tf.shape(query)[1]

# # Reshape query, key, and value
# query = reshape_for_attention(query, batch_size, seq_len, self.num_heads, self.key_dim)
# key = reshape_for_attention(key, batch_size, seq_len, self.num_heads, self.key_dim)
# value = reshape_for_attention(value, batch_size, seq_len, self.num_heads, self.key_dim)

# # Transpose to (batch_size, num_heads, seq_len, key_dim)
# query = tf.transpose(query, [0, 2, 1, 3])
# key = tf.transpose(key, [0, 2, 1, 3])
# value = tf.transpose(value, [0, 2, 1, 3])

# # Calculate attention scores
# scale = tf.cast(self.key_dim, tf.float32) ** -0.5
# attention_scores = tf.matmul(query, key, transpose_b=True) * scale

# # Apply softmax
# attention_weights = tf.nn.softmax(attention_scores)

# # Apply dropout
# if self.dropout_rate > 0:
#     attention_weights = tf.nn.dropout(attention_weights, self.dropout_rate)

# # Apply attention to values
# attended_values = tf.matmul(attention_weights, value)

# # Reshape back
# attended_values = tf.transpose(attended_values, [0, 2, 1, 3])
# attended_values = tf.reshape(attended_values, [batch_size, seq_len, self.num_heads * self.key_dim])

# # Final linear transformation
# outputs = self.combine_heads(attended_values)

# return outputs


In [33]:
# Custom attention layer compatible with TFLite
class TFLiteCompatibleAttention(tf.keras.layers.Layer):
    """
    Multi-head scaled dot-product self-attention built from Dense, reshape,
    transpose, matmul and softmax ops.

    The same input tensor is projected to queries, keys and values, split into
    `num_heads` heads of size `key_dim`, attended, merged again and passed
    through a final Dense layer. The output's last dimension is
    `num_heads * key_dim`.

    Parameters
    ----------
    num_heads : int
        Number of attention heads.
    key_dim : int
        Size of each head.
    dropout : float
        Dropout rate applied to the attention weights. It is active only
        while training.
    **kwargs
        Forwarded to `tf.keras.layers.Layer`.
    """

    def __init__(self, num_heads, key_dim, dropout=0.0, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.dropout_rate = dropout

        # Initialize dense layers for Q, K, V
        self.query_dense = tf.keras.layers.Dense(num_heads * key_dim)
        self.key_dense = tf.keras.layers.Dense(num_heads * key_dim)
        self.value_dense = tf.keras.layers.Dense(num_heads * key_dim)
        self.combine_heads = tf.keras.layers.Dense(key_dim * num_heads)

        # A Dropout layer honours the `training` flag, whereas a bare
        # tf.nn.dropout call would also drop attention weights at inference.
        self.attention_dropout = tf.keras.layers.Dropout(dropout)

    def call(self, inputs, training=None):
        """
        Apply self-attention to `inputs`.

        `inputs` is indexed as (batch, sequence, features); the result is
        reshaped to (batch, sequence, num_heads * key_dim). `training` is
        forwarded to the attention dropout layer.
        """
        batch_size = tf.shape(inputs)[0]
        seq_len = tf.shape(inputs)[1]

        # Linear transformations
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        # Reshape to (batch_size, seq_len, num_heads, key_dim), then
        # transpose to (batch_size, num_heads, seq_len, key_dim)
        def split_heads(x):
            x = tf.reshape(x, [batch_size, seq_len, self.num_heads, self.key_dim])
            return tf.transpose(x, [0, 2, 1, 3])

        query = split_heads(query)
        key = split_heads(key)
        value = split_heads(value)

        # Calculate attention scores, scaled by 1/sqrt(key_dim)
        scale = tf.cast(self.key_dim, tf.float32) ** -0.5
        attention_scores = tf.matmul(query, key, transpose_b=True) * scale

        # Softmax over the key axis (last axis)
        attention_weights = tf.nn.softmax(attention_scores)

        # Dropout on the attention weights (no-op outside training)
        attention_weights = self.attention_dropout(attention_weights, training=training)

        # Apply attention to values
        attended_values = tf.matmul(attention_weights, value)

        # Merge the heads back: (batch_size, seq_len, num_heads * key_dim)
        attended_values = tf.transpose(attended_values, [0, 2, 1, 3])
        attended_values = tf.reshape(attended_values, [batch_size, seq_len, self.num_heads * self.key_dim])

        # Final linear transformation
        outputs = self.combine_heads(attended_values)

        return outputs

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized with the model."""
        config = super().get_config()
        config.update({
            'num_heads': self.num_heads,
            'key_dim': self.key_dim,
            'dropout': self.dropout_rate,
        })
        return config


In [34]:
from tensorflow.keras.layers import Activation

from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization, GlobalAveragePooling1D,
    Reshape, Add, Activation
)
from tensorflow.keras.models import Model

"""
Build model using TFLite compatible attention
"""
def build_tflite_compatible_transformer_model(seq_length, num_features, pred_length, num_classes):
    """
    Build a transformer-style sequence classifier using TFLiteCompatibleAttention.

    Architecture: Dense(128) projection -> 4 blocks of
    [self-attention + residual + LayerNorm, feed-forward (256 -> 128) +
    residual + LayerNorm] -> global average pooling over the sequence ->
    Dense(pred_length * num_classes) -> reshape to (pred_length, num_classes)
    -> softmax over the class axis.

    Parameters
    ----------
    seq_length : int
        Number of time steps in each input window.
    num_features : int
        Number of features per time step.
    pred_length : int
        Number of future steps to predict.
    num_classes : int
        Number of classes predicted per step.

    Returns
    -------
    tf.keras.Model
        Uncompiled model mapping (batch, seq_length, num_features) to class
        probabilities of shape (batch, pred_length, num_classes).
    """
    print("debug: build_tflite_compatible_model2...")
    inputs = Input(shape=(seq_length, num_features))
    print(seq_length, num_features)

    # Initial processing
    x = Dense(128, activation='relu')(inputs)
    x = LayerNormalization(epsilon=1e-6)(x)
    x = Dropout(0.1)(x)

    # Custom attention blocks
    for _ in range(4):
        # Attention. num_heads * key_dim (4 * 32 = 128) must equal the width
        # of x, otherwise the residual Add below fails.
        attention_output = TFLiteCompatibleAttention(
            num_heads=4,
            key_dim=32,  # Do not change this
            dropout=0.1
        )(x)
        x = Add()([x, attention_output])
        x = LayerNormalization(epsilon=1e-6)(x)

        # FFN
        ffn = Dense(256, activation='relu')(x)
        ffn = Dropout(0.1)(ffn)
        ffn = Dense(128)(ffn)
        x = Add()([x, ffn])
        x = LayerNormalization(epsilon=1e-6)(x)

    # Output processing
    x = GlobalAveragePooling1D()(x)
    x = Dense(pred_length * num_classes)(x)
    x = Reshape((pred_length, num_classes))(x)
    outputs = Activation('softmax')(x)

    # Return the softmax tensor. Wiring the pre-softmax `x` as the model output
    # would emit raw logits, which does not match a 'categorical_crossentropy'
    # loss that expects probabilities.
    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [35]:
import joblib


# Add rolling features. The derived columns are recomputed from the *_usage
# columns, so running this again after an earlier cell is harmless.
print("debug: Adding rolling features...")
df_pivot = add_rolling_features(df_pivot)

# Prepare sequences
print("debug: Preparing sequences...")
X, y, label_encoder, scaler, features = prepare_sequences(df_pivot)

# Split data: first 80% of the windows for training, the remaining 20% for
# validation (no shuffling, so window order is preserved)
split_idx = int(len(X) * 0.8)
X_train, X_val = X[:split_idx], X[split_idx:]
y_train, y_val = y[:split_idx], y[split_idx:]

print(f"Training shapes: X={X_train.shape}, y={y_train.shape}")

# Build and compile
print("debug: Building and compiling transformer model...")

model = build_tflite_compatible_transformer_model(
    seq_length=X_train.shape[1],
    num_features=X_train.shape[2],
    pred_length=y_train.shape[1],
    num_classes=y_train.shape[2]
)

# Save initial shapes and parameters
model_config = {
    'seq_length': X_train.shape[1],
    'num_features': X_train.shape[2],
    'pred_length': y_train.shape[1],
    'num_classes': y_train.shape[2],
    'feature_columns': features
}

joblib.dump(model_config, 'model_config.pkl')

# Learning rate follows a cosine decay from `initial_learning_rate` down to
# alpha * initial_learning_rate over `decay_steps` optimizer steps
# (there is no warmup phase).
initial_learning_rate = 0.001

lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate,
    decay_steps=1000,
    alpha=0.0001
)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


debug: Adding rolling features...
Adding rolling features for services...
Calculating usage ratios...
Rolling features added for all services.
debug: Preparing sequences...
Training shapes: X=(78, 672, 44), y=(78, 672, 5)
debug: Building and compiling transformer model...
debug: build_tflite_compatible_model2...
672 44


2024-12-04 15:38:13.015470: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-12-04 15:38:13.015495: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-12-04 15:38:13.015504: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-12-04 15:38:13.015536: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-04 15:38:13.015549: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


debug: Adding rolling features...
debug: Preparing sequences...
Training shapes: X=(78, 672, 44), y=(78, 672, 5)
debug: Building and compiling transformer model...
debug: build_tflite_compatible_model2...


In [36]:
# import tensorflow as tf
# print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))


In [37]:
# tf.debugging.set_log_device_placement(True)

# # Create a simple tensor operation
# a = tf.constant([[1.0, 2.0, 3.0]])
# b = tf.constant([[4.0], [5.0], [6.0]])
# c = tf.matmul(a, b)

# print(c)


In [38]:
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress INFO and WARNING logs


In [None]:
# Train
# The optimizer was compiled with a CosineDecay LearningRateSchedule, so the
# learning rate is already decayed per step. ReduceLROnPlateau is therefore not
# used: it works by overwriting the optimizer's learning rate, which cannot be
# set when the learning rate is a schedule object.
print("debug: Training model...")
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[
        # Stop once validation accuracy has not improved for 10 epochs and
        # roll back to the best weights seen so far
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True
        )
    ],
    verbose=1
)


debug: Training model...
Epoch 1/50


2024-12-04 15:38:23.288284: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


In [None]:
import matplotlib.pyplot as plt

# Plot training and validation loss
def plot_training_history(history):
    """
    Plot the per-epoch training loss and, when available, the validation loss.

    Parameters
    ----------
    history : object
        Object exposing a `history` dict with a 'loss' list and optionally a
        'val_loss' list, as returned by `model.fit`.
    """
    plt.figure(figsize=(10, 6))

    # The curves show whatever loss the model was compiled with, so the legend
    # does not name a specific loss (the former "(MSE)" label was wrong for a
    # model trained with categorical cross-entropy).
    plt.plot(history.history['loss'], label='Training Loss')
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()

# Call the function to plot
plot_training_history(history)


In [None]:
import os
# Make sure the output directory exists, then persist the trained model under it.
# NOTE(review): the save path has no file extension; whether that is accepted,
# and which on-disk format results, depends on the installed Keras version — confirm.
os.makedirs('qos', exist_ok=True)
print("\n Savings model...")
model.save("./qos/qos_predicted_model")

In [None]:
import numpy as np

# Evaluate the model on the held-out validation split. This notebook creates no
# separate test set (X_test / y_test were never defined), so X_val / y_val are
# the only data the model was not fitted on.
test_loss, test_accuracy = model.evaluate(X_val, y_val, verbose=1)
print(f"Validation Loss: {test_loss:.4f}")
print(f"Validation Accuracy: {test_accuracy:.2f}")

# Generate predictions on the held-out split
y_pred = model.predict(X_val)

# Convert predictions and true labels to class indices
y_pred_classes = np.argmax(y_pred, axis=-1)  # Predicted classes
y_true_classes = np.argmax(y_val, axis=-1)  # True classes

# Calculate per-group accuracy. A class's position in label_encoder.classes_
# is its encoded index.
print("\nPer-Group Accuracy:")
for group_idx, group in enumerate(label_encoder.classes_):
    mask = y_true_classes == group_idx  # Steps whose true class is this group
    if not mask.any():
        # Avoid taking the mean of an empty selection (NaN plus a RuntimeWarning)
        print(f"{group}: n/a (no samples in the evaluation split)")
        continue
    group_acc = np.mean(y_pred_classes[mask] == y_true_classes[mask])  # Group accuracy
    print(f"{group}: {group_acc * 100:.2f}%")
