In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Flatten
import matplotlib.pyplot as plt
import gc


In [2]:

# Read the raw usage records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="balanced_dataset_time.csv")


In [3]:

# Parse the 'timestamp' column into pandas datetime values
df['timestamp'] = df['timestamp'].pipe(pd.to_datetime)


In [4]:

# Build a per-day usage count for a single service group ("Gaming" in this example).
service = 'Gaming'
df_daily = (
    df.loc[df['service_group'] == service]
    # Count the rows that fall on each calendar date
    .pipe(lambda rows: rows.groupby(rows['timestamp'].dt.date).size())
    .reset_index(name='usage_count')
    # The grouping keys are plain date objects; turn them back into datetimes
    .assign(timestamp=lambda daily: pd.to_datetime(daily['timestamp']))
)



In [5]:

# Index the daily counts by date and put them in chronological order
df_daily = df_daily.set_index('timestamp').sort_index()


In [6]:

# Rescale 'usage_count' into the [0, 1] range; the fitted scaler is kept so
# values can be mapped back to the original scale later.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df_daily[['usage_count']])
df_daily['usage_count'] = scaler.transform(df_daily[['usage_count']])


In [7]:

# Reduce the dataset size by keeping a contiguous 10% slice (the most recent days)
# for testing. DataFrame.sample() returns rows in random order, which would
# scramble the chronological order that the sliding windows and the date-axis
# plot rely on, so a tail slice of the same size is used instead.
subset_fraction = 0.1
# tail(0) yields an empty frame, so a rounded-down size of 0 is handled safely.
subset_size = int(round(len(df_daily) * subset_fraction))
df_daily = df_daily.tail(subset_size)


In [8]:

# Run a full garbage-collection pass to free up memory.
# gc.collect() only reclaims objects that are no longer reachable; names that are
# still bound in the notebook (e.g. `df`) keep their data alive.
# The cell output is the number returned by gc.collect().
gc.collect()


33

In [9]:

# Number of consecutive observations that make up one input window for the model.
# Kept small (5) to save memory.
sequence_length: int = 5  # Reduced to 5 to save memory


In [10]:

# Prepare the dataset for time series forecasting.
# X[i] holds `sequence_length` consecutive scaled values and y[i] is the value
# that immediately follows that window.
usage_values = df_daily['usage_count'].to_numpy()

if len(usage_values) <= sequence_length:
    # Without this guard X would come out empty and the reshape that follows
    # would fail with an unrelated-looking IndexError.
    raise ValueError(
        f"Need more than {sequence_length} observations to build sequences, "
        f"got {len(usage_values)}"
    )

# sliding_window_view yields len - sequence_length + 1 windows; the last one has
# no following value to predict, so it is dropped. The view is read-only and
# shares memory with the DataFrame column, hence the copies.
X = np.lib.stride_tricks.sliding_window_view(usage_values, sequence_length)[:-1].copy()
y = usage_values[sequence_length:].copy()


In [11]:

# Add a trailing feature axis so X is [samples, time steps, features] for the GRU layers
X = X.reshape((X.shape[0], X.shape[1], 1))


In [12]:

# Run a full garbage-collection pass to free up memory.
# gc.collect() only reclaims objects that are no longer reachable; arrays still
# bound to notebook names stay alive.
# The cell output is the number returned by gc.collect().
gc.collect()


0

In [13]:

# Seed Python's `random`, NumPy and TensorFlow so weight initialisation and
# batch shuffling are repeatable between runs (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

# Define a small GRU model: two stacked 10-unit GRU layers followed by a single
# linear output unit that predicts the next scaled value.
model = Sequential([
    # return_sequences=True passes the full sequence on to the second GRU layer
    GRU(units=10, return_sequences=True, input_shape=(X.shape[1], 1)),
    GRU(units=10),
    Dense(units=1),
])


2024-11-11 15:39:48.446040: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-11-11 15:39:48.446088: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-11-11 15:39:48.446095: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-11-11 15:39:48.446147: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-11 15:39:48.446170: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [14]:

# Configure training: mean-squared-error loss minimised with the Adam optimizer
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)


In [None]:

# Short training run: 2 epochs, mini-batches of 4, with 20% of the samples
# held out for validation. The returned History object is kept in `history`.
history = model.fit(
    X,
    y,
    batch_size=4,
    epochs=2,
    validation_split=0.2,
)


Epoch 1/2


In [None]:

# Run the trained model over every input window to get (scaled) predictions
train_predict = model.predict(x=X)


In [None]:

# Map predictions and targets back from the [0, 1] range to the original scale.
train_predict = scaler.inverse_transform(train_predict)
# The scaler was fitted on a single column, so the targets are passed as an
# (n_samples, 1) column rather than a (1, n_samples) row that only works
# through NumPy broadcasting.
y_actual = scaler.inverse_transform(y.reshape(-1, 1))


In [None]:

# Draw actual and predicted usage counts on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))

# The first `sequence_length` dates have no target value, so they are skipped
target_dates = df_daily.index[sequence_length:]

ax.plot(target_dates, y_actual, label="Actual Usage Count", color='black')
ax.plot(target_dates, train_predict, label="Predicted Usage Count", color='blue')
ax.set_title(f"{service} Usage Prediction Using GRU")
ax.set_xlabel("Date")
ax.set_ylabel("Usage Count")
ax.legend()
plt.show()