In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
import matplotlib.pyplot as plt

# Load the raw activity log.
data = pd.read_csv("user_activity_pattern_updated_v6_30_nov.csv")  # Replace with your file path

# Parse the timestamp column and put the rows in chronological order.
data['timestamp'] = pd.to_datetime(data['timestamp'])
data = data.sort_values(by='timestamp')

# Total usage per (timestamp, service_group, service_name).
# NOTE(review): rows are grouped on the raw timestamp values, so these are hourly
# totals only if the timestamps already sit on an hourly grid -- confirm against the CSV.
data_agg = data.groupby(['timestamp', 'service_group', 'service_name'])['usage_minutes'].sum().reset_index()

# One row per timestamp, one column per (service_group, service_name);
# combinations with no usage at a timestamp are filled with 0.
pivoted_data = data_agg.pivot_table(
    index='timestamp', columns=['service_group', 'service_name'], values='usage_minutes', fill_value=0
)

# Normalize the data.
# The scaler is fitted on the leading (training) part of the series only, so the
# min/max of the held-out tail do not leak into training. The fraction mirrors the
# 0.8 chronological split applied to the windows further down; keep the two in sync.
# Values in the tail may therefore fall outside [0, 1] after scaling.
scaler_fit_fraction = 0.8
scaler_fit_rows = max(1, int(scaler_fit_fraction * len(pivoted_data)))
scaler = MinMaxScaler()
scaler.fit(pivoted_data.iloc[:scaler_fit_rows])
scaled_data = scaler.transform(pivoted_data)

# Create sequences for GRU
def create_sequences(data, sequence_length):
    """Slice ``data`` into overlapping windows paired with their next-step targets.

    Window ``k`` holds rows ``k`` .. ``k + sequence_length - 1`` of ``data`` and is
    paired with row ``k + sequence_length`` as its target, for every ``k`` in
    ``range(len(data) - sequence_length)``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the stacked windows and the stacked targets.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    targets = [data[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Number of consecutive rows per input window (e.g., 24 hours)
sequence_length = 24
X, y = create_sequences(scaled_data, sequence_length)

# Chronological split: the first 80% of the windows train the model, the rest are held out
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Report the resulting array shapes as a sanity check
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (36526, 24, 16), y_train shape: (36526, 16)
X_test shape: (9132, 24, 16), y_test shape: (9132, 16)


In [None]:
# Assemble the network layer by layer: two stacked GRU layers, each followed by
# dropout, then a dense layer that emits one value per feature.
model = Sequential()
model.add(GRU(64, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(GRU(64, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(X_train.shape[2]))  # Output layer with the number of features (services)

# Mean-squared-error loss with the Adam optimizer; mean absolute error is tracked as a metric
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Fit on the training windows.
# NOTE(review): the held-out windows double as validation data here, so the
# validation curve is not an independent estimate of test performance.
history = model.fit(
    X_train, y_train,
    batch_size=64,
    epochs=10,  # Adjust epochs as needed
    validation_data=(X_test, y_test),
    verbose=1
)




2024-12-02 06:28:48.442036: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-12-02 06:28:48.442125: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-12-02 06:28:48.442147: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-12-02 06:28:48.442398: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-02 06:28:48.442850: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


In [None]:
# Generate future predictions
def predict_future(model, input_data, future_steps):
    """Roll ``model`` forward autoregressively for ``future_steps`` steps.

    Only the last window of ``input_data`` seeds the forecast. At each step the
    model is given the current window as a batch of one, the first row of its
    output is recorded, and the window slides forward by dropping its oldest row
    and appending that prediction.

    Parameters:
        model: Object exposing ``predict(batch, verbose=...)``. It is called with an
            array of shape ``(1, window_length, n_features)`` and must return an
            array whose first row is the next step's feature vector.
        input_data: Indexable collection of windows; ``input_data[-1]`` is the seed.
        future_steps: Number of steps to generate.

    Returns:
        ``np.ndarray`` with one row per generated step (stacked predictions).
    """
    window = np.asarray(input_data[-1])  # Start from the last available window
    predictions = []
    for _ in range(future_steps):
        # verbose=0: this runs once per step, so the default per-call progress
        # output would flood the notebook with one progress bar per forecast step.
        next_step = model.predict(window[np.newaxis, :, :], verbose=0)[0]
        predictions.append(next_step)
        # Slide the window: drop the oldest row, append the new prediction.
        window = np.vstack([window[1:], next_step[np.newaxis, :]])
    return np.array(predictions)

# Predict the next 15 days (15 * 24 = 360 hourly steps)
future_steps = 15 * 24

# Seed the forecast with the most recent `sequence_length` observed rows.
# The last window in X_test stops one row short of the end of the series (its
# target is the final observation), so seeding from it would make the first
# "future" step a prediction of the last known timestamp and shift every
# forecast by one step relative to `future_timestamps`.
latest_window = scaled_data[-sequence_length:]
future_predictions = predict_future(model, latest_window[np.newaxis, ...], future_steps)

# Inverse transform the predictions to get the actual scale
future_predictions_rescaled = scaler.inverse_transform(future_predictions)

# Generate future timestamps: one per forecast step, starting one hour after the
# last observed timestamp. A Timedelta is used as the frequency instead of the
# 'H' alias, which newer pandas releases deprecate.
last_timestamp = pivoted_data.index[-1]
one_hour = pd.Timedelta(hours=1)
future_timestamps = pd.date_range(start=last_timestamp + one_hour, periods=future_steps, freq=one_hour)

# Combine predictions into a DataFrame with the timestamps as a regular column
future_df = (
    pd.DataFrame(future_predictions_rescaled, index=future_timestamps, columns=pivoted_data.columns)
    .reset_index()
    .rename(columns={"index": "timestamp"})
)

# Save predictions to CSV
future_df.to_csv("future_predictions.csv", index=False)
print("Future predictions saved to 'future_predictions.csv'")


In [None]:
# Held-out actuals followed by the forecast for one service (hourly)

# Select a service group and name (e.g., Gaming - Fortnite)
service_group = "Gaming"
service_name = "Fortnite"

fig, ax = plt.subplots(figsize=(12, 6))

# Actual usage: held-out targets mapped back to minutes, aligned with the last
# len(y_test) timestamps of the pivoted series
ax.plot(
    pivoted_data.index[-len(y_test):],
    scaler.inverse_transform(y_test)[:, pivoted_data.columns.get_loc((service_group, service_name))],
    label="Actual",
    marker='o'
)

# Forecast usage for the same service column
ax.plot(
    future_timestamps,
    future_predictions_rescaled[:, pivoted_data.columns.get_loc((service_group, service_name))],
    label="Predicted",
    linestyle='--'
)

ax.set_title(f"Actual vs Predicted Usage for {service_name} ({service_group})")
ax.set_xlabel("Timestamp")
ax.set_ylabel("Usage (Minutes)")
ax.legend()
ax.grid()
plt.show()


In [None]:
# Aggregate the forecast by hour of day, day of week, and month.
# numeric_only=True keeps the datetime 'timestamp' column out of the reduction;
# without it, pandas >= 2.0 raises a TypeError when it tries to sum datetimes.

# Hourly trends
hourly_trends = future_df.groupby(future_df['timestamp'].dt.hour).sum(numeric_only=True)

# Weekly trends
weekly_trends = future_df.groupby(future_df['timestamp'].dt.dayofweek).sum(numeric_only=True)

# Monthly trends
monthly_trends = future_df.groupby(future_df['timestamp'].dt.month).sum(numeric_only=True)

# One bar chart per aggregation, showing usage summed across all services
for trend_totals, trend_title, trend_xlabel in (
    (hourly_trends, "Predicted Hourly Usage Trends", "Hour of the Day"),
    (weekly_trends, "Predicted Weekly Usage Trends", "Day of the Week"),
    (monthly_trends, "Predicted Monthly Usage Trends", "Month"),
):
    plt.figure(figsize=(10, 6))
    trend_totals.sum(axis=1).plot(kind='bar')
    plt.title(trend_title)
    plt.xlabel(trend_xlabel)
    plt.ylabel("Total Usage (Minutes)")
    plt.show()


In [None]:
# Visualize trends for a specific service
def plot_service_trends(service_group, service_name, y_test, future_predictions_rescaled, future_timestamps, pivoted_data):
    """Plot held-out actual usage and forecast usage for one service.

    The service is located by its ``(service_group, service_name)`` column in
    ``pivoted_data``. ``y_test`` is mapped back to the original scale with the
    notebook-level ``scaler`` (read from the enclosing namespace, not passed in)
    and drawn against the trailing timestamps of ``pivoted_data``;
    ``future_predictions_rescaled`` is drawn against ``future_timestamps``.
    """
    column = pivoted_data.columns.get_loc((service_group, service_name))

    # Series for the selected service
    actual_series = scaler.inverse_transform(y_test)[:, column]
    forecast_series = future_predictions_rescaled[:, column]
    actual_timestamps = pivoted_data.index[-len(actual_series):]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual_timestamps, actual_series, label="Actual", marker='o', color="blue")
    ax.plot(future_timestamps, forecast_series, label="Predicted", linestyle='--', color="orange")
    ax.set_title(f"Actual vs Predicted Usage for {service_name} ({service_group})")
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("Usage (Minutes)")
    ax.legend()
    ax.grid()
    plt.show()

# Example usage: the "Fortnite" service in the "Gaming" group
plot_service_trends(
    service_group="Gaming",
    service_name="Fortnite",
    y_test=y_test,
    future_predictions_rescaled=future_predictions_rescaled,
    future_timestamps=future_timestamps,
    pivoted_data=pivoted_data,
)


In [None]:
# Visualize aggregated trends for a service group
def plot_group_trends(service_group, y_test, future_predictions_rescaled, future_timestamps, pivoted_data):
    """Plot held-out actual and forecast usage summed over one service group.

    Every column of ``pivoted_data`` whose first level equals ``service_group``
    is included. ``y_test`` is mapped back to the original scale with the
    notebook-level ``scaler`` (read from the enclosing namespace, not passed in);
    both series are summed across the group's columns before plotting.
    """
    # Positions of the columns that belong to the requested group
    group_indices = [
        pivoted_data.columns.get_loc(column)
        for column in pivoted_data.columns
        if column[0] == service_group
    ]

    # Per-timestamp totals across the group's services
    actual_totals = scaler.inverse_transform(y_test)[:, group_indices].sum(axis=1)
    forecast_totals = future_predictions_rescaled[:, group_indices].sum(axis=1)
    actual_timestamps = pivoted_data.index[-len(actual_totals):]

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual_timestamps, actual_totals, label="Actual", marker='o', color="blue")
    ax.plot(future_timestamps, forecast_totals, label="Predicted", linestyle='--', color="orange")
    ax.set_title(f"Actual vs Predicted Aggregated Usage for {service_group}")
    ax.set_xlabel("Timestamp")
    ax.set_ylabel("Usage (Minutes)")
    ax.legend()
    ax.grid()
    plt.show()

# Example usage: aggregated trends for the whole "Gaming" group
plot_group_trends(
    service_group="Gaming",
    y_test=y_test,
    future_predictions_rescaled=future_predictions_rescaled,
    future_timestamps=future_timestamps,
    pivoted_data=pivoted_data,
)


In [None]:
### 3. Hourly, Weekly, and Monthly Trends for a Service
The following cell aggregates the predicted usage of a single service by hour of the day, day of the week, and month, and plots each aggregation as a bar chart.


In [None]:
# Plot hourly, weekly, and monthly trends for a service
def plot_time_aggregated_trends(service_group, service_name, future_predictions_rescaled, future_timestamps, pivoted_data):
    """Draw bar charts of one service's forecast summed by hour, weekday, and month.

    The service is located by its ``(service_group, service_name)`` column in
    ``pivoted_data``. Its forecast values are paired with ``future_timestamps``,
    tagged with hour of day, day of week, and month, and summed per tag; each of
    the three aggregations is shown in its own figure.
    """
    column = pivoted_data.columns.get_loc((service_group, service_name))

    # Forecast for the selected service, tagged with the three calendar keys
    predicted_df = pd.DataFrame(
        {"timestamp": future_timestamps, "usage": future_predictions_rescaled[:, column]}
    ).assign(
        hour=lambda frame: frame['timestamp'].dt.hour,
        day_of_week=lambda frame: frame['timestamp'].dt.dayofweek,
        month=lambda frame: frame['timestamp'].dt.month,
    )

    # (grouping key, bar colour, figure title, x-axis label), in display order
    chart_specs = (
        ('hour', 'skyblue', f"Hourly Predicted Usage for {service_name} ({service_group})", "Hour of the Day"),
        ('day_of_week', 'green', f"Weekly Predicted Usage for {service_name} ({service_group})", "Day of the Week"),
        ('month', 'orange', f"Monthly Predicted Usage for {service_name} ({service_group})", "Month"),
    )
    for group_key, bar_color, heading, x_label in chart_specs:
        totals = predicted_df.groupby(group_key)['usage'].sum()
        plt.figure(figsize=(10, 6))
        totals.plot(kind='bar', color=bar_color)
        plt.title(heading)
        plt.xlabel(x_label)
        plt.ylabel("Total Usage (Minutes)")
        plt.show()

# Example usage: hourly, weekly, and monthly forecast totals for "Gaming - Fortnite"
plot_time_aggregated_trends(
    service_group="Gaming",
    service_name="Fortnite",
    future_predictions_rescaled=future_predictions_rescaled,
    future_timestamps=future_timestamps,
    pivoted_data=pivoted_data,
)
