In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import plotly.graph_objects as go

# Set random seed for reproducibility (every draw below uses NumPy's global RNG)
np.random.seed(42)

# Generate timestamps for 3 months of data at 5-minute intervals.
# '5min' is the non-deprecated spelling of the minute alias; the older '5T'
# spelling emits a FutureWarning on recent pandas releases.
# pd.date_range includes both endpoints, so the final stamp is 2023-12-01 00:00.
start_date = datetime(2023, 9, 1)
end_date = datetime(2023, 12, 1)
timestamps = pd.date_range(start=start_date, end=end_date, freq='5min')

# Accumulates one dict per (timestamp, endpoint) pair; turned into a DataFrame later
data = []

# Baseline metrics for each endpoint: 'latency' is the mean latency fed to the
# generator (stored downstream as latency_ms), 'error_rate' is a fraction in [0, 1]
endpoint_baselines = {
    '/api/v1/users': {'latency': 150, 'error_rate': 0.02},
    '/api/v1/products': {'latency': 200, 'error_rate': 0.01},
    '/api/v1/orders': {'latency': 300, 'error_rate': 0.03},
    '/api/v1/cart': {'latency': 100, 'error_rate': 0.01},
    '/api/v1/checkout': {'latency': 400, 'error_rate': 0.04}
}

# Common API endpoints, derived from the baselines so the two can never drift
# apart (dicts preserve insertion order, so the order matches the table above)
endpoints = list(endpoint_baselines)

# Synthesize one record per (timestamp, endpoint) pair.
# The five np.random.normal draws inside the inner loop are kept in a fixed
# order (requests, latency, error rate, CPU, memory) because they all consume
# the same seeded global RNG stream.
for ts in timestamps:
    # Sinusoidal multipliers: one cycle per 24 hours and one per 7 days
    hour_factor = 1 + 0.3 * np.sin(2 * np.pi * ts.hour / 24)
    day_factor = 1 + 0.2 * np.sin(2 * np.pi * ts.dayofweek / 7)

    for endpoint in endpoints:
        baseline = endpoint_baselines[endpoint]
        base_latency = baseline['latency']
        base_error_rate = baseline['error_rate']

        # Draw 1: request count for this 5-minute interval, truncated to a
        # non-negative integer
        base_requests = np.random.normal(1000, 100) * hour_factor * day_factor
        requests = max(int(base_requests), 0)

        # Draw 2: latency centred on the hour-scaled baseline with a spread of
        # 10% of the baseline, floored at 10
        latency_draw = np.random.normal(
            base_latency * hour_factor,
            base_latency * 0.1
        )
        latency = max(latency_draw, 10)

        # Draw 3: error rate as a fraction, capped at 1.0 and then floored at 0.0
        error_rate_draw = np.random.normal(
            base_error_rate * hour_factor,
            base_error_rate * 0.2
        )
        error_rate = max(min(error_rate_draw, 1.0), 0.0)

        # Error count implied by the rate, truncated toward zero
        errors = int(requests * error_rate)

        # Draw 4: CPU utilization follows both cycles, floored at 0 then capped at 100
        cpu_draw = np.random.normal(60, 10) * hour_factor * day_factor
        cpu_util = min(max(cpu_draw, 0), 100)

        # Draw 5: memory utilization follows only the daily cycle, same 0-100 bounds
        memory_draw = np.random.normal(70, 5) * hour_factor
        memory_util = min(max(memory_draw, 0), 100)

        # error_rate is stored as a percentage; the other floats are rounded to 2 dp
        record = {
            'timestamp': ts,
            'endpoint': endpoint,
            'requests': requests,
            'latency_ms': round(latency, 2),
            'errors': errors,
            'error_rate': round(error_rate * 100, 2),
            'cpu_utilization': round(cpu_util, 2),
            'memory_utilization': round(memory_util, 2)
        }
        data.append(record)

# Create DataFrame and sort by timestamp.
# Every timestamp is shared by several rows (one per endpoint). The default
# sort algorithm (quicksort) is not stable, so it does not guarantee that rows
# with equal timestamps keep their generation order; mergesort is stable.
df = pd.DataFrame(data)
df = df.sort_values('timestamp', kind='mergesort')

# Save to CSV (read back by the modelling cell)
df.to_csv('api_performance_data.csv', index=False)

# Display a sample of the data with better formatting
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Work on an independent copy of the first 15 rows so the display formatting
# below never touches df itself
sample_df = df.head(15).copy()

# Format the data. The timestamp column is used directly with the .dt accessor,
# without an extra pd.to_datetime round-trip.
sample_df['timestamp'] = sample_df['timestamp'].dt.strftime('%Y-%m-%d %H:%M')
# Thousands separators turn the request counts into strings (display only)
sample_df['requests'] = sample_df['requests'].map('{:,}'.format)
sample_df['latency_ms'] = sample_df['latency_ms'].round(1)
sample_df['error_rate'] = sample_df['error_rate'].round(2)
sample_df['cpu_utilization'] = sample_df['cpu_utilization'].round(1)
sample_df['memory_utilization'] = sample_df['memory_utilization'].round(1)

# Bold (HTML) captions, listed in the same order as sample_df's columns
header_labels = [
    '<b>Timestamp</b>', '<b>Endpoint</b>', '<b>Requests</b>',
    '<b>Latency (ms)</b>', '<b>Errors</b>', '<b>Error Rate (%)</b>',
    '<b>CPU (%)</b>', '<b>Memory (%)</b>'
]
left_aligned = ['left'] * len(header_labels)

# Header row styling
table_header = dict(
    values=header_labels,
    font=dict(size=12, color='rgb(55, 65, 81)'),
    fill_color='rgb(244, 246, 248)',
    align=left_aligned,
    height=36
)

# Body styling; go.Table takes its data column by column
table_cells = dict(
    values=[sample_df[name] for name in sample_df],
    font=dict(size=11, color='rgb(75, 85, 99)'),
    fill_color='rgb(255, 255, 255)',
    align=left_aligned,
    height=30
)

# Create the table
fig = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])

# Cleaner look: fixed width, no margins, transparent paper background
fig.update_layout(
    width=1000,
    margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
    paper_bgcolor='rgba(0,0,0,0)'
)

# Save as PNG at 2x scale
fig.write_image("api_metrics_table.png", scale=2)

# Display in notebook
fig.show()
# print("\nFirst few rows of our API performance dataset:")
# print(df.head())

# print("\nDataset Summary:")
# print(f"Total number of records: {len(df):,}")
# print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
# print("\nUnique endpoints:")
# for endpoint in df['endpoint'].unique():
#     print(f"- {endpoint}")

# # Basic statistics for numerical columns
# print("\nBasic statistics for key metrics:")
# print(df[['requests', 'latency_ms', 'error_rate', 'cpu_utilization', 'memory_utilization']].describe())

In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np

def create_daily_pattern(df):
    """Plot mean requests and mean latency per hour of day on a dual-axis chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'timestamp' column that ``pd.to_datetime`` can parse, plus
        numeric 'requests' and 'latency_ms' columns. The frame is left
        untouched: no helper column is written into it.

    Returns
    -------
    plotly.graph_objects.Figure
        Requests on the primary y-axis, latency on the secondary y-axis.

    Side effects
    ------------
    Writes ``daily_pattern.png`` (scale=2) to the current working directory.
    """
    # Keep the hour as a standalone, index-aligned Series rather than adding a
    # 'hour' column to the caller's DataFrame (which would leak state into
    # every later cell that uses the same frame).
    hour = pd.to_datetime(df['timestamp']).dt.hour.rename('hour')

    # Group by hour and calculate means; the Series name becomes the 'hour' column
    hourly_stats = (
        df.groupby(hour)[['requests', 'latency_ms']]
        .mean()
        .reset_index()
    )

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add traces
    fig.add_trace(
        go.Scatter(x=hourly_stats['hour'], y=hourly_stats['requests'],
                  name="Requests", line=dict(color="#2E86C1")),
        secondary_y=False,
    )

    fig.add_trace(
        go.Scatter(x=hourly_stats['hour'], y=hourly_stats['latency_ms'],
                  name="Latency (ms)", line=dict(color="#E74C3C")),
        secondary_y=True,
    )

    # Update layout; the legend is anchored inside the top-right corner
    fig.update_layout(
        title="24-Hour Traffic Pattern with Latency",
        xaxis_title="Hour of Day",
        template="plotly_white",
        height=500,
        width=800,
        font=dict(size=10),
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        )
    )

    fig.update_yaxes(title_text="Average Requests", secondary_y=False)
    fig.update_yaxes(title_text="Average Latency (ms)", secondary_y=True)

    # Save the figure
    fig.write_image("daily_pattern.png", scale=2)

    return fig

def create_weekly_pattern(df):
    """Plot mean requests per weekday as one line per endpoint.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'timestamp' column that ``pd.to_datetime`` can parse, plus
        'endpoint' and numeric 'requests' columns. The frame is left
        untouched: the weekday is computed on a temporary copy.

    Returns
    -------
    plotly.graph_objects.Figure
        Line chart with weekdays ordered Monday through Sunday.

    Side effects
    ------------
    Writes ``weekly_pattern.png`` (scale=2) to the current working directory.
    """
    # Calendar order for the x-axis
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # An ordered categorical makes groupby return rows Monday..Sunday instead
    # of alphabetically (Friday, Monday, Saturday, ...).
    day = pd.Categorical(
        pd.to_datetime(df['timestamp']).dt.day_name(),
        categories=day_order,
        ordered=True,
    )

    # Group by day and endpoint. assign() works on a copy, so the caller's
    # frame gains no 'day' column; observed=True keeps only the weekday /
    # endpoint combinations that actually occur.
    weekly_stats = (
        df.assign(day=day)
        .groupby(['day', 'endpoint'], observed=True)['requests']
        .mean()
        .reset_index()
    )
    # Plotly connects line points in row order, so the rows must already be in
    # calendar order; plain strings are then enough for plotting.
    weekly_stats['day'] = weekly_stats['day'].astype(str)

    # Create figure
    fig = px.line(weekly_stats, x='day', y='requests', color='endpoint',
                  title="Weekly Traffic Patterns by Endpoint",
                  template="plotly_white",
                  height=500,
                  width=800)

    # Customize layout; the category array pins the axis tick order as well
    fig.update_layout(
        xaxis_title="Day of Week",
        yaxis_title="Average Requests",
        legend_title="Endpoint",
        font=dict(size=10),
        xaxis={'categoryorder': 'array', 'categoryarray': day_order}
    )

    # Save the figure
    fig.write_image("weekly_pattern.png", scale=2)

    return fig

def create_correlation_matrix(df):
    """Render a heatmap of the pairwise correlations between the key metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric 'requests', 'latency_ms', 'error_rate',
        'cpu_utilization' and 'memory_utilization' columns.

    Returns
    -------
    plotly.graph_objects.Figure
        Heatmap of ``DataFrame.corr()`` over those five columns.

    Side effects
    ------------
    Writes ``correlation_matrix.png`` (scale=2) to the current working directory.
    """
    # Select numerical columns
    numeric_cols = ['requests', 'latency_ms', 'error_rate', 'cpu_utilization', 'memory_utilization']

    # Calculate correlation matrix
    corr_matrix = df[numeric_cols].corr()

    # Create heatmap. RdBu is a diverging scale, so the colour range is pinned
    # to the full correlation range [-1, 1]. That puts the neutral midpoint at
    # 0 instead of wherever the data's own min/max happen to fall.
    fig = px.imshow(corr_matrix,
                    labels=dict(color="Correlation"),
                    color_continuous_scale="RdBu",
                    zmin=-1,
                    zmax=1,
                    title="Metric Correlations")

    fig.update_layout(
        template="plotly_white",
        height=500,
        width=800,
        font=dict(size=10)
    )

    # Save the figure
    fig.write_image("correlation_matrix.png", scale=2)

    return fig

# Build the three exploratory figures from the generated dataset
# (each call also writes its own PNG)
daily_fig = create_daily_pattern(df)
weekly_fig = create_weekly_pattern(df)
corr_fig = create_correlation_matrix(df)

# Render them in the notebook, in the order they were created
for rendered in (daily_fig, weekly_fig, corr_fig):
    rendered.show()

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def create_prediction_plots(df, test_size=0.2, random_state=42):
    """Fit a linear regression of latency on request count and plot the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric 'requests' and 'latency_ms' columns.
    test_size : float or int, default 0.2
        Passed to ``train_test_split``. The default matches the value that was
        previously hard-coded.
    random_state : int or None, default 42
        Passed to ``train_test_split``. The default matches the seed that was
        previously hard-coded, so existing callers get the same split.

    Returns
    -------
    (plotly.graph_objects.Figure, sklearn.linear_model.LinearRegression)
        A two-row figure (training scatter with fitted line, then actual vs
        predicted on the held-out rows) and the fitted model.

    Side effects
    ------------
    Writes ``prediction_model.png`` (scale=2) to the current working directory.
    """
    # Prepare data: scikit-learn expects a 2-D feature matrix, hence the reshape
    X = df['requests'].values.reshape(-1, 1)
    y = df['latency_ms'].values

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Only held-out predictions are plotted, so no training-set predictions
    # are computed.
    y_pred_test = model.predict(X_test)

    # Create subplots
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=('Model Fit', 'Actual vs Predicted'),
        vertical_spacing=0.15
    )

    # Plot 1: Model Fit
    fig.add_trace(
        go.Scatter(x=X_train.flatten(), y=y_train, mode='markers',
                  name='Training Data', marker=dict(color='blue', opacity=0.5)),
        row=1, col=1
    )

    # Add regression line, evaluated on 100 evenly spaced points across the
    # full range of the feature
    X_line = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
    y_line = model.predict(X_line)
    fig.add_trace(
        go.Scatter(x=X_line.flatten(), y=y_line, mode='lines',
                  name='Regression Line', line=dict(color='red')),
        row=1, col=1
    )

    # Plot 2: Actual vs Predicted
    fig.add_trace(
        go.Scatter(x=y_test, y=y_pred_test, mode='markers',
                  name='Test Predictions', marker=dict(color='green')),
        row=2, col=1
    )

    # Add perfect prediction line (y = x) spanning both actual and predicted values
    min_val = min(y_test.min(), y_pred_test.min())
    max_val = max(y_test.max(), y_pred_test.max())
    fig.add_trace(
        go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode='lines',
                  name='Perfect Prediction', line=dict(color='black', dash='dash')),
        row=2, col=1
    )

    # Update layout
    fig.update_layout(
        height=800,
        width=800,
        showlegend=True,
        title_text="API Performance Prediction Model",
        template="plotly_white"
    )

    # Update axes labels
    fig.update_xaxes(title_text="Requests", row=1, col=1)
    fig.update_yaxes(title_text="Latency (ms)", row=1, col=1)
    fig.update_xaxes(title_text="Actual Latency (ms)", row=2, col=1)
    fig.update_yaxes(title_text="Predicted Latency (ms)", row=2, col=1)

    # Save the figure
    fig.write_image("prediction_model.png", scale=2)

    return fig, model

# Summarize model quality on held-out data (returns the numbers; the caller prints them)
def print_model_metrics(model, X_test, y_test):
    """Compute rounded evaluation metrics for a fitted single-feature model.

    Despite the name, nothing is printed here: a dict is returned.

    Parameters
    ----------
    model : object providing ``predict``, ``score``, ``coef_`` and ``intercept_``
    X_test : feature matrix handed to ``model.predict`` / ``model.score``
    y_test : true target values matching ``X_test``

    Returns
    -------
    dict
        'Mean Squared Error' (2 dp), 'R-squared Score' (3 dp),
        'Coefficient' (first entry of ``coef_``, 3 dp) and 'Intercept' (2 dp).
    """
    # Mean of the squared residuals between truth and prediction
    residuals = y_test - model.predict(X_test)
    mean_squared_error = np.mean(residuals ** 2)

    # Whatever score the model itself reports on this data
    r_squared = model.score(X_test, y_test)

    # Only the first coefficient is reported
    slope = model.coef_[0]

    summary = {}
    summary['Mean Squared Error'] = round(mean_squared_error, 2)
    summary['R-squared Score'] = round(r_squared, 3)
    summary['Coefficient'] = round(slope, 3)
    summary['Intercept'] = round(model.intercept_, 2)
    return summary

# Driver: reload the generated dataset, fit and plot the model, then report metrics
if __name__ == "__main__":
    # Read back the CSV written by the data-generation code
    df = pd.read_csv('api_performance_data.csv')

    # Build the figure and get the fitted regression
    fig, model = create_prediction_plots(df)

    # Rebuild the hold-out split with the same test_size and random_state, so
    # the metrics are computed on the rows the model was not trained on
    X = df['requests'].to_numpy().reshape(-1, 1)
    y = df['latency_ms'].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )
    metrics = print_model_metrics(model, X_test, y_test)

    # Report one "name: value" line per metric
    print("\nModel Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")