In [None]:
# !pip install tsai fastai torch pandas numpy matplotlib seaborn

## **Extract the Top 4 APIs**

In [None]:
from tsai.all import *
import pandas as pd
import os

# Load the dataset
file_path = "/content/API Call Dataset.csv"
df = pd.read_csv(file_path)

# Display the first few rows and the column names for verification
print("Initial Data:")
print(df.head())
print("\nColumn Names:", df.columns)

# Column that holds the API identifier
api_column = "API Code"

# Root folder for generated files. It was previously never defined, so this
# cell raised NameError on a fresh kernel. The modelling cell reads from
# '/content/top_api', so the per-API CSVs have to be written beneath '/content'.
output_directory = "/content"

# Count the number of calls for each API and keep the four most frequent
api_call_counts = df[api_column].value_counts().nlargest(4)
top_apis = api_call_counts.index

print("\nTop 4 APIs based on the number of calls:")
print(api_call_counts)

# Create the 'top_api' directory if it doesn't exist
top_api_dir = os.path.join(output_directory, "top_api")
os.makedirs(top_api_dir, exist_ok=True)

# Filter and save each top API's rows into its own CSV file in the 'top_api' directory
for api in top_apis:
    output_path = os.path.join(top_api_dir, f"{api}_data.csv")
    api_df = df[df[api_column] == api]
    api_df.to_csv(output_path, index=False)
    print(f"Data for {api} saved to {output_path}")

Initial Data:
  API Code      Time of Call  Unnamed: 2  Unnamed: 3  Unnamed: 4  \
0       A1  01-02-2025 00:00         NaN         NaN         NaN   
1       A1  01-02-2025 00:58         NaN         NaN         NaN   
2       A1  01-02-2025 01:49         NaN         NaN         NaN   
3       A1  01-02-2025 02:01         NaN         NaN         NaN   
4       A1  01-02-2025 03:11         NaN         NaN         NaN   

                      top 4 API  Unnamed: 6 Unnamed: 7    Unnamed: 8  
0  create 5 models for each api         NaN        API  No. of Calls  
1                           NaN         NaN         A9          2451  
2                           NaN         NaN         A2          2438  
3                           NaN         NaN         A7          2410  
4                           NaN         NaN         A4          2402  

Column Names: Index(['API Code', 'Time of Call', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4',
       'top 4 API', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8

## Apply 5 Models to Each Top-API File
*model_names = ['MLP', 'GRU', 'FCN', 'LSTM', 'TCN']*

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tsai.all import *
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta
import os
from prettytable import PrettyTable

# Directory that is scanned for per-API CSV files (one file per API)
folder_path = '/content/top_api'

# Clean a raw 'Time of Call' string (the datetime conversion itself happens elsewhere)
def clean_time_column(time_str):
    """Return ``time_str`` with every 'A201' occurrence removed and outer whitespace trimmed.

    The value stays a string; no datetime parsing is performed here.
    """
    return time_str.replace('A201', '').strip()

# Build supervised (window -> next gap) samples from the call timestamps
def prepare_data(df, lookback=5):
    """Convert consecutive call times into scaled sliding-window samples.

    Args:
        df: DataFrame whose 'Time of Call' column is datetime-typed (``.dt`` is used on its diff).
        lookback: Number of consecutive inter-call gaps that form one input window.

    Returns:
        Tuple ``(X, y, scaler, lookback)`` where ``X`` stacks the flattened windows of
        ``lookback`` scaled gaps, ``y`` stacks the scaled gap that follows each window,
        and ``scaler`` is the ``MinMaxScaler`` fitted on all gaps (in minutes).
    """
    # Gap between successive calls, in minutes; the first row has no predecessor and is dropped.
    gaps_min = (df['Time of Call'].diff().dt.total_seconds() / 60).dropna()

    scaler = MinMaxScaler()
    gaps_scaled = scaler.fit_transform(gaps_min.values.reshape(-1, 1))

    n_windows = len(gaps_scaled) - lookback
    X = [gaps_scaled[start:start + lookback].flatten() for start in range(n_windows)]
    y = [gaps_scaled[start + lookback] for start in range(n_windows)]
    return np.array(X), np.array(y), scaler, lookback

# Custom PyTorch models
class MLPModel(nn.Module):
    """Feed-forward network: input -> hidden -> hidden // 2 -> output, with ReLU between layers."""

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        half_hidden = hidden_size // 2
        stack = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, half_hidden),
            nn.ReLU(),
            nn.Linear(half_hidden, output_size),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.layers(x)

class LSTMModel(nn.Module):
    """Single-layer batch-first LSTM followed by a linear head on the last time step."""

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final step's hidden output.

        A 2-D input gets a sequence axis of length 1 inserted at position 1,
        so each row is fed to the LSTM as a single time step.
        """
        sequence = x.unsqueeze(1) if x.dim() == 2 else x
        hidden_steps, _state = self.lstm(sequence)
        last_step = hidden_steps[:, -1]
        return self.fc(last_step)

class GRUModel(nn.Module):
    """Single-layer batch-first GRU followed by a linear head on the last time step."""

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the GRU over ``x`` and project the final step's hidden output.

        A 2-D input gets a sequence axis of length 1 inserted at position 1,
        so each row is fed to the GRU as a single time step.
        """
        sequence = x.unsqueeze(1) if x.dim() == 2 else x
        hidden_steps, _state = self.gru(sequence)
        last_step = hidden_steps[:, -1]
        return self.fc(last_step)

class TCNBlock(nn.Module):
    """Conv1d -> ReLU -> BatchNorm1d building block."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, padding=0):
        super().__init__()
        self.conv1 = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            dilation=dilation,
            padding=padding,
        )
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        """Convolve ``x``, apply ReLU, then batch-normalise the activations."""
        activated = self.relu(self.conv1(x))
        return self.bn(activated)

class TCNModel(nn.Module):
    """Two stacked TCNBlocks (kernel 3, dilations 1 and 2) with a mean-pooled linear head."""

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        first_block = TCNBlock(input_size, hidden_size, kernel_size=3, dilation=1, padding=1)
        second_block = TCNBlock(hidden_size, hidden_size, kernel_size=3, dilation=2, padding=2)
        self.tcn_blocks = nn.Sequential(first_block, second_block)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Insert a size-1 axis at position 2, run the blocks, average over the last axis, project.

        For a 2-D ``x`` this gives Conv1d an input whose channel axis is the second
        axis of ``x`` and whose length axis has size 1.
        """
        # Same result as unsqueeze(1).transpose(1, 2): new axis ends up at position 2.
        conv_input = x.unsqueeze(2)
        features = self.tcn_blocks(conv_input)
        pooled = features.mean(dim=2)
        return self.fc(pooled)

class FCNModel(nn.Module):
    """Three-layer 1-D convolutional network with global average pooling and a linear head.

    The convolutions use ``padding='same'`` so the sequence length is preserved
    through every layer. Without padding the 8/5/3 kernels shrink the sequence
    by 13 steps in total, which raised an error for any window shorter than 14
    (including the default ``lookback=5`` windows used in this notebook).

    Args:
        input_size: Accepted for signature parity with the other models; the
            convolution stack always reads a single input channel, so it is unused.
        hidden_size: Number of channels produced by the first and third convolution
            (the second produces ``hidden_size * 2``).
        output_size: Width of the final linear layer.
    """

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        self.conv1 = nn.Conv1d(1, hidden_size, kernel_size=8, padding='same')
        self.conv2 = nn.Conv1d(hidden_size, hidden_size * 2, kernel_size=5, padding='same')
        self.conv3 = nn.Conv1d(hidden_size * 2, hidden_size, kernel_size=3, padding='same')
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, sequence_length) to (batch, output_size)."""
        # Conv1d expects [batch_size, channels, sequence_length]; add the channel axis.
        x = x.unsqueeze(1)
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # Global average over the sequence axis, then drop that (now size-1) axis.
        x = self.pool(x).squeeze(-1)
        return self.fc(x)





# Function to train and predict with a model
def train_and_predict(model, X, y, last_timestamp, scaler, epochs=200, lr=0.01):
    """Fit ``model`` on ``(X, y)`` with full-batch Adam/MSE and forecast the next call time.

    Args:
        model: ``nn.Module`` mapping a batch of windows to one prediction per row.
        X: Float tensor of input windows, one window per row.
        y: Float tensor of targets; ``y[i]`` is the value that follows window ``X[i]``.
        last_timestamp: Datetime of the most recent observed call.
        scaler: Object whose ``inverse_transform`` maps a scaled (1, 1) array back to minutes.
        epochs: Number of full-batch optimisation steps (default 200).
        lr: Adam learning rate (default 0.01).

    Returns:
        ``last_timestamp`` advanced by the predicted gap in minutes.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(X), y)
        loss.backward()
        optimizer.step()

    # The target of the last training window (y[-1]) is already observed, so
    # forecasting from X[-1] would only re-predict a known gap. Slide the window
    # forward by one step so it ends with the most recent observation.
    if X.dim() == 2 and y[-1].numel() == 1:
        next_window = torch.cat([X[-1, 1:], y[-1].reshape(1).to(X.dtype)])
    else:
        # Layout is not the (samples, lookback) / single-target one; keep the last window.
        next_window = X[-1]

    # Switch to inference mode: BatchNorm in training mode rejects a batch of one
    # sample, and normalisation/dropout layers must use their eval behaviour here.
    model.eval()
    with torch.no_grad():
        pred_scaled = model(next_window.unsqueeze(0))

    pred_minutes = float(scaler.inverse_transform(pred_scaled.detach().cpu().numpy())[0][0])
    return last_timestamp + timedelta(minutes=pred_minutes)

# Combine predictions from all models
def combine_predictions(predictions):
    """Average the predicted timestamps in ``predictions`` (a name -> datetime mapping).

    Each timestamp is expressed as minutes after the earliest one, those
    offsets are averaged, and the mean offset is added back to the earliest.
    """
    timestamps = list(predictions.values())
    earliest = min(timestamps)
    offsets_min = [(stamp - earliest).total_seconds() / 60 for stamp in timestamps]
    mean_offset = sum(offsets_min) / len(offsets_min)
    return earliest + timedelta(minutes=mean_offset)

# Initialize results table
results_table = PrettyTable(['File', 'Final Predicted Time'])

# Seed PyTorch so weight initialisation (and therefore the predictions) is repeatable.
torch.manual_seed(42)

# Iterate through all CSV files in the folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the output order stable between runs.
for filename in sorted(os.listdir(folder_path)):
    if filename.endswith('.csv'):
        filepath = os.path.join(folder_path, filename)
        df = pd.read_csv(filepath)
        df['Time of Call'] = pd.to_datetime(df['Time of Call'].apply(clean_time_column), format='%d-%m-%Y %H:%M')
        df = df.sort_values('Time of Call')
        X, y, scaler, seq_len = prepare_data(df)

        # With too few rows no training window can be built and X is not 2-D;
        # skip the file instead of failing on X.shape[1] below.
        if X.ndim != 2 or len(X) == 0:
            print(f"File: {filename}, No valid predictions made.")
            continue

        X_tensor = torch.tensor(X, dtype=torch.float32)
        y_tensor = torch.tensor(y, dtype=torch.float32)
        last_timestamp = df['Time of Call'].max()

        # One fresh model per name, in the same order. Previously only the MLP
        # was instantiated, so zip() silently dropped the other four names and
        # the "combined" prediction was the MLP's alone.
        model_names = ['MLP', 'GRU', 'FCN', 'LSTM', 'TCN']
        model_classes = [MLPModel, GRUModel, FCNModel, LSTMModel, TCNModel]
        models = [model_cls(X.shape[1]) for model_cls in model_classes]

        model_predictions = {}
        for model, model_name in zip(models, model_names):
            try:
                prediction = train_and_predict(model, X_tensor, y_tensor, last_timestamp, scaler)
                model_predictions[model_name] = prediction
            except Exception as e:
                # Best effort: a failing model is reported and left out of the average.
                print(f"Error with {model_name} in {filename}: {e}")

        if model_predictions:
            final_prediction = combine_predictions(model_predictions)
            results_table.add_row([filename, final_prediction.strftime('%d-%m-%Y %H:%M')])
            print(f"File: {filename}, Final Prediction: {final_prediction.strftime('%d-%m-%Y %H:%M')}")
        else:
            print(f"File: {filename}, No valid predictions made.")

# Print final results table
print("\nSummary of Predictions:")
print(results_table)


File: A9_data.csv, Final Prediction: 06-07-2025 10:41
File: A4_data.csv, Final Prediction: 06-07-2025 11:51
File: A7_data.csv, Final Prediction: 06-07-2025 09:34
File: A2_data.csv, Final Prediction: 06-07-2025 11:19

Summary of Predictions:
+-------------+----------------------+
|     File    | Final Predicted Time |
+-------------+----------------------+
| A9_data.csv |   06-07-2025 10:41   |
| A4_data.csv |   06-07-2025 11:51   |
| A7_data.csv |   06-07-2025 09:34   |
| A2_data.csv |   06-07-2025 11:19   |
+-------------+----------------------+
