In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader

In [2]:
# 1. Create Dataset Class
class PetrolPriceDataset(Dataset):
    """Torch dataset pairing input sequences with their target values.

    Args:
        X: Array-like of input samples, indexed along the first axis.
        y: Array-like of targets with the same number of samples as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, y):
        # torch.as_tensor is the supported replacement for the legacy
        # torch.FloatTensor constructor; dtype is forced to float32 so the
        # tensors match the model's default parameter dtype.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32)

        # Fail early: a mismatch would otherwise only surface mid-epoch as an
        # IndexError (or silently drop surplus targets).
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [3]:
# 2. Data Preparation
def prepare_data(data, sequence_length=6):
    """Turn the raw price table into scaled sliding-window sequences.

    Each sample consists of ``sequence_length`` consecutive rows of the four
    feature columns; its target is the petrol price of the row immediately
    following that window.

    Args:
        data: DataFrame containing the feature and target columns named
            below. Rows with a missing value in any column are dropped.
        sequence_length: Number of consecutive rows per input window
            (must be >= 1).

    Returns:
        Tuple ``(X, y, scaler_X, scaler_y)`` where ``X`` has shape
        ``(n_sequences, sequence_length, 4)``, ``y`` has shape
        ``(n_sequences, 1)``, and the two fitted ``MinMaxScaler`` objects can
        be used to invert the scaling.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1 or if fewer than
            ``sequence_length + 1`` complete rows remain after dropping
            missing values.
        KeyError: If a required column is absent from ``data``.
    """
    if sequence_length < 1:
        raise ValueError(
            f"sequence_length must be at least 1, got {sequence_length}"
        )

    # Drop rows with missing values
    data = data.dropna()

    # Select features and target
    # (the leading space in the kerosene column name is intentional: it is
    # how the lookup key was written in the original code)
    features = ['Automotive Gas Oil (AGO)/Diesel (KES/Litre)',
                ' Illuminating Kerosene (IK)/Kerosene (KES/Litre)',
                'Mean Exchange Rate',
                'Murban Crude Oil Prices ($ per barrel)']
    target = 'Premium Motor Spirit (PMS)/Super Petrol (KES/Litre)'

    # Materialise the arrays once; indexing the DataFrame inside the loop
    # would rebuild them on every iteration.
    feature_values = data[features].values
    target_values = data[target].values

    n_sequences = len(data) - sequence_length
    if n_sequences < 1:
        raise ValueError(
            f"Need at least sequence_length + 1 = {sequence_length + 1} "
            f"complete rows, but only {len(data)} remain after dropping "
            f"missing values"
        )

    # Create sequences
    X = np.array([feature_values[start:start + sequence_length]
                  for start in range(n_sequences)])
    y = np.array(target_values[sequence_length:])

    # Scale the data
    # NOTE(review): both scalers are fit on every sequence, i.e. before any
    # train/validation/test split is made by the caller — confirm this is
    # acceptable for the evaluation being reported.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    # MinMaxScaler works on 2-D input: flatten the windows to rows, scale
    # per feature column, then restore the (sample, step, feature) layout.
    X_reshaped = X.reshape(-1, X.shape[-1])
    X_scaled = scaler_X.fit_transform(X_reshaped)
    X = X_scaled.reshape(X.shape)

    y = scaler_y.fit_transform(y.reshape(-1, 1))

    return X, y, scaler_X, scaler_y


In [4]:
# 3. Create train, validation, and test sets
def create_data_loaders(X, y, batch_size=32, shuffle_split=False):
    """Split the samples 70/15/15 into train, validation and test loaders.

    By default the split is chronological: the first 70% of the samples are
    used for training, the next 15% for validation and the last 15% for
    testing. When the samples are overlapping sliding windows over a time
    series, a random split places near-identical (and future) windows in
    the validation and test sets, which inflates the reported scores.

    Args:
        X: Input samples, indexed along the first axis in time order.
        y: Targets aligned with ``X``.
        batch_size: Batch size used by all three loaders.
        shuffle_split: If True, shuffle the samples before splitting (the
            previous behaviour, seeded with ``random_state=42``). Leave False
            for time-ordered data.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    # random_state is only meaningful when the samples are shuffled.
    split_seed = 42 if shuffle_split else None

    # Split into train, validation, and test sets
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.3, shuffle=shuffle_split, random_state=split_seed
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, shuffle=shuffle_split,
        random_state=split_seed
    )

    # Create datasets
    train_dataset = PetrolPriceDataset(X_train, y_train)
    val_dataset = PetrolPriceDataset(X_val, y_val)
    test_dataset = PetrolPriceDataset(X_test, y_test)

    # Create data loaders
    # Only the training batches are shuffled; evaluation order is kept stable.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    return train_loader, val_loader, test_loader

In [5]:
# 4. Neural Network Model
class PetrolPricePredictor(nn.Module):
    """LSTM regressor: one LSTM layer, dropout, then a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
    """

    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        # batch_first=True: the LSTM reads inputs as (batch, step, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.dropout = nn.Dropout(p=0.2)
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return one prediction per sequence, taken from the final step."""
        step_states, _ = self.lstm(x)
        # Only the hidden state of the last time step feeds the head.
        final_state = step_states[:, -1, :]
        return self.fc(self.dropout(final_state))

In [6]:
# 5-8. Training and Evaluation
def train_and_evaluate(model, train_loader, val_loader, test_loader, 
                      criterion, optimizer, num_epochs=100):
    """Fit ``model``, report progress every 10 epochs, then score the test set.

    Args:
        model: Module to optimise; moved to CUDA when it is available.
        train_loader: Iterable of ``(inputs, targets)`` batches for fitting.
        val_loader: Iterable of batches evaluated after every epoch.
        test_loader: Iterable of batches evaluated once after training.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(model, r2_score)`` where ``r2_score`` is computed from the
        test-set predictions and targets.
    """
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(target_device)

    for epoch_idx in range(num_epochs):
        # --- optimisation pass ---
        model.train()
        running_train = 0
        for inputs, targets in train_loader:
            inputs = inputs.to(target_device)
            targets = targets.to(target_device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            running_train += batch_loss.item()

        # --- validation pass (no gradients) ---
        model.eval()
        running_val = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(target_device)
                targets = targets.to(target_device)
                running_val += criterion(model(inputs), targets).item()

        # Progress is reported on every tenth epoch only; losses are
        # averaged over the number of batches.
        epoch_number = epoch_idx + 1
        if epoch_number % 10 != 0:
            continue
        print(f'Epoch [{epoch_number}/{num_epochs}], '
              f'Train Loss: {running_train/len(train_loader):.4f}, '
              f'Val Loss: {running_val/len(val_loader):.4f}')

    # --- final pass over the held-out test batches ---
    model.eval()
    running_test = 0
    predicted_rows = []
    actual_rows = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(target_device)
            targets = targets.to(target_device)
            batch_pred = model(inputs)
            running_test += criterion(batch_pred, targets).item()
            predicted_rows.extend(batch_pred.cpu().numpy())
            actual_rows.extend(targets.cpu().numpy())

    test_loss = running_test / len(test_loader)
    predictions = np.array(predicted_rows)
    actuals = np.array(actual_rows)

    # R² = 1 - SS_res / SS_tot
    residual_ss = np.sum((predictions - actuals) ** 2)
    total_ss = np.sum((actuals - actuals.mean()) ** 2)
    r2_score = 1 - residual_ss / total_ss

    print(f'\nTest Loss: {test_loss:.4f}')
    print(f'R² Score: {r2_score:.4f}')

    return model, r2_score


In [7]:
# Main execution
def main(csv_path=r'C:\Users\DATA-JOHN\Desktop\Fuel-prices-predictor-using-Machine-Learning\Fuel Prices in Kenya since 2010 - Sheet1.csv',
         seed=42):
    """Run the full pipeline: load the CSV, build sequences, train, evaluate.

    Args:
        csv_path: Location of the fuel-price CSV. The default is the original
            author's local path; pass your own path to run elsewhere.
        seed: Seed for torch's global RNG so that weight initialisation,
            dropout and batch shuffling are repeatable. Pass ``None`` to
            leave the RNG untouched.

    Returns:
        Tuple ``(trained_model, scaler_X, scaler_y)``.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Load and prepare data
    data = pd.read_csv(csv_path)

    # Display the first few rows and column names for verification
    print("\nFirst few rows of data:")
    print(data.head())
    print("\nColumn names:")
    print(data.columns.tolist())

    X, y, scaler_X, scaler_y = prepare_data(data)

    # Create data loaders
    train_loader, val_loader, test_loader = create_data_loaders(X, y)

    # Initialize model, criterion, and optimizer
    input_size = X.shape[2]  # number of features
    model = PetrolPricePredictor(input_size)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Train and evaluate model
    # (the R² score is printed by train_and_evaluate and not needed here)
    trained_model, _ = train_and_evaluate(
        model, train_loader, val_loader, test_loader, criterion, optimizer
    )

    return trained_model, scaler_X, scaler_y

if __name__ == "__main__":
    trained_model, scaler_X, scaler_y = main()


First few rows of data:
  Price Commencement Date  \
0              15/12/2010   
1              15/01/2011   
2              15/02/2011   
3              15/03/2011   
4              15/04/2011   

   Premium Motor Spirit (PMS)/Super Petrol (KES/Litre)  \
0                                              94.03     
1                                              95.67     
2                                              98.08     
3                                             102.44     
4                                             111.17     

   Automotive Gas Oil (AGO)/Diesel (KES/Litre)  \
0                                        87.45   
1                                        88.71   
2                                        91.72   
3                                        94.53   
4                                       107.52   

    Illuminating Kerosene (IK)/Kerosene (KES/Litre)  Mean Exchange Rate  \
0                                             75.83               80.57   
