In [3]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


df = pd.read_csv('train.csv')
df['Date'] = pd.to_datetime(df['Full date'], format="%Y-%m-%d")
df['day_of_year'] = df['Date'].dt.dayofyear
df['sine_day'] = np.sin(2 * np.pi * df['day_of_year'] / 365)
df['cosine_day'] = np.cos(2 * np.pi * df['day_of_year'] / 365)

df = df.drop(columns=['Date', 'day_of_year'])
#print(df)
features = ['sine_day', 'cosine_day']  
target = ['Temp Max', 'Temp Min']


scaler = MinMaxScaler()
df[features + target] = scaler.fit_transform(df[features + target])

sequence_length = 21
X, y = [], []
for i in range(sequence_length, len(df)):
    X.append(df[features].iloc[i-sequence_length:i].values)
    y.append(df[target].iloc[i].values)

X, y = np.array(X), np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

print("Data preprocessing complete.")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Data preprocessing complete.
X_train shape: torch.Size([17223, 21, 2]), y_train shape: torch.Size([17223, 2])
X_test shape: torch.Size([4306, 21, 2]), y_test shape: torch.Size([4306, 2])


In [4]:
import torch
print(torch.cuda.is_available())


True


In [5]:
from torch.utils.data import DataLoader, Dataset

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [6]:
import torch
import torch.nn as nn
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=2, hidden_size=64, batch_first=True)  
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(input_size=64, hidden_size=32, batch_first=True)  
        self.dropout2 = nn.Dropout(0.2)
        self.fc1 = nn.Linear(32, 32)  
        self.fc2 = nn.Linear(32, 2)    

    def forward(self, x):
        x, _ = self.lstm1(x)  
        x = self.dropout1(x)
        
        x, _ = self.lstm2(x)  
        x = self.dropout2(x)

        x = x[:, -1, :]  
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [7]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import r2_score

def calculate_r2(y_true, y_pred):
    return r2_score(y_true, y_pred)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = LSTMModel().to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    epoch_losses = []
    
    total_predictions = []
    total_targets = []

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_losses.append(loss.item())

        total_predictions.append(outputs.cpu().detach().numpy())
        total_targets.append(targets.cpu().detach().numpy())

    avg_loss = np.mean(epoch_losses)

    total_predictions = np.concatenate(total_predictions)
    total_targets = np.concatenate(total_targets)

    r2 = calculate_r2(total_targets, total_predictions)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}, R²: {r2:.4f}')


Epoch [1/50], Loss: 0.0133, R²: 0.5488
Epoch [2/50], Loss: 0.0066, R²: 0.7766
Epoch [3/50], Loss: 0.0062, R²: 0.7891
Epoch [4/50], Loss: 0.0061, R²: 0.7919
Epoch [5/50], Loss: 0.0060, R²: 0.7949
Epoch [6/50], Loss: 0.0059, R²: 0.7984
Epoch [7/50], Loss: 0.0059, R²: 0.7984
Epoch [8/50], Loss: 0.0058, R²: 0.8010
Epoch [9/50], Loss: 0.0058, R²: 0.8013
Epoch [10/50], Loss: 0.0058, R²: 0.8032
Epoch [11/50], Loss: 0.0058, R²: 0.8035
Epoch [12/50], Loss: 0.0057, R²: 0.8040
Epoch [13/50], Loss: 0.0057, R²: 0.8052
Epoch [14/50], Loss: 0.0057, R²: 0.8055
Epoch [15/50], Loss: 0.0057, R²: 0.8053
Epoch [16/50], Loss: 0.0057, R²: 0.8059
Epoch [17/50], Loss: 0.0057, R²: 0.8057
Epoch [18/50], Loss: 0.0057, R²: 0.8060
Epoch [19/50], Loss: 0.0056, R²: 0.8078
Epoch [20/50], Loss: 0.0056, R²: 0.8086
Epoch [21/50], Loss: 0.0057, R²: 0.8075
Epoch [22/50], Loss: 0.0056, R²: 0.8077
Epoch [23/50], Loss: 0.0056, R²: 0.8082
Epoch [24/50], Loss: 0.0056, R²: 0.8082
Epoch [25/50], Loss: 0.0056, R²: 0.8087
Epoch [26

In [8]:
from sklearn.metrics import r2_score

# Evaluation
model.eval()
with torch.no_grad():
    test_losses = []
    predictions = []
    targets_list = []  
    
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        test_losses.append(loss.item())
        
        
        predictions.append(outputs.cpu().numpy())
        targets_list.append(targets.cpu().numpy())


test_loss = np.mean(test_losses)
print("Test Loss:", test_loss)

predictions = np.concatenate(predictions)
targets = np.concatenate(targets_list)

r2 = r2_score(targets, predictions)
print("R² Score:", r2)



Test Loss: 0.006498437132945077
R² Score: 0.76789391040802


In [9]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler

train_df = pd.read_csv('train.csv')
print("Columns in training data:", train_df.columns)  

if 'Date' not in train_df.columns or 'Temp Max' not in train_df.columns or 'Temp Min' not in train_df.columns:
    raise KeyError("Columns 'Date', 'Temp Max', or 'Temp Min' are missing from the training data")

train_df['Date'] = pd.to_datetime(train_df['Date'], format="%d-%m-%Y")
train_df['day_of_year'] = train_df['Date'].dt.dayofyear
train_df['sine_day'] = np.sin(2 * np.pi * train_df['day_of_year'] / 365)
train_df['cosine_day'] = np.cos(2 * np.pi * train_df['day_of_year'] / 365)

features = ['sine_day', 'cosine_day']
target = ['Temp Max', 'Temp Min']

feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

train_df[features] = feature_scaler.fit_transform(train_df[features])

train_df[target] = target_scaler.fit_transform(train_df[target])

test_df = pd.read_csv('test.csv')
test_df['Date'] = pd.to_datetime(test_df['Date'], format="%d-%m-%Y")

test_df['day_of_year'] = test_df['Date'].dt.dayofyear
test_df['sine_day'] = np.sin(2 * np.pi * test_df['day_of_year'] / 365)
test_df['cosine_day'] = np.cos(2 * np.pi * test_df['day_of_year'] / 365)

test_df[features] = feature_scaler.transform(test_df[features])

sequence_length = 21
X_new = []
dates_for_predictions = []

for i in range(sequence_length, len(test_df)):
    X_new.append(test_df[features].iloc[i-sequence_length:i].values)
    dates_for_predictions.append(test_df['Date'].iloc[i])

X_new = np.array(X_new)
X_new = torch.tensor(X_new, dtype=torch.float32).to(device)

model.eval()
with torch.no_grad():
    predictions = model(X_new)

predictions = predictions.cpu().numpy()
predictions_original_scale = target_scaler.inverse_transform(predictions)

output_df = pd.DataFrame(predictions_original_scale, columns=['Temp Max', 'Temp Min'])
output_df['Date'] = dates_for_predictions
output_df = output_df.sort_values(by='Date').reset_index(drop=True)

output_df['Date'] = output_df['Date'].dt.strftime('%d-%m-%Y')
output_df.to_csv('predicted_temperatures.csv', index=False)
print("Predictions saved to 'predicted_temperatures.csv'")

Columns in training data: Index(['id', 'Date', 'Rain', 'Temp Max', 'Temp Min', 'Full date', 'Year',
       'Month'],
      dtype='object')


ValueError: time data "1" doesn't match format "%d-%m-%Y", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.