In [1]:
from zipfile import ZipFile

with ZipFile('archive.zip', 'r') as zip: 
    zip.extractall()

In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using '{device}' device")

Using 'cuda' device


In [4]:
df_test = pd.read_csv('DailyDelhiClimateTest.csv')
df_train = pd.read_csv('DailyDelhiClimateTrain.csv')

In [5]:
df_test

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2017-01-01,15.913043,85.869565,2.743478,59.000000
1,2017-01-02,18.500000,77.222222,2.894444,1018.277778
2,2017-01-03,17.111111,81.888889,4.016667,1018.333333
3,2017-01-04,18.700000,70.050000,4.545000,1015.700000
4,2017-01-05,18.388889,74.944444,3.300000,1014.333333
...,...,...,...,...,...
109,2017-04-20,34.500000,27.500000,5.562500,998.625000
110,2017-04-21,34.250000,39.375000,6.962500,999.875000
111,2017-04-22,32.900000,40.900000,8.890000,1001.600000
112,2017-04-23,32.875000,27.500000,9.962500,1002.125000


In [6]:
df_train = df_train[['date', 'meantemp']]
df_test = df_test[['date', 'meantemp']]
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1462 entries, 0 to 1461
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   date      1462 non-null   object 
 1   meantemp  1462 non-null   float64
dtypes: float64(1), object(1)
memory usage: 23.0+ KB


In [7]:
def set_date_set_index(df):
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)

try:
    set_date_set_index(df_train)
    set_date_set_index(df_test)
except:
    pass

df_train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])


Unnamed: 0_level_0,meantemp
date,Unnamed: 1_level_1
2013-01-01,10.0
2013-01-02,7.4
2013-01-03,7.166667
2013-01-04,8.666667
2013-01-05,6.0


In [8]:
df_train.info()
df_test.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1462 entries, 2013-01-01 to 2017-01-01
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   meantemp  1462 non-null   float64
dtypes: float64(1)
memory usage: 22.8 KB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 114 entries, 2017-01-01 to 2017-04-24
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   meantemp  114 non-null    float64
dtypes: float64(1)
memory usage: 1.8 KB


In [9]:
class SinDataset(Dataset):
    def __init__(self,df , seq_length=25):
        self.seq_length = seq_length
        df = df['meantemp'].values

        # Veriyi önceden işleme
        x_data = []

        for i in range(len(df) - seq_length):
            x_seq = df[i:i + seq_length +1]
            x_data.append(x_seq)

        x_data = np.array(x_data)

        # Veriyi tensorlara dönüştürme
        self.X = torch.Tensor(x_data).to(device)

        self.num_samples = len(self.X)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        return self.X[idx, :-1], self.X[idx, 1:]



In [10]:
seq_length = 25
hidden_size = 256
input_size = 1
output_size = 1
batch_size = 128
epochs = 1000
learning_rate = 0.001


sin_dataset = SinDataset(df=df_train,seq_length=seq_length)
sin_dataloader = DataLoader(sin_dataset, batch_size=batch_size, shuffle=True)


In [11]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, num_layers=1)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        if torch.is_tensor(x) is False:
            x = torch.from_numpy(x)
        x = x.unsqueeze(-1).to(device)
        out, _ = self.rnn(x)
        out = self.fc(out)
        return out

In [12]:
input_size = 1
hidden_size = 32
output_size = 1

model = RNN(input_size, hidden_size, output_size).to(device)

# Kayıp fonksiyonu ve optimize edici
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
def train(model, data_loader, epochs, optimizer, loss_fn):
    model.to(device)
    iteration_losses = []
    print("=> Starting training")
    for epoch in range(epochs):
        epoch_loss = 0
        for X, Y in data_loader:
            X, Y = X.to(device), Y.to(device)
            optimizer.zero_grad()
            output = model(X)
            # Adjust dimensions of the target tensor to match the output tensor
            Y = Y.unsqueeze(-1)
            # Ensure both output and target have the same size
            if Y.size() != output.size():
                Y = Y.expand_as(output)
            # Calculate the loss
            loss = loss_fn(output, Y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        iteration_losses.append(epoch_loss)
        print(
            f"=> Epoch: {epoch + 1}, Loss: {epoch_loss / len(data_loader):.3e}",
            end="\r",
        )
    print("\n=> Training finished")
    return iteration_losses


losses = train(model, sin_dataloader, epochs, optimizer, criterion)

=> Starting training
=> Epoch: 1000, Loss: 2.317e+00
=> Training finished


In [14]:
def plot_loss_over_iterations(losses):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(losses)),
            y=losses,
            mode="lines",
            name="Loss",
            line=dict(width=3),
        ),
    )
    fig.update_yaxes(type="log")
    fig.update_layout(
        title="Loss Over Iterations",
        xaxis_title="Iteration",
        yaxis_title="Loss (log scale)",
        template="plotly_dark",
    )
    fig.show()


def plot_predictions(real_values, predicted_values):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(real_values)),
            y=real_values,
            mode="lines",
            name="Real",
            line=dict(width=3),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=np.arange(len(predicted_values)),
            y=predicted_values,
            mode="markers+lines",
            name="Predicted",
            line=dict(width=3),
        )
    )
    fig.update_layout(
        title="Real vs Predicted Values",
        xaxis_title="Time Step",
        yaxis_title="Value",
        template="plotly_dark",
    )
    fig.show()

In [15]:
# Generate predictions for the first sequence in the dataset
with torch.no_grad():
    input_seq, target_seq = sin_dataset[0]
    input_seq = input_seq.unsqueeze(0).to(device)
    predicted_seq = model(input_seq)

plot_predictions(
    target_seq.cpu().numpy(), predicted_seq.cpu().numpy().flatten()
)
plot_loss_over_iterations(losses)

In [29]:

test_dataset = SinDataset(df=df_test,seq_length=seq_length)
input_seq, target_seq = sin_dataset[0]
input = torch.zeros_like(input_seq)
print(input_seq)
input[0] = input_seq[-1]
input

tensor([10.0000,  7.4000,  7.1667,  8.6667,  6.0000,  7.0000,  7.0000,  8.8571,
        14.0000, 11.0000, 15.7143, 14.0000, 15.8333, 12.8333, 14.7143, 13.8333,
        16.5000, 13.8333, 12.5000, 11.2857, 11.2000,  9.5000, 14.0000, 13.8333,
        12.2500], device='cuda:0')


tensor([12.2500,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0')

In [30]:
# Generate predictions for the first sequence in the dataset
with torch.no_grad():
    input_seq, target_seq = sin_dataset[0]
    input = torch.zeros_like(input_seq)
    input[0] = input_seq[-1]

    input = input.unsqueeze(0).to(device)
    predicted_seq = model(input)

plot_predictions(
    target_seq.cpu().numpy(), predicted_seq.cpu().numpy().flatten()
)

In [18]:
print(target_seq)
print(predicted_seq)

tensor([ 7.4000,  7.1667,  8.6667,  6.0000,  7.0000,  7.0000,  8.8571, 14.0000,
        11.0000, 15.7143, 14.0000, 15.8333, 12.8333, 14.7143, 13.8333, 16.5000,
        13.8333, 12.5000, 11.2857, 11.2000,  9.5000, 14.0000, 13.8333, 12.2500,
        12.6667], device='cuda:0')
tensor([[[10.6900],
         [ 8.8678],
         [ 9.1125],
         [ 9.7084],
         [ 8.1229],
         [ 9.2586],
         [ 8.0840],
         [10.5132],
         [13.1885],
         [11.7310],
         [14.8726],
         [14.2828],
         [14.7757],
         [12.6430],
         [14.0775],
         [14.6074],
         [15.4027],
         [13.2987],
         [12.7678],
         [12.2752],
         [11.6204],
         [10.3794],
         [13.3677],
         [14.1953],
         [12.2966]]], device='cuda:0')


In [19]:
df_train.index.max()

Timestamp('2017-01-01 00:00:00')

In [20]:
# Create future dataframe
future = pd.date_range('2017-01-01','2018-01-01', freq='1D')
future_df = pd.DataFrame(index=future)
future_df['isFuture'] = True
df_train['isFuture'] = False
df_and_future = pd.concat([df_train, future_df])

future_w_features = df_and_future.query('isFuture').copy()

In [21]:
df_and_future

Unnamed: 0,meantemp,isFuture
2013-01-01,10.000000,False
2013-01-02,7.400000,False
2013-01-03,7.166667,False
2013-01-04,8.666667,False
2013-01-05,6.000000,False
...,...,...
2017-12-28,,True
2017-12-29,,True
2017-12-30,,True
2017-12-31,,True


In [22]:
x, y = sin_dataset[0]
x.shape

torch.Size([25])

In [23]:
future_w_features['pred'] = model(future_w_features[['meantemp']].values)

ValueError: input must have the type torch.float32, got type torch.float64