In [1]:
import torch
import re
import math
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer
from tqdm.notebook import tqdm
import numpy as np
import json
import os

In [2]:
# Set random seed for reproducibility
torch.manual_seed(42)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-german-cased')

# Special tokens for text substitution
special_tokens = {
    'additional_special_tokens': ['<city>','<temp>','<date>','<velocity>','<percentile>','<rainfall>']
}

# Add special tokens into tokenizer
tokenizer.add_special_tokens(special_tokens)

6

In [3]:
class WeatherDataset(Dataset):
    def __init__(self, weather_data, max_length=100):
        self.data = [weather_data] if isinstance(weather_data, dict) else weather_data
        self.max_length = max_length
        
        # Initialize wind directions from all data points
        self.wind_directions = sorted(list(set([d for data in self.data for d in data['windrichtung']])))
        self.wind_dir_to_idx = {d: i for i, d in enumerate(self.wind_directions)}
        
        # Calculate expected feature dimension
        self.feature_dim = (
            1 +  # temperature
            1 +  # rain risk
            1 +  # rain amount
            1 +  # wind speed
            1 +  # pressure
            1 +  # humidity
            1 +  # cloudiness
            len(self.wind_directions) +  # one-hot wind directions
            2 +  # time encoding (sin, cos)
            3 +  # sun features
            1    # sun hours
        )
        
    def replace_dates(self, text: str) -> str:
        text = re.sub(r"\b\d{1,2}\.\d{1,2}\.\d{4}\b", "<date>", text)
        return text
    
    def replace_city_and_units(self, text: str, city: str) -> str:
        text = re.sub(city, '<city>', text)
        unit_patterns = [
        # TEMPERATURE
        (r'(°[ ]*C|Grad)', ' <temp>'),
        # VELOCITY
        (r'[ ]*km/h', ' <velocity>'),
        # PERCENTILE
        (r'[ ]*%', ' <percentile>'),
        # RAINFALL DELTA
        # (r'\d+\.\d+ bis \d+\.\d+[ ]*l\/m²', '<rainfall> bis <rainfall>'),
        (r'[ ]*l\/m²', ' <rainfall>')
        ]
        for pattern, replacement in unit_patterns:
            try:
                text = re.sub(pattern, replacement, text)
            except Exception:
                continue

        # REMOVE MARKUP
        text = re.sub(r'\**', '', text)

        # REMOVE WEIRD PUNCUATION
        text = re.sub(r' \.', '.', text)

        # REMOVE UNNECESSARY NEWLINES
        text = re.sub(r'\n\n', '\n', text)

        # REMOVE SPACE AFTER NEWLINE
        text = re.sub(r'\n ', '\n', text)

        # REPLACE MULTIPLE WHITESPACES WITH ONE
        text = re.sub(r' +', ' ', text)
        return text

    def _parse_time(self, time_str):
        """Parse time string and handle missing data"""
        if time_str == '-' or not time_str:
            return None
        try:
            # Handle "HH:MM Uhr" format
            if ':' in time_str:
                hour, minute = map(int, time_str.split(' ')[0].split(':'))
                return hour + minute/60
            return None
        except (ValueError, IndexError):
            return None

    def _encode_time(self, time_str):
        # Convert "HH - HH Uhr" to cyclic features
        try:
            start_hour = int(time_str.split(' - ')[0])
            hour_sin = torch.sin(torch.tensor(2 * math.pi * start_hour / 24))
            hour_cos = torch.cos(torch.tensor(2 * math.pi * start_hour / 24))
            return torch.tensor([hour_sin, hour_cos])
        except (ValueError, IndexError):
            # Return neutral values for invalid time
            return torch.tensor([0.0, 1.0])
        
    def __len__(self):
        return len(self.data)
    
    def _encode_sun_info(self, sunrise, sunset, current_time):
        # Parse times, handling missing data
        sunrise_hour = self._parse_time(sunrise)
        sunset_hour = self._parse_time(sunset)
        
        try:
            current_hour = float(current_time.split(' - ')[0])
        except (ValueError, IndexError):
            # Return default values if current time is invalid
            return torch.tensor([0.0, 0.0, 0.0])
        
        # If sunrise or sunset is missing, use approximate values based on season
        if sunrise_hour is None or sunset_hour is None:
            # Return default encoding indicating uncertainty
            return torch.tensor([
                0.5,  # Unknown daylight status
                0.0,  # Neutral time since sunrise
                0.0   # Neutral time until sunset
            ])
        
        # Calculate daylight features
        is_daylight = (current_hour >= sunrise_hour) and (current_hour <= sunset_hour)
        
        if is_daylight:
            time_since_sunrise = (current_hour - sunrise_hour) / (sunset_hour - sunrise_hour)
            time_until_sunset = (sunset_hour - current_hour) / (sunset_hour - sunrise_hour)
        else:
            if current_hour < sunrise_hour:
                time_since_sunrise = -1 * (sunrise_hour - current_hour) / (24 - sunset_hour + sunrise_hour)
                time_until_sunset = -1
            else:
                time_since_sunrise = -1
                time_until_sunset = -1 * (current_hour - sunset_hour) / (24 - sunset_hour + sunrise_hour)
        
        return torch.tensor([float(is_daylight), time_since_sunrise, time_until_sunset])

    def one_hot_wind(self, wind_dir):
        encoding = torch.zeros(len(self.wind_directions))
        encoding[self.wind_dir_to_idx[wind_dir]] = 1
        return encoding
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        # Get sequence length from the data
        seq_len = len(item['temperatur_in_deg_C'])
        
        # Initialize features tensor with correct shape
        features = torch.zeros((seq_len, self.feature_dim))
        
        # Fill features one by one, maintaining consistent shapes
        current_idx = 0
        
        # Numerical features - all should be shape [seq_len, 1]
        features[:, current_idx] = torch.tensor([float(t) for t in item['temperatur_in_deg_C']])
        current_idx += 1
        
        features[:, current_idx] = torch.tensor([float(r) for r in item['niederschlagsrisiko_in_perc']])
        current_idx += 1
        
        # Handle rain amount with forward filling for NaN values
        rain_values = []
        last_valid = 0.0
        for r in item['niederschlagsmenge_in_l_per_sqm']:
            try:
                val = float(r)
                if not torch.isnan(torch.tensor(val)):
                    last_valid = val
                rain_values.append(last_valid)
            except ValueError:
                rain_values.append(last_valid)
        features[:, current_idx] = torch.tensor(rain_values)
        current_idx += 1
        
        features[:, current_idx] = torch.tensor([float(w) for w in item['windgeschwindigkeit_in_km_per_s']])
        current_idx += 1
        
        features[:, current_idx] = torch.tensor([float(p) for p in item['luftdruck_in_hpa']])
        current_idx += 1
        
        features[:, current_idx] = torch.tensor([float(h) for h in item['relative_feuchte_in_perc']])
        current_idx += 1
        
        features[:, current_idx] = torch.tensor([float(c.split('/')[0]) / 8 for c in item['bewölkungsgrad']])
        current_idx += 1
        
        # Wind directions (one-hot encoded)
        wind_features = torch.stack([self.one_hot_wind(w) for w in item['windrichtung']])
        features[:, current_idx:current_idx + len(self.wind_directions)] = wind_features
        current_idx += len(self.wind_directions)
        
        # Time features
        time_features = torch.stack([self._encode_time(t) for t in item['times']])
        features[:, current_idx:current_idx + 2] = time_features
        current_idx += 2
        
        # Sun features
        sun_features = torch.stack([
            self._encode_sun_info(
                item.get('sunrise', '-'), 
                item.get('sundown', '-'), 
                t
            ) for t in item['times']
        ])
        features[:, current_idx:current_idx + 3] = sun_features
        current_idx += 3
        
        # Sun hours feature
        sun_hours = torch.tensor([1.0 if "fast nicht zu sehen" in item.get('sunhours', '') else 0.0])
        features[:, current_idx] = sun_hours.expand(seq_len)
        
        return {
            'features': features,
            'text': self.replace_dates(self.replace_city_and_units(item['report_long'], item['city']))
        }

In [4]:
if __name__=='__main__':
    
    # change directory if not on root
    if str(os.getcwd()).endswith('LLamas') == False:
        os.chdir('../..')
    
    # check if dict contains correct key
    # def check_file(path):
    #     with open(path, 'r', encoding='utf-8') as f:
    #         data: dict = json.load(f)
    #         if 'gpt_rewritten_v2' in data.keys():
    #             return True
    #         else:
    #             return False

    def check_file(path):
        """
        Checks if the file contains valid keys and values.
        Returns True if the file should be loaded, False otherwise.
        """
        with open(path, 'r', encoding='utf-8') as f:
            try:
                data: dict = json.load(f)
            except json.JSONDecodeError:
                print(f"Invalid JSON in file: {path}")
                return False

            # Ensure required keys are present
            if not {'report_long', 'city'}.issubset(data.keys()):
                return False

            # Check if 'city' and 'report_long' have valid non-empty values
            if not isinstance(data['city'], str) or not data['city'].strip():
                return False
            if not isinstance(data['report_long'], str) or not data['report_long'].strip():
                return False

            return True
                
    # transform data into weatherDataSet class format
    def load_data(path):
        with open(path, 'r', encoding='utf-8') as f:
            data: dict = json.load(f)
            return data
    
    # files for reading
    files = os.listdir(os.path.join(os.getcwd(), 'data', 'files_for_chatGPT', '2024-12-12'))
    files = {(file.split('-')[-1]).split('_')[0]:load_data(os.path.join(os.getcwd(), 'data', 'files_for_chatGPT', '2024-12-12', file)) for file in tqdm(files) if check_file(os.path.join(os.getcwd(), 'data', 'files_for_chatGPT', '2024-12-12', file))}

  0%|          | 0/28812 [00:00<?, ?it/s]

In [5]:
weather_data = list(files.values())
dataset = WeatherDataset(weather_data, max_length=100)
def validate_and_clean_weather_data(weather_data, dataset_class):
    """
    Validates the dataset and returns cleaned weather_data with problematic samples removed.
    
    Args:
        weather_data: List of weather data samples
        dataset_class: The dataset class constructor to use for validation
    
    Returns:
        tuple: (cleaned_weather_data, removed_indices, validation_summary)
    """
    # Create temporary dataset for validation
    temp_dataset = dataset_class(weather_data, max_length=100)
    
    summary = {
        'total_samples': len(temp_dataset),
        'invalid_samples': [],
        'statistics': {
            'nan_count': 0,
            'inf_count': 0,
            'extreme_values': 0
        }
    }
    
    invalid_indices = set()
    
    # Validate each sample
    for idx in range(len(temp_dataset)):
        try:
            sample = temp_dataset[idx]
            features = sample['features']
            
            has_issue = False
            
            # Check for NaN values
            nan_mask = torch.isnan(features)
            if nan_mask.any():
                summary['statistics']['nan_count'] += nan_mask.sum().item()
                has_issue = True
                
            # Check for infinity
            inf_mask = torch.isinf(features)
            if inf_mask.any():
                summary['statistics']['inf_count'] += inf_mask.sum().item()
                has_issue = True
                
            # Check for extreme values
            extreme_mask = (features.abs() > 1e6)
            if extreme_mask.any():
                summary['statistics']['extreme_values'] += extreme_mask.sum().item()
                has_issue = True
            
            if has_issue:
                invalid_indices.add(idx)
                summary['invalid_samples'].append({
                    'index': idx,
                    'text': sample['text']
                })
                
        except Exception as e:
            print(f"Error processing sample {idx}: {str(e)}")
            invalid_indices.add(idx)
            summary['invalid_samples'].append({
                'index': idx,
                'error_message': str(e)
            })
    
    # Create cleaned weather_data list
    cleaned_weather_data = [
        data for idx, data in enumerate(weather_data) 
        if idx not in invalid_indices
    ]
    
    # Print summary
    print("\nValidation Summary:")
    print(f"Total samples: {summary['total_samples']}")
    print(f"Samples with issues: {len(summary['invalid_samples'])}")
    print(f"Total NaN values: {summary['statistics']['nan_count']}")
    print(f"Total infinite values: {summary['statistics']['inf_count']}")
    print(f"Total extreme values: {summary['statistics']['extreme_values']}")
    print(f"\nRemoved {len(invalid_indices)} samples")
    print(f"Remaining samples: {len(cleaned_weather_data)}")
    
    return cleaned_weather_data, list(invalid_indices), summary

# Example usage function
def clean_and_create_dataset(weather_data, dataset_class):
    """
    Cleans the weather data and creates a new dataset.
    
    Args:
        weather_data: Original weather data list
        dataset_class: Dataset class to use
    
    Returns:
        tuple: (new_dataset, removed_indices, validation_summary)
    """
    cleaned_data, removed_indices, summary = validate_and_clean_weather_data(
        weather_data, dataset_class
    )
    
    # Create new dataset with cleaned data
    clean_dataset = dataset_class(cleaned_data, max_length=100)
    
    return clean_dataset, removed_indices, summary

# Clean the data and create new dataset
clean_dataset, removed_indices, summary = clean_and_create_dataset(weather_data, WeatherDataset)


Validation Summary:
Total samples: 28811
Samples with issues: 43
Total NaN values: 454
Total infinite values: 0
Total extreme values: 0

Removed 43 samples
Remaining samples: 28768


# ANOMALY DETECTION RUN

In [None]:
# Create DataLoader with the corrected collate function
def create_dataloader(dataset, batch_size, tokenizer):
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=lambda batch: prepare_batch(batch, tokenizer)
    )

class WeatherGRU(nn.Module):
    def __init__(self, feature_dim, vocab_size, embedding_dim=256, hidden_dim=512, n_layers=2, dropout=0.1):
        super().__init__()
        
        self.timestep_feature_dim = feature_dim

        # Add layer normalization for better stability
        self.feature_encoder = nn.Sequential(
            nn.LayerNorm(self.timestep_feature_dim),
            nn.Linear(self.timestep_feature_dim, embedding_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.LayerNorm(embedding_dim),
            nn.Linear(embedding_dim, embedding_dim),
            nn.ReLU(),
            nn.Dropout(dropout)
        )
        
        # Initialize embedding with Xavier/Glorot initialization
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        nn.init.xavier_uniform_(self.embedding.weight)
        
        # Add gradient clipping to GRU
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0,
            batch_first=True
        )
        
        # Initialize GRU weights
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)
        
        self.feature_projection = nn.Sequential(
            nn.LayerNorm(self.timestep_feature_dim),
            nn.Linear(self.timestep_feature_dim, hidden_dim)
        )
        
        # Add layer normalization before final projection
        self.output_layer = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.Linear(hidden_dim, vocab_size)
        )
        
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
    def forward(self, features, tokens, teacher_forcing_ratio=1.0):
        # Add input validation
        if torch.isnan(features).any():
            raise ValueError("NaN detected in input features")
        if torch.isinf(features).any():
            raise ValueError("Inf detected in input features")
            
        batch_size = features.size(0)
        seq_len = features.size(1)
        max_len = tokens.size(1) - 1
        
        # Scale features to prevent extreme values
        features = torch.clamp(features, -10, 10)
        
        features_reshaped = features.view(-1, self.timestep_feature_dim)
        encoded_features = self.feature_encoder(features_reshaped)
        encoded_features = encoded_features.view(batch_size, seq_len, -1)
        
        # Initialize hidden state with scaled features
        h_0 = self.feature_projection(features[:, 0])
        h_0 = torch.tanh(h_0)  # Ensure values are in [-1, 1]
        h_0 = h_0.unsqueeze(0).expand(self.n_layers, batch_size, self.hidden_dim).contiguous()
        
        outputs = torch.zeros(batch_size, max_len, self.output_layer[-1].out_features, device=features.device)
        decoder_input = tokens[:, 0].unsqueeze(1)
        
        for t in range(max_len):
            token_emb = self.embedding(decoder_input)
            current_features = encoded_features[:, min(t, seq_len-1)].unsqueeze(1)
            
            # Scale combined input
            combined_input = (token_emb + current_features) / 2
            
            output, h_0 = self.gru(combined_input, h_0)
            
            # Check for NaN in hidden state
            if torch.isnan(h_0).any():
                raise ValueError(f"NaN detected in hidden state at timestep {t}")
                
            prediction = self.output_layer(output)
            outputs[:, t:t+1] = prediction
            
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            decoder_input = tokens[:, t+1].unsqueeze(1) if teacher_force else prediction.argmax(dim=-1)
        
        return outputs

    # Add this method to the WeatherGRU class:
    def generate(self, features, max_length=100, temperature=0.7):
        """
        Generate text from features.
        
        Args:
            features: Input features tensor [batch_size, seq_len, feature_dim]
            max_length: Maximum length of generated sequence
            temperature: Sampling temperature (higher = more random)
        
        Returns:
            list: Generated token indices
        """
        self.eval()
        with torch.no_grad():
            batch_size = features.size(0)
            seq_len = features.size(1)
            
            # Scale features
            features = torch.clamp(features, -10, 10)
            
            # Process features
            features_reshaped = features.view(-1, self.timestep_feature_dim)
            encoded_features = self.feature_encoder(features_reshaped)
            encoded_features = encoded_features.view(batch_size, seq_len, -1)
            
            # Initialize hidden state
            h_0 = self.feature_projection(features[:, 0])
            h_0 = torch.tanh(h_0)
            h_0 = h_0.unsqueeze(0).expand(self.n_layers, batch_size, self.hidden_dim).contiguous()
            
            # Ensure decoder_input is a 2D tensor
            decoder_input = torch.full((batch_size, 1), tokenizer.cls_token_id, dtype=torch.long, device=features.device)
            
            generated_tokens = []
            
            for t in range(max_length):
                token_emb = self.embedding(decoder_input)
                current_features = encoded_features[:, min(t, seq_len-1)].unsqueeze(1)
                combined_input = (token_emb + current_features) / 2
                
                output, h_0 = self.gru(combined_input, h_0)
                logits = self.output_layer(output)
                
                # Apply temperature
                logits = logits.squeeze(1) / temperature
                
                # Sample from the distribution
                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, 1)
                
                generated_tokens.append(next_token)
                
                # Stop if we hit the SEP token
                if (next_token == tokenizer.sep_token_id).any():
                    break
                    
                decoder_input = next_token
            
            # Concatenate generated tokens
            generated_tokens = torch.cat(generated_tokens, dim=1)
            return generated_tokens

def prepare_batch(batch_list, tokenizer):
    features = torch.stack([item['features'] for item in batch_list])
    texts = [item['text'] for item in batch_list]
    
    # Normalize features
    features = (features - features.mean()) / (features.std() + 1e-8)
    
    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        return_tensors='pt'
    )
    
    return {
        'features': features,
        'text': encoded['input_ids']
    }

def train_epoch(model, dataloader, criterion, optimizer, device, teacher_forcing_ratio=1.0, epoch=0, total_epochs=1):
    model.train()
    total_loss = 0
    num_batches = 0
    pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{total_epochs}")
    
    for batch_idx, batch in enumerate(pbar):
        try:
            features = batch['features'].to(device)
            text = batch['text'].to(device)
            
            # Check for invalid values in inputs
            if torch.isnan(features).any() or torch.isinf(features).any():
                print(f"Warning: Invalid values in features at batch {batch_idx}")
                continue
                
            optimizer.zero_grad()
            
            # Forward pass with gradient checking
            with torch.autograd.detect_anomaly():
                outputs = model(features, text, teacher_forcing_ratio)
                outputs = outputs.view(-1, outputs.size(-1))
                targets = text[:, 1:].contiguous().view(-1)
                
                loss = criterion(outputs, targets)
                
                # Check if loss is valid
                if torch.isnan(loss) or torch.isinf(loss):
                    print(f"Warning: Invalid loss value {loss.item()} at batch {batch_idx}")
                    print("Last output values:", outputs[-5:])
                    print("Last target values:", targets[-5:])
                    raise ValueError("Invalid loss detected")
                
                loss.backward()
                
                # Clip gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                
                # Check gradients
                for name, param in model.named_parameters():
                    if param.grad is not None:
                        grad_norm = param.grad.norm()
                        if torch.isnan(grad_norm) or torch.isinf(grad_norm):
                            print(f"Warning: Invalid gradient for {name}")
                            raise ValueError(f"Invalid gradient detected in {name}")
                
                optimizer.step()
                
                total_loss += loss.item()
                num_batches += 1
                avg_loss = total_loss / num_batches
                pbar.set_postfix({"Loss": f"{avg_loss:.4f}"})
                
        except ValueError as e:
            print(f"Error in batch {batch_idx}: {str(e)}")
            continue
            
    return total_loss / num_batches if num_batches > 0 else float('inf')

# Training setup with adjusted hyperparameters
def train_model(model, dataset, tokenizer, num_epochs=10, batch_size=16, learning_rate=1e-4):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    
    # Use a smaller batch size and learning rate for stability
    dataloader = create_dataloader(clean_dataset, batch_size=batch_size, tokenizer=tokenizer)
    
    criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id, reduction='mean')
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    
    # Add learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=2, verbose=True
    )
    
    losses = []
    
    # Select a few examples for generation testing
    test_indices = [0, len(dataset)//2, len(dataset)-1]  # Beginning, middle, and end
    test_samples = [dataset[i] for i in test_indices]
    test_features = torch.stack([sample['features'] for sample in test_samples]).to(device)
    
    print("\nOriginal texts for test samples:")
    for idx, sample in zip(test_indices, test_samples):
        print(f"Sample {idx}: {sample['text']}")
    
    for epoch in range(num_epochs):
        try:
            loss = train_epoch(
                model, dataloader, criterion, optimizer, device,
                teacher_forcing_ratio=0.9,
                epoch=epoch, total_epochs=num_epochs
            )
            losses.append(loss)
            
            scheduler.step(loss)
            print(f"\nEpoch {epoch + 1}, Loss: {loss:.4f}")
            
            # Generate examples
            print("\nGenerated examples:")
            model.eval()
            with torch.no_grad():
                generated_tokens = model.generate(test_features, temperature=0.7)
                for idx, tokens in enumerate(generated_tokens):
                    generated_text = tokenizer.decode(tokens, skip_special_tokens=False)
                    print(f"Sample {test_indices[idx]}: {generated_text}")
            model.train()
            
            # Save checkpoint
            if (epoch + 1) % 5 == 0:
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                }, f'checkpoint_epoch_{epoch+1}.pt')
                
        except Exception as e:
            print(f"Error during epoch {epoch + 1}: {str(e)}")
            continue
    
    return losses

# Usage example:
model = WeatherGRU(
    feature_dim=22,
    vocab_size=len(tokenizer),
    embedding_dim=64,
    hidden_dim=128,
    n_layers=2
)

losses = train_model(
    model=model,
    dataset=dataset,
    tokenizer=tokenizer,
    num_epochs=2,
    batch_size=64,
    learning_rate=1e-3
)


Original texts for test samples:
Sample 0: Wetter heute, <date> In <city> stören am Morgen nur einzelne Wolken den sonst blauen Himmel bei Temperaturen von 23 <temp>. Mittags stören nur einzelne Wolken den sonst blauen Himmel und die Temperaturen erreichen 30 <temp>. Abends gibt es in <city> lockere Bewölkung bei Temperaturen von 24 bis 26 <temp>. In der Nacht bedecken einzelne Wolken den Himmel und die Werte gehen auf 22 <temp> zurück. Die gefühlten Temperaturen liegen bei 23 bis 33 <temp>. <city> liegt in der Region Karibik. Dort finden Sie eine Wettervorhersage für die gesamte Region.
Sample 14405: Wetter heute, <date> In <city> wird am Morgen die Sonne von einzelnen Wolken verdeckt und die Temperatur liegt bei 5 <temp>. Gegen später gibt es ungestörten Sonnenschein bei Höchstwerten von 25 <temp>. Abends gibt es in <city> überwiegend blauen Himmel mit vereinzelten Wolken bei Temperaturen von 14 bis 20 <temp>. In der Nacht ist es bedeckt bei Tiefsttemperaturen von 12 <temp>. Die Wah

Epoch 1/2:   0%|          | 0/450 [00:00<?, ?it/s]

  with torch.autograd.detect_anomaly():



Epoch 1, Loss: 2.1498

Generated examples:
Sample 0: Wetter heute, <date> In <city> ist es am bewölkt und die Temperatur liegt bei 22 <temp>. Später scheint die Sonne bei blauem Himmel und die Temperaturen erreichen 3 <temp>. Abends gibt es in <city> Wolken und die Temperaturen liegen zwischen 24 und 27 <temp>. Nachts verdecken einzelne Wolken den Himmel bei Tief
Sample 14405: Wetter heute, <date> In <city> überwiegt am Morgen dichte Bewölkung aber es bleibt trocken und die Temperaturen liegen zwischen 3 und 18 <temp>. Nachts ist es wolkig und teils heiter bei Temperaturen von 23 bis zu 14 <temp>. Nachts stören nur nur einzelne Wolken den sonst klaren Himmel bei
Sample 28810: Wetter heute, <date> In <city> ist es morgens vielfach wolkig bei Temperaturen von 23 <temp>. Im weiteren Tagesverlauf stören nur einzelne Wolken den sonst blauen Himmel und die Temperaturen erreichen 32 <temp>. <city> liegt in Region der Region Huia. Dort finden Sie eine Wettervorhersage für die gesamte Region. 

Epoch 2/2:   0%|          | 0/450 [00:00<?, ?it/s]

### First Iteration
    embedding_dim=64,
    hidden_dim=128,

ca 9,806,800 parameter laut Claude

Results:
