In [None]:
import pandas as pd

def enlarge_dataset(file_path: str, duplication_factor: int) -> str:
    """
    Enlarges the dataset by duplicating it and shifting the timestamps.

    The CSV is read, its 'timestamp' column is parsed as datetimes, and
    `duplication_factor` copies are stacked one after another. Copy number
    i (starting at 0) has its timestamps shifted by
    i * (time span of the data + 5 minutes), so copies never overlap in
    time. The result is written next to the input file with the file name
    prefixed by 'enlarged_'.

    Parameters:
    - file_path: Path to the original CSV dataset. It must contain a
      'timestamp' column.
    - duplication_factor: How many copies the output should contain in
      total (1 reproduces the original data).

    Returns:
    - The path of the CSV file that was written.

    Raises:
    - ValueError: If duplication_factor is smaller than 1.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Fail early with a clear message instead of pd.concat's
    # "No objects to concatenate" after the file has already been read.
    if duplication_factor < 1:
        raise ValueError(
            f'duplication_factor must be at least 1, got {duplication_factor}'
        )

    # Load and prepare the original dataset
    data = pd.read_csv(file_path)
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    timestamps = data['timestamp']

    # Use max/min rather than last/first row so the offset is correct even
    # when the rows are not sorted by time. The span is NaT when there are
    # no valid timestamps (e.g. header-only file); nothing to shift then.
    span = timestamps.max() - timestamps.min()
    if pd.isna(span):
        offset_duration = pd.Timedelta(0)
    else:
        # The extra 5 minutes separates the end of one copy from the start
        # of the next.
        offset_duration = span + pd.Timedelta(minutes=5)

    # Create duplicates with adjusted timestamps
    datasets = [
        data.assign(timestamp=timestamps + offset_duration * i)
        for i in range(duplication_factor)
    ]

    # Combine all datasets into one and reset the index
    enlarged_dataset = pd.concat(datasets).reset_index(drop=True)

    # Prefix only the file name, not the whole path, so inputs that live in
    # a sub-directory (or are given as absolute paths) are saved alongside
    # the original instead of in a non-existent 'enlarged_<dir>' folder.
    directory, file_name = os.path.split(file_path)
    enlarged_dataset_file_path = os.path.join(directory, f'enlarged_{file_name}')
    enlarged_dataset.to_csv(enlarged_dataset_file_path, index=False)

    return enlarged_dataset_file_path

# Demo: triple the EC2 CPU-utilization sample and report where it was saved.
# Point file_path at wherever the CSV actually lives on your machine.
file_path = 'ec2_cpu_utilization_825cc2.csv'
# Total number of copies the enlarged dataset should contain.
duplication_factor = 3
enlarged_dataset_file_path = enlarge_dataset(
    file_path=file_path,
    duplication_factor=duplication_factor,
)
print(f'Enlarged dataset saved to: {enlarged_dataset_file_path}')