# Calculating time to render datasets
## Using Plotly to plot large datasets

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import time

# Build the benchmark inputs: five point counts spread evenly between
# 10,000 and 500,000, and one table of normally distributed x/y values
# for each of those counts.
data_size = np.linspace(start=10000, stop=500000, num=5, dtype=int)
datasets = [
    pd.DataFrame(
        {
            'x': np.random.normal(size=n_points),
            'y': np.random.normal(size=n_points),
        }
    )
    for n_points in data_size
]

# Rendering times are appended here, one entry per dataset
elapsed_times = list()

# Interactively render the datasets with Plotly and time each one.
# NOTE(review): the measured span ends when fig.show() returns; any drawing
# the front end does afterwards is presumably not included -- confirm for the
# renderer in use.
for idx, (n_points, df) in enumerate(zip(data_size, datasets)):
    # perf_counter() is the clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()

    # Create interactive scatterplot
    fig = px.scatter(df, x='x', y='y', title=f"Interactive Scatterplot: {n_points} Points",
                     labels={'x': 'X-axis', 'y': 'Y-axis'})

    # Display the figure (comment this out if running in a non-interactive environment)
    fig.show()

    # Measure elapsed time
    elapsed_time = time.perf_counter() - start_time
    elapsed_times.append(elapsed_time)
    print(f"Plotly Test {idx + 1}: Time to render {n_points} points: {elapsed_time:.4f} seconds")

# Collect the measurements in a table and chart render time against size
render_times_df = pd.DataFrame(
    {
        'Data Size': data_size,
        'Render Time (s)': elapsed_times,
    }
)

axis_labels = {
    'Data Size': 'Number of Points',
    'Render Time (s)': 'Render Time (seconds)',
}
fig_time = px.line(
    render_times_df,
    x='Data Size',
    y='Render Time (s)',
    title='Render Times for Interactive Scatterplots',
    labels=axis_labels,
)
fig_time.show()



In [None]:
# Draw the render-time line chart from the measurements in render_times_df
fig_time = px.line(
    data_frame=render_times_df,
    x='Data Size',
    y='Render Time (s)',
    labels={
        'Data Size': 'Number of Points',
        'Render Time (s)': 'Render Time (seconds)',
    },
    title='Render Times for Interactive Scatterplots',
)
fig_time.show()

# Level of Detail (LOD) rendering

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

# Inputs for the LOD experiment: ten point counts from 10,000 to 1,000,000
# and a table of normally distributed x/y values for each count
data_sizes = np.linspace(10000, 1000000, num=10, dtype=int)
datasets = [
    pd.DataFrame(dict(x=np.random.normal(size=count), y=np.random.normal(size=count)))
    for count in data_sizes
]

# Function for LOD rendering
def render_lod(data, zoom_level):
    """Scatter-plot ``data``, thinned according to ``zoom_level``, and show it.

    Args:
        data: Table with 'x' and 'y' columns that supports
            ``.sample(frac=...)`` (the callers in this file pass a pandas
            DataFrame).
        zoom_level: 'low' plots a random 10% of the rows, 'medium' a random
            50%, and 'high' plots every row.

    Raises:
        ValueError: If ``zoom_level`` is not 'low', 'medium' or 'high'.
    """
    if zoom_level == 'low':
        data = data.sample(frac=0.1)  # 10% of data
    elif zoom_level == 'medium':
        data = data.sample(frac=0.5)  # 50% of data
    elif zoom_level != 'high':
        # Reject unknown levels instead of silently drawing the full data set,
        # which would record a full-resolution render under the wrong label.
        raise ValueError(
            f"Unknown zoom_level {zoom_level!r}; expected 'low', 'medium' or 'high'"
        )
    # "high" stays unchanged
    plt.scatter(data['x'], data['y'], s=1, alpha=0.5)
    plt.title(f"LOD: Zoom-Level {zoom_level}")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()

# Measure rendering times for different LOD levels
lod_levels = ['low', 'medium', 'high']
render_times = {level: [] for level in lod_levels}

for zoom_level in lod_levels:
    for dataset in datasets:
        # perf_counter() is the clock intended for measuring intervals;
        # time.time() follows the wall clock and can jump if it is adjusted.
        start_time = time.perf_counter()
        render_lod(dataset, zoom_level)
        render_time = time.perf_counter() - start_time
        render_times[zoom_level].append(render_time)

# Draw one rendering-time curve per LOD level on a shared set of axes
for zoom_level in lod_levels:
    curve_label = f'LOD: {zoom_level.capitalize()}'
    plt.plot(data_sizes, render_times[zoom_level], label=curve_label)

plt.xlabel("Data Size")
plt.ylabel("Rendering Time (s)")
plt.title("Rendering Times for Different LOD Levels")
plt.grid(True)
plt.legend()
plt.show()

# Visualization Types

In [None]:
# Compare rendering times across visualization types (scatter, line, bar, heatmap)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

# Inputs for the experiment: five point counts between 10,000 and 500,000,
# each with its own table of normally distributed x/y values
data_sizes = np.linspace(10000, 500000, num=5, dtype=int)
datasets = [
    pd.DataFrame({
        'x': np.random.normal(size=row_count),
        'y': np.random.normal(size=row_count),
    })
    for row_count in data_sizes
]

# Chart types to benchmark (the heatmap is included)
visualization_types = [
    'scatter',
    'line',
    'bar',
    'heatmap',
]

# Function to render different visualization types, extended with Heatmap
def render_visualization(data, vis_type):
    """Render ``data`` as one chart of the requested type and show it.

    Args:
        data: Table with 'x' and 'y' columns (the callers in this file pass a
            pandas DataFrame).
        vis_type: One of 'scatter', 'line', 'bar' or 'heatmap'.

    Raises:
        ValueError: If ``vis_type`` is not one of the supported names.
    """
    if vis_type == 'scatter':
        plt.scatter(data['x'], data['y'], s=1, alpha=0.5)
    elif vis_type == 'line':
        plt.plot(data['x'], data['y'], linewidth=0.5, alpha=0.5)
    elif vis_type == 'bar':
        # The bar plot is limited to the first 50 rows (slicing clamps to the
        # available length, so shorter inputs are used in full).
        bar_data = data[:50]
        plt.bar(bar_data.index, bar_data['y'], alpha=0.5)
    elif vis_type == 'heatmap':
        counts, x_edges, y_edges = np.histogram2d(data['x'], data['y'], bins=50)
        # histogram2d bins x along the first array axis, while imshow draws the
        # first axis vertically. Transpose so x runs horizontally, matching the
        # axis labels, and pass the bin edges as the extent so the ticks show
        # data coordinates instead of bin indices.
        plt.imshow(counts.T, cmap='viridis', origin='lower', aspect='auto',
                   extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]])
        plt.colorbar(label='Density')
    else:
        # An unknown type would otherwise show (and time) an empty figure.
        raise ValueError(
            f"Unknown vis_type {vis_type!r}; expected 'scatter', 'line', 'bar' or 'heatmap'"
        )
    plt.title(f"{vis_type.capitalize()} Plot")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()

# Measure rendering times including Heatmap
render_times = {vis_type: [] for vis_type in visualization_types}

for vis_type in visualization_types:
    for dataset in datasets:
        # perf_counter() is the clock intended for measuring intervals;
        # time.time() follows the wall clock and can jump if it is adjusted.
        start_time = time.perf_counter()
        render_visualization(dataset, vis_type)
        render_time = time.perf_counter() - start_time
        render_times[vis_type].append(render_time)

# One rendering-time curve per visualization type (heatmap included)
for vis_type in visualization_types:
    measured = render_times[vis_type]
    plt.plot(data_sizes, measured, label=vis_type.capitalize())

plt.xlabel("Data Size")
plt.ylabel("Rendering Time (s)")
plt.title("Rendering Times for Different Visualization Types")
plt.grid(True)
plt.legend()
plt.show()


## Using Plotly to plot large datasets interactively

In [None]:
import plotly.express as px

# One timing per dataset is appended here
elapsed_times_plotly = []

# Plot using Plotly and time each figure.
# NOTE(review): the measured span ends when fig.show() returns; any drawing
# the front end does afterwards is presumably not included -- confirm for the
# renderer in use.
for df in datasets:
    # perf_counter() is the clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()
    fig = px.scatter(df, x='x', y='y', width=800, height=800)
    fig.show()
    elapsed_time = time.perf_counter() - start_time
    elapsed_times_plotly.append(elapsed_time)
    print(f"Plotly Test: Time to render {len(df)} points: {elapsed_time:.4f} seconds")



In [None]:
# Plot render time against data size, with a marker on every measurement.
# The point counts are read from the datasets themselves rather than from a
# size array defined in another cell, so they describe the data being plotted.
# Data size goes on the x-axis and time on the y-axis, as the title states.
point_counts = [len(df) for df in datasets]
plt.plot(point_counts, elapsed_times_plotly, marker='o')
plt.title("Time to Render Data vs. Data Size (Plotly)")
plt.xlabel("Data Size")
plt.ylabel("Time to Render Data (s)")
plt.show()

## Experimente von ChatGPT

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

# Generate datasets of different sizes: ten point counts from 10,000 to
# 1,000,000, each with normally distributed x/y values
data_sizes = np.linspace(start=10000, stop=1000000, num=10, dtype=int)
datasets = [
    pd.DataFrame(
        {
            'x': np.random.normal(size=n_rows),
            'y': np.random.normal(size=n_rows),
        }
    )
    for n_rows in data_sizes
]

# Measured rendering times, one entry per dataset
render_times = list()

# Measure rendering times with Matplotlib.
# NOTE(review): with a blocking GUI backend plt.show() presumably returns only
# after the window is closed, which would be part of the measured time --
# confirm for the backend in use.
for size, dataset in zip(data_sizes, datasets):
    # perf_counter() is the clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()
    plt.scatter(dataset['x'], dataset['y'], s=1, alpha=0.5)
    plt.title(f"Datensatzgröße: {size}")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()
    render_time = time.perf_counter() - start_time
    render_times.append(render_time)
    print(f"Rendering-Zeit für {size} Punkte: {render_time:.4f} Sekunden")


In [None]:
# Tile function: only the points inside the visible tile are rendered
def render_tile(data, x_range, y_range):
    """Scatter-plot the rows of ``data`` that lie inside a rectangular window.

    ``x_range`` and ``y_range`` are read at index 0 and 1 as the inclusive
    lower and upper bound of the window on the respective axis.
    """
    x_low, x_high = x_range[0], x_range[1]
    y_low, y_high = y_range[0], y_range[1]

    inside_x = (data['x'] >= x_low) & (data['x'] <= x_high)
    inside_y = (data['y'] >= y_low) & (data['y'] <= y_high)
    visible = data[inside_x & inside_y]

    plt.scatter(visible['x'], visible['y'], s=1, alpha=0.5)
    plt.title("Tiling-Ansatz")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()

# Simulate zooming in by rendering a single tile
x_range = (-2, 2)  # visible area
y_range = (-2, 2)
for size, dataset in zip(data_sizes, datasets):
    # perf_counter() is the clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()
    render_tile(dataset, x_range, y_range)
    render_time = time.perf_counter() - start_time
    print(f"Tiling: Rendering-Zeit für {size} Punkte: {render_time:.4f} Sekunden")


In [None]:
# LOD function: reduce the number of data points depending on the zoom level
def render_lod(data, zoom_level):
    """Scatter-plot ``data``, thinned according to ``zoom_level``, and show it.

    Args:
        data: Table with 'x' and 'y' columns that supports
            ``.sample(frac=...)`` (the callers in this file pass a pandas
            DataFrame).
        zoom_level: 'low' plots a random 10% of the rows, 'medium' a random
            50%, and 'high' plots every row.

    Raises:
        ValueError: If ``zoom_level`` is not 'low', 'medium' or 'high'.
    """
    if zoom_level == 'low':
        data = data.sample(frac=0.1)  # 10% of the data
    elif zoom_level == 'medium':
        data = data.sample(frac=0.5)  # 50% of the data
    elif zoom_level != 'high':
        # Reject unknown levels instead of silently drawing the full data set,
        # which would record a full-resolution render under the wrong label.
        raise ValueError(
            f"Unknown zoom_level {zoom_level!r}; expected 'low', 'medium' or 'high'"
        )
    # "high" stays unchanged
    plt.scatter(data['x'], data['y'], s=1, alpha=0.5)
    plt.title(f"LOD: Zoom-Level {zoom_level}")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()

# Test LOD rendering at the different zoom levels
for zoom_level in ['low', 'medium', 'high']:
    for size, dataset in zip(data_sizes, datasets):
        # perf_counter() is the clock intended for measuring intervals;
        # time.time() follows the wall clock and can jump if it is adjusted.
        start_time = time.perf_counter()
        render_lod(dataset, zoom_level)
        render_time = time.perf_counter() - start_time
        print(f"LOD ({zoom_level}): Rendering-Zeit für {size} Punkte: {render_time:.4f} Sekunden")


In [None]:
import plotly.express as px

# One timing per dataset is appended here
render_times_plotly = []

# NOTE(review): the measured span ends when fig.show() returns; any drawing
# the front end does afterwards is presumably not included -- confirm for the
# renderer in use.
for size, dataset in zip(data_sizes, datasets):
    # perf_counter() is the clock intended for measuring intervals;
    # time.time() follows the wall clock and can jump if it is adjusted.
    start_time = time.perf_counter()
    fig = px.scatter(dataset, x='x', y='y', title=f"Datensatzgröße: {size}", width=800, height=800)
    fig.show()
    render_time = time.perf_counter() - start_time
    render_times_plotly.append(render_time)
    print(f"Plotly: Rendering-Zeit für {size} Punkte: {render_time:.4f} Sekunden")
