## Get unique coords

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

In [None]:
# Gather the relative path of every entry in the training folder, followed by
# every entry in the shuffled test folder.
file_list: list = [
    folder + entry
    for folder in ("_dataset/TRAIN/", "_dataset/TEST_shuffled/")
    for entry in os.listdir(folder)
]

print(file_list)

[]


In [3]:
# Load the table of unique coordinate pairs and print it for a quick look.
df = pd.read_parquet("unique_coords.parquet")
print(df)

       längengrad  breitengrad
0            72.0       -25.00
1            72.0       -24.75
2            72.0       -24.50
3            72.0       -24.25
4            72.0       -24.00
...           ...          ...
42988        34.0        44.00
42989        34.0        44.25
42990        34.0        44.50
42991        34.0        44.75
42992        34.0        45.00

[42993 rows x 2 columns]


## Plot to map

In [4]:
# Draw every unique coordinate as a translucent red dot on a world map.
#
# NOTE(review): the column named "längengrad" (German for longitude) is passed
# as lat and "breitengrad" (latitude) as lon. The printed frame shows
# "längengrad" values such as 72.0 and 34.0, which matches the 34-72 latitude
# range noted in the later plotting cell, so the column labels look swapped in
# the data itself rather than here - confirm against the source of the file.
fig = px.scatter_geo(
    df,
    lat="längengrad",
    lon="breitengrad",
    color_discrete_sequence=["rgba(255, 0, 0, 0.2)"],
)

# Zoom to the plotted points and overlay a faint latitude/longitude grid.
grid_color = "rgba(0, 0, 0, 0.15)"
fig.update_geos(
    fitbounds="locations",
    projection_type="natural earth",
    showcountries=True,
    lataxis_showgrid=True,
    lataxis_gridcolor=grid_color,
    lonaxis_showgrid=True,
    lonaxis_gridcolor=grid_color,
)

# Remove the outer margins so the map fills the whole figure.
fig.update_layout(margin={"l": 0, "r": 0, "t": 0, "b": 0})

# Render once with the default renderer and once in a browser tab.
fig.show()
fig.show(renderer="browser")

## Preparing the data to plot: min, max and mean of one statistic, grouped by hour of day, day of month, month of year, or year

In [None]:
def min_max_mean(data_1: str, data_2: str, lat: float, long: float, files: "list[str] | None" = None) -> pd.DataFrame:
    """Aggregate one measurement at a single grid point across several parquet files.

    Each file is read with only the columns "latitude", "longitude", data_2 and
    data_1, reduced to the rows whose coordinates equal (lat, long), and the
    remaining rows of all files are concatenated. The minimum, maximum and mean
    of data_1 are then computed for every distinct value of data_2.

    Args:
        data_1: Name of the column to aggregate.
        data_2: Name of the column to group by (in addition to the coordinates).
        lat: Latitude to select. Compared with ==, so it has to match a stored
            value exactly.
        long: Longitude to select. Same exact-match rule as lat.
        files: Paths of the parquet files to read. When omitted, the
            module-level file_list is used, as in the original implementation.

    Returns:
        A DataFrame with the columns "latitude", "longitude", data_2, "min",
        "max" and "mean".

    Raises:
        NameError: files was omitted and file_list has not been defined yet.
        ValueError: there are no files to read.
    """
    if files is None:
        # Fall back to the notebook-level list, but explain how to fix the
        # situation when the cell that builds it has not been executed.
        try:
            files = file_list
        except NameError:
            raise NameError(
                "name 'file_list' is not defined; run the cell that builds "
                "file_list first or pass files=[...] explicitly"
            ) from None

    files = list(files)
    if not files:
        # pd.concat would fail on an empty list with a far less helpful message.
        raise ValueError("No input files to read: the list of parquet files is empty")

    wanted_columns = ["latitude", "longitude", data_2, data_1]
    data_frames: list = []
    for file in files:
        print(f"working on file: {file}")
        df = pd.read_parquet(path = file, columns = wanted_columns)
        # Keep only the rows that belong to the requested grid point.
        filtered = df.loc[(df["latitude"] == lat) & (df["longitude"] == long)]
        data_frames.append(filtered)

    full_df = pd.concat(data_frames, ignore_index = True)
    return full_df.groupby(["latitude", "longitude", data_2])[data_1].agg(["min", "max", "mean"]).reset_index()

## Plotting all the prepared data

In [6]:
# Column to aggregate and show on the y-axis. Options: lufttemperatur,
# niederschlag, wassertemperatur, luftdruck, luftfeuchtigkeit, schneetiefe,
# windgeschwindigkeit, (windrichtung)
data_y: str = "lufttemperatur"
# Column to group by and show on the x-axis. Options: year, month, day, hour
data_x: str = "month"
# Grid point to inspect.
lat: int = 70  # 34 to 72, in steps of 0.25
long: int = 27  # -28 to 48, in steps of 0.25

df = min_max_mean(data_1=data_y, data_2=data_x, lat=lat, long=long)

# All three lines share the same x values, so build them once.
x_values = list(df[data_x])

fig = go.Figure()
# One line per aggregate: (column in df, legend label, line colour).
for stat_column, trace_name, line_color in (
    ("min", "Min", "Blue"),
    ("max", "Max", "Red"),
    ("mean", "Mean", "Green"),
):
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=list(df[stat_column]),
            name=trace_name,
            line={"color": line_color},
        )
    )

fig.show()

NameError: name 'file_list' is not defined

## Heightmap

In [3]:
# Load the coordinate table that also carries a "höhe" (altitude) column.
altitude_df = pd.read_parquet("unique_coords_with_altitude.parquet")

# Reshape the long table into a 2D grid: one row per "breitengrad" value,
# one column per "längengrad" value, cells holding "höhe".
z = altitude_df.pivot(index="breitengrad", columns="längengrad", values="höhe")

# Axis coordinates come straight from the grid's column and row labels.
x = z.columns.values
y = z.index.values

# Build the 3D surface and wrap it in a figure.
surface = go.Surface(x=x, y=y, z=z.values)
fig = go.Figure(data=[surface])

fig.update_layout(
    title="Höhenkarte",
    autosize=True,
    scene={
        "xaxis_title": "Längengrad",
        "yaxis_title": "Breitengrad",
        "zaxis_title": "Höhe",
    },
)

# Open in a browser tab first, then render with the default renderer.
fig.show(renderer="browser")
fig.show()