## Globale Temperaturänderungen und Gefahren

### Test
- Temperatur- und Geo-Daten einlesen und zusammenführen
- Geobasierte Visualisierung von Temperaturdaten

---

In [14]:
import numpy as np
import pandas as pd
from lat_lon_parser import parse

import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
from plotly.colors import sample_colorscale
import dash
from dash import Dash, dcc, html, Input, Output, callback, callback_context
from sklearn.preprocessing import MinMaxScaler

import h3
import re

import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import HBox, Label, IntSlider

import dash
from dash import Dash, dcc, html, Input, Output


pd.set_option('display.max_colwidth', None)  # remove the column-width limit so long cell values are shown in full
# from pyspark.sql.functions import to_date

# Temperaturdaten einlesen

## 1. Einlesen

Zuvor gespeicherte CSVs werden in Dataframes geladen:
- clim_change_celsius_df.csv `celsius_df`
- clim_change_geo_df.csv `geo_df`

Die DataFrames werden mit `pd.merge` zusammengeführt
- Gemeinsamer Schlüssel besteht aus `City` und `plus_code`


In [15]:
# Load the temperature records (the author measured roughly 17 seconds for this read).
# Only the four listed columns are read; 'dt' is parsed as a datetime column.
celsius_df = pd.read_csv(
    'clim_change_celsius_df.csv',
    usecols=['dt', 'AverageTemperature', 'City', 'plus_code'],
    dtype={'AverageTemperature': 'float32', 'City': 'string', 'plus_code': 'string'},
    parse_dates=['dt'],
)

# Print the column summary including non-null counts and deep memory usage.
celsius_df.info(memory_usage='deep', show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8235082 entries, 0 to 8235081
Data columns (total 4 columns):
 #   Column              Non-Null Count    Dtype         
---  ------              --------------    -----         
 0   dt                  8235082 non-null  datetime64[ns]
 1   AverageTemperature  8235082 non-null  float32       
 2   City                8235082 non-null  string        
 3   plus_code           8235082 non-null  string        
dtypes: datetime64[ns](1), float32(1), string(2)
memory usage: 1004.6 MB


In [16]:
# Einlesen des Geo-Dataframes
geo_df = pd.read_csv('clim_change_geo_df.csv',
                     usecols=['City', 'Country', 'Latitude', 'Longitude', 'plus_code'],
                     dtype={'City': 'string',
                            'Country': 'string',
                            'Latitude': 'float32',
                            'Longitude': 'float32',
                            'plus_code': 'string'
                           }
                    )
geo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3510 entries, 0 to 3509
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   City       3510 non-null   string 
 1   Country    3510 non-null   string 
 2   Latitude   3510 non-null   float32
 3   Longitude  3510 non-null   float32
 4   plus_code  3510 non-null   string 
dtypes: float32(2), string(3)
memory usage: 109.8 KB


In [17]:
# Print ten random rows of each frame as plain text (row index suppressed),
# separated by a blank line. The celsius sample is drawn before the geo sample.
print(
    '[-- celsius_df --]',
    celsius_df[['City', 'plus_code', 'dt', 'AverageTemperature']].sample(10).to_string(index=False),
    '',
    '[-- geo_df --]',
    geo_df[['City', 'plus_code', 'Latitude', 'Longitude', 'Country']].sample(10).to_string(index=False),
    sep='\n',
)

[-- celsius_df --]
        City plus_code         dt  AverageTemperature
         Iwo 6FV663J2+ 1999-07-01           24.971001
      Yungho 7QP4W9C6+ 1851-10-01           22.667999
Tiruvottiyur 7M52M35Q+ 1834-02-01           25.552000
        Baku 8HGCXXRR+ 1951-06-01           20.290001
     Abidjan 6CQRJQJ9+ 1906-01-01           26.862000
  Bratislava 8FVVCQ96+ 1782-04-01            9.530000
   Bhagalpur 7MP8WWC2+ 1958-09-01           29.148001
       Herne 9F496VRJ+ 1863-04-01            8.580000
       Betim 58FQWJ5Q+ 1842-09-01           18.620001
      Benoni 5G5CFMH5+ 1947-12-01           18.480000

[-- geo_df --]
             City plus_code  Latitude  Longitude        Country
          Saratov 9H4967RX+ 52.240002  47.299999         Russia
       Cheltenham 9C4V69R9+ 52.240002  -2.630000 United Kingdom
Lexington Fayette 869PQHHJ+ 37.779999 -85.419998  United States
            Yanan 8P8F599Q+ 36.169998 109.389999          China
        Remscheid 9F28J8JR+ 50.630001   6.340000   

In [18]:
# Mergen beider Frames --> ca. 5 Sekunden
celsius_geo_df = pd.merge(
    celsius_df,
    geo_df,
    on=['City', 'plus_code'],
    how='inner',
    validate='m:1'  # Prüft, dass geo_df eindeutige Schlüssel hat
)

# plus_code Spalte löschen
celsius_geo_df = celsius_geo_df.drop(columns=['plus_code'])

# Ausgabe
celsius_geo_df.info(show_counts=True, memory_usage='deep')  # ca. 6 Sekunden
print('')
print(celsius_geo_df.tail(5).to_string(index=False))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8235082 entries, 0 to 8235081
Data columns (total 6 columns):
 #   Column              Non-Null Count    Dtype         
---  ------              --------------    -----         
 0   dt                  8235082 non-null  datetime64[ns]
 1   AverageTemperature  8235082 non-null  float32       
 2   City                8235082 non-null  string        
 3   Country             8235082 non-null  string        
 4   Latitude            8235082 non-null  float32       
 5   Longitude           8235082 non-null  float32       
dtypes: datetime64[ns](1), float32(3), string(2)
memory usage: 1.0 GB

        dt  AverageTemperature   City     Country  Latitude  Longitude
2013-04-01               7.710 Zwolle Netherlands 52.240002       5.26
2013-05-01              11.464 Zwolle Netherlands 52.240002       5.26
2013-06-01              15.043 Zwolle Netherlands 52.240002       5.26
2013-07-01              18.775 Zwolle Netherlands 52.240002       5.2

In [19]:
# Durchschnittstemperaturen mit Koordinaten nach Jahren
celsius_geo_df['year'] = celsius_geo_df['dt'].dt.year
celsius_geo_year_df = (
    celsius_geo_df
    .groupby(['year', 'Latitude', 'Longitude'], as_index=False)
    .agg({'AverageTemperature': 'mean'})
)

# Ausgabe
celsius_geo_year_df.info()
print('')
print(celsius_geo_year_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 267985 entries, 0 to 267984
Data columns (total 4 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   year                267985 non-null  int32  
 1   Latitude            267985 non-null  float32
 2   Longitude           267985 non-null  float32
 3   AverageTemperature  267985 non-null  float32
dtypes: float32(3), int32(1)
memory usage: 4.1 MB

        year   Latitude  Longitude  AverageTemperature
0       1743  28.129999 -82.730003           18.664000
1       1743  28.129999 -80.910004           18.722000
2       1743  29.740000 -84.919998           15.379000
3       1743  29.740000 -83.080002           15.164000
4       1743  29.740000 -81.230003           17.549999
...      ...        ...        ...                 ...
267980  2013  65.089996 -21.059999            3.478875
267981  2013  65.089996  24.889999            3.813875
267982  2013  65.089996  40.209999          

In [20]:
# Temperaturdifferenz (max-min) nach Jahren
celsius_geo_agg_df = (
    celsius_geo_df
    .groupby(['year', 'Latitude', 'Longitude'])['AverageTemperature']
    .agg(['min', 'max'])
    .reset_index()
)
celsius_geo_agg_df['temp_diff'] = celsius_geo_agg_df['max'] - celsius_geo_agg_df['min']

# Ausgabe
print(celsius_geo_agg_df.tail(10).to_string(index=False))

 year  Latitude  Longitude        min       max  temp_diff
 2013 61.880001  35.660000 -11.776000 15.607000  27.382999
 2013 61.880001  49.250000 -15.788000 18.677999  34.466000
 2013 61.880001  73.019997 -24.969000 20.188999  45.157997
 2013 63.490002   9.000000  -1.896000 12.922000  14.818000
 2013 63.490002  52.200001 -18.048000 17.877001  35.925003
 2013 65.089996 -21.059999  -2.502000 10.279000  12.781000
 2013 65.089996  24.889999 -10.022000 14.953000  24.975000
 2013 65.089996  40.209999 -13.235000 15.445000  28.680000
 2013 68.309998  34.700001 -13.205000 14.244000  27.449001
 2013 69.919998  88.830002 -31.518999 15.733000  47.251999


## 2. Weltkarte Übersicht
Eine Übersicht vorhandener Temperaturdaten wird auf einer Weltkarte dargestellt
- Es werden Mittelwerte in Hexagonen abgebildet

In [21]:
# External stylesheet applied to the Dash page.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(external_stylesheets=external_stylesheets)

# The available years are determined once and converted to plain Python ints,
# so the input field, the slider and the slider marks all share the same
# bounds instead of re-scanning the frame for every component property.
available_years = sorted(int(year) for year in celsius_geo_year_df['year'].unique())
year_min, year_max = available_years[0], available_years[-1]

# Define the layout
app.layout = html.Div([
    html.H5("Globale Temperaturverteilung pro Jahr (Ø °C)"),

    # Dataset selection
    html.Div([
        dcc.RadioItems(
            id='dataset-selector',
            options=[
                {'label': 'Temperatur absolut', 'value': 'temp'},
                {'label': 'Temperaturschwankungen (Δ min/max)', 'value': 'diff'}
            ],
            value='temp'
        )
    ]),

    html.Div([

        # Year label
        html.Div([
            html.Label("Jahr:")
        ], style={'padding': '20px 0px 20px 10px',
                  'margin': '10px 0px 10px 10px'}),

        # Year input field (starts at the most recent year)
        html.Div([
            dcc.Input(
                id='year-input',
                type='number',
                min=year_min,
                max=year_max,
                value=year_max,
                style={'width': '5%'}
            )
        ], style={'flex': '1',
                  'padding': '10px 0px 10px 0px',
                  'margin': '10px 0px 10px 0px'}),

    ], style={'display': 'flex', 'gap': '10px'}),

    html.Div([

        # Year slider (same bounds and start value as the input field);
        # a mark is shown for every tenth available year.
        html.Div([
            dcc.Slider(
                id='year-slider',
                min=year_min,
                max=year_max,
                value=year_max,
                marks={str(year): str(year) for year in available_years[::10]},
                step=1
            ),
        ], style={'width': '1100px', 'display': 'inline-block'}),

        # Graph container (map)
        dcc.Graph(
            id='map-graph',
            config={'scrollZoom': True}
        )
    ])
])

# Callback für Synchronisation
@app.callback(
    [Output('year-slider', 'value'),
     Output('year-input', 'value')],
    [Input('year-slider', 'value'),
     Input('year-input', 'value')]
)
def sync_input_and_slider(slider_val, input_val):
    """Keep the year slider and the year input field showing the same value.

    Args:
        slider_val: Current value of the 'year-slider' component.
        input_val: Current value of the 'year-input' component.

    Returns:
        A ``(slider_value, input_value)`` tuple in which both entries are the
        value of whichever component triggered the callback.

    Raises:
        dash.exceptions.PreventUpdate: If nothing triggered the callback, the
            trigger is neither of the two components, or the triggering
            component currently has no value.
    """
    ctx = callback_context
    if not ctx.triggered:
        raise dash.exceptions.PreventUpdate

    # 'prop_id' has the form '<component-id>.<property>'; only the id is needed.
    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    value_by_component = {'year-slider': slider_val, 'year-input': input_val}
    new_year = value_by_component.get(trigger_id)

    # A None value (e.g. an emptied input field) must not be pushed into the
    # slider, otherwise the map callback would be asked to filter on None.
    if new_year is None:
        raise dash.exceptions.PreventUpdate

    return new_year, new_year

# Callback für Daten-Aktualisierung und Karte
@app.callback(
    Output('map-graph', 'figure'),
    [Input('dataset-selector', 'value'),
     Input('year-slider', 'value')]
)
def update_map(dataset, year):
    """Build the hexbin world map for the selected dataset and year.

    Args:
        dataset: Value of the 'dataset-selector' radio items. 'temp' selects
            the yearly mean temperatures (celsius_geo_year_df); any other
            value selects the yearly min/max spread (celsius_geo_agg_df).
        year: Value of the 'year-slider'; rows are filtered to this year.

    Returns:
        The plotly figure shown in the 'map-graph' component.

    Raises:
        dash.exceptions.PreventUpdate: If no year is set or the selected
            frame has no rows for that year, so the current map is kept.
    """
    if year is None:
        raise dash.exceptions.PreventUpdate

    # Select the dataset and the column that drives the hexagon colour.
    if dataset == 'temp':
        df = celsius_geo_year_df
        color_column = 'AverageTemperature'
    else:
        df = celsius_geo_agg_df
        # The spread column of celsius_geo_agg_df is named 'temp_diff'.
        color_column = 'temp_diff'

    # Filter by year; without rows there is nothing to bin.
    filtered_df = df[df['year'] == year]
    if filtered_df.empty:
        raise dash.exceptions.PreventUpdate

    # Create the hexbin map: each hexagon is coloured by the mean of the
    # selected column over the points that fall into it.
    # NOTE(review): range_color is fixed to [0, 23] for both datasets — confirm
    # this is intended for the min/max spread as well.
    fig = ff.create_hexbin_mapbox(
        data_frame=filtered_df,
        lat="Latitude",
        lon="Longitude",
        nx_hexagon=80,
        opacity=0.4,
        color=color_column,
        agg_func=np.mean,
        color_continuous_scale="Spectral_r",
        range_color=[0, 23],
        labels={'color': '°C'}
    )

    # Adjust the layout: fixed size, no margins, OpenStreetMap base layer.
    fig.update_layout(
        mapbox=dict(
            style="open-street-map",
            center=dict(lat=22, lon=9),
            zoom=0.9
        ),
        width=1100,
        height=550,
        margin={'r': 0, 't': 0, 'l': 0, 'b': 0},
        dragmode='pan',
        clickmode='event+select'
    )

    # Adjust the traces: white one-pixel outline around each hexagon.
    fig.update_traces(
        marker={'line': {'width': 1, 'color': 'rgba(255,255,255,1)'}},
    )

    return fig

# Start the Dash server on port 8066 in debug mode when this code runs as the main module.
if '__main__' == __name__:
    app.run(port=8066, jupyter_height=800, debug=True)
