In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from sklearn.linear_model import LinearRegression
from plotly.subplots import make_subplots
import plotly.express as px
from scipy.stats import pearsonr


In [2]:
def sk_linreg(df, coef=False):
    """Fit a linear trend of the frame's data column against ``Year``.

    The regression target is the last column of ``df``. A
    ``'Linear Regression Line'`` column left over from an earlier call is
    discarded first, so a trend is never fitted to a previous fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'Year'`` column. Rows containing NaN are dropped.
        The caller's frame is left unmodified.
    coef : bool, default False
        When True, the fitted coefficients are returned as well.

    Returns
    -------
    pandas.DataFrame or tuple
        A cleaned copy of ``df`` with an added ``'Linear Regression Line'``
        column. With ``coef=True`` a tuple ``(frame, reg.coef_)``.
    """
    line_col = 'Linear Regression Line'
    # Work on a cleaned copy: mutating the caller's frame would make the
    # fitted line the "last column" on the next call with a derived frame.
    df = df.drop(columns=line_col, errors='ignore').dropna().copy()
    X = df['Year'].to_numpy().reshape(-1, 1)
    reg = LinearRegression().fit(X, df.iloc[:, -1])
    df[line_col] = reg.predict(X)
    # Explicit branches: `return df, a if c else b` binds as `(df, (a if c else b))`.
    if coef:
        return df, reg.coef_
    return df

def plot(df_):
    """Build a figure with one trace per region for the frame's last column.

    ``df_`` needs ``'Region'`` and ``'Year'`` columns; the last column is
    plotted on the y axis and also provides the title and y-axis label.
    Returns the plotly figure without showing it.
    """
    value_col = df_.columns[-1]
    figure = go.Figure()

    # np.unique yields the distinct region labels in sorted order.
    region_names = np.unique(df_['Region'])
    for region_name in region_names:
        subset = df_.loc[df_['Region'] == region_name]
        trace = go.Scatter(x=subset['Year'], y=subset[value_col], name=region_name)
        figure.add_trace(trace)

    figure.update_layout(
        title=value_col + ' pro Jahr',
        xaxis_title='Jahr',
        yaxis_title=value_col,
    )
    return figure

def plot_linreg(df, data_name='Neuschnee'):
    """Plot ``data_name`` over ``Year`` together with its linear trend line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Year'`` and ``data_name`` columns. It is not modified;
        rows with NaN in either of these two columns are ignored.
    data_name : str, default 'Neuschnee'
        Column that is plotted and used as the regression target.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with the data trace and the regression trace; the title
        contains the fitted slope.
    """
    # Pass only the two relevant columns, as a fresh copy, so that
    #  * the caller's frame is never altered, and
    #  * the regression target (last column) is exactly the plotted column.
    data = df[['Year', data_name]].dropna().copy()
    data, coef = sk_linreg(data, True)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Year'], y=data[data_name], name=data_name))
    fig.add_trace(go.Scatter(x=data['Year'], y=data['Linear Regression Line'], name='Linear Regression'))
    fig.update_layout(
        title='{} pro Jahr. Veränderung von {:.2} pro Jahr'.format(data_name, coef[0]),
        xaxis_title='Jahr',
        yaxis_title=data_name,
    )
    return fig

def plot_linreg_subplots(df, data_name='Neuschnee'):
    """Create one subplot per region showing ``data_name`` and its linear trend.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Region'``, ``'Year'`` and ``data_name`` columns.
        It is not modified.
    data_name : str, default 'Neuschnee'
        Column that is plotted and used as the regression target.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one row per region; every subplot title contains the
        fitted slope for that region.
    """
    regions = np.unique(df['Region'])

    # Fit every region exactly once and reuse the result for both the
    # subplot titles and the traces.
    fits = []
    for region in regions:
        # Only Year and the target column are passed on, so the regression
        # target is the plotted column regardless of the column order.
        region_df = df.loc[df['Region'] == region, ['Year', data_name]].copy()
        region_df, coef = sk_linreg(region_df, True)
        fits.append((region_df, coef[0]))

    subplot_titles = [
        '{} in {}. Veränderung von {:.2} pro Jahr.'.format(data_name, region, slope)
        for region, (_, slope) in zip(regions, fits)
    ]
    fig = make_subplots(rows=len(regions), cols=1, subplot_titles=subplot_titles)

    for row, (region_df, slope) in enumerate(fits, 1):
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(go.Scatter(x=region_df['Year'], y=region_df[data_name],
                                 name=data_name),
                      row=row, col=1)
        fig.add_trace(go.Scatter(x=region_df['Year'], y=region_df['Linear Regression Line'],
                                 name=str(np.round(slope, 2))),
                      row=row, col=1)

    fig.update_layout(height=3000, width=600, title_text=data_name + " und Lineare Regression.",
                      showlegend=False, xaxis_title='Jahr', yaxis_title=data_name)
    return fig

def calculate_corr(df1, df2):
    """Pearson correlation per region between the last columns of two frames.

    For every region found in ``df1`` the values of both frames are paired
    on identical ``Year`` values (inner join), so gaps or different
    start/end years in either series cannot shift the pairing.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Each must contain ``'Region'`` and ``'Year'`` columns; the last
        column holds the values to correlate. Rows containing NaN are
        ignored. Neither frame is modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Region'`` and ``'Correlation'``. Regions for which
        ``pearsonr`` raises ``ValueError`` (e.g. fewer than two common
        years) are reported via ``print`` and left out.
    """
    # dropna() returns new frames, so the callers' data stay untouched.
    clean1 = df1.dropna()
    clean2 = df2.dropna()

    regions = []
    corrs = []
    for region in np.unique(clean1['Region']):
        sub1 = clean1[clean1['Region'] == region]
        sub2 = clean2[clean2['Region'] == region]

        # Fixed helper column names avoid clashes when both frames use the
        # same name for their value column.
        left = pd.DataFrame({'Year': sub1['Year'].to_numpy(),
                             'left': sub1.iloc[:, -1].to_numpy()})
        right = pd.DataFrame({'Year': sub2['Year'].to_numpy(),
                              'right': sub2.iloc[:, -1].to_numpy()})
        paired = left.merge(right, on='Year', how='inner').sort_values('Year')

        try:
            corr, _ = pearsonr(paired['left'], paired['right'])
        except ValueError as err:
            print(err, "with ", region)
            continue
        regions.append(region)
        corrs.append(corr)

    return pd.DataFrame(list(zip(regions, corrs)), columns=['Region', 'Correlation'])

# EDA Klimadaten Schweiz
## Neuschnee

In [3]:
# Read Neuschnee.csv and draw one trace per region.
df_neuschnee = pd.read_csv('Neuschnee.csv')
fig = plot(df_=df_neuschnee)
fig.show()

FileNotFoundError: [Errno 2] File Neuschnee.csv does not exist: 'Neuschnee.csv'

### Jahresdurchschnitt Neuschnee für alle Regionen

In [None]:
# Yearly mean over all regions. numeric_only=True skips non-numeric columns
# (such as a text 'Region' column), which pandas >= 2.0 refuses to average.
df_ns_mean = df_neuschnee.groupby('Year', as_index=False).mean(numeric_only=True)
plot_linreg(df_ns_mean).show()

### Jahresdurchschnitt Neuschnee für alle Regionen ab 1965

In [None]:
# Keep only the yearly means from 1965 onwards and plot their trend.
from_1965 = df_ns_mean['Year'].ge(1965)
df_ns_mean_1965 = df_ns_mean.loc[from_1965].copy()
del from_1965  # temporary mask, not part of the notebook state
plot_linreg(df=df_ns_mean_1965).show()

### Neuschnee pro Region mit linearer Regression

In [None]:
# One subplot per region with the data and its regression line.
fig = plot_linreg_subplots(df=df_neuschnee)
fig.show()

## Jahresniederschlag

In [None]:
# Read Jahresniederschlag.csv and draw one trace per region.
df_jahresniederschlag = pd.read_csv('Jahresniederschlag.csv')
fig = plot(df_=df_jahresniederschlag)
fig.show()

## Jahresdurchschnitt Niederschlag für alle Regionen

In [None]:
# Yearly mean over all regions. numeric_only=True skips non-numeric columns
# (such as a text 'Region' column), which pandas >= 2.0 refuses to average.
df_regen_mean = df_jahresniederschlag.groupby('Year', as_index=False).mean(numeric_only=True)
plot_linreg(df_regen_mean, 'Jahresniederschlag').show()

## Niederschlag pro Region mit linearer Regression

In [None]:
# One subplot per region with the data and its regression line.
fig = plot_linreg_subplots(df=df_jahresniederschlag, data_name='Jahresniederschlag')
fig.show()

## Temperatur

In [None]:
# Read Jahrestemperatur.csv and draw one trace per region.
df_jahrestemperatur = pd.read_csv('Jahrestemperatur.csv')
fig = plot(df_=df_jahrestemperatur)
fig.show()

## Temperaturjahresdurchschnitt für alle Regionen

In [None]:
# Yearly mean over all regions. numeric_only=True skips non-numeric columns
# (such as a text 'Region' column), which pandas >= 2.0 refuses to average.
df_temp_mean = df_jahrestemperatur.groupby('Year', as_index=False).mean(numeric_only=True)
plot_linreg(df_temp_mean, 'Jahrestemperatur').show()

## Temperatur pro Region mit linearer Regression

In [None]:
# One subplot per region with the data and its regression line.
fig = plot_linreg_subplots(df=df_jahrestemperatur, data_name='Jahrestemperatur')
fig.show()

## Sonnenscheindauer

In [None]:
# Read Sonnenscheindauer.csv and draw one trace per region.
df_sonnenschein = pd.read_csv('Sonnenscheindauer.csv')
fig = plot(df_=df_sonnenschein)
fig.show()

## Jahresdurchschnitt Sonnenscheindauer für alle Regionen

In [None]:
# Yearly mean over all regions. numeric_only=True skips non-numeric columns
# (such as a text 'Region' column), which pandas >= 2.0 refuses to average.
df_sonne_mean = df_sonnenschein.groupby('Year', as_index=False).mean(numeric_only=True)
plot_linreg(df_sonne_mean, 'Sonnenscheindauer').show()

## Sonnenscheindauer pro Region mit linearer Regression

In [None]:
# One subplot per region with the data and its regression line.
fig = plot_linreg_subplots(df=df_sonnenschein, data_name='Sonnenscheindauer')
fig.show()

## Korrelation
### Korrelation Regen und Neuschnee

In [None]:
# Reload both data sets, then show the per-region correlation as a bar chart.
df_jahresniederschlag = pd.read_csv('Jahresniederschlag.csv')
df_neuschnee = pd.read_csv('Neuschnee.csv')
corr_regen_schnee = calculate_corr(df1=df_jahresniederschlag, df2=df_neuschnee)
fig = px.bar(
    corr_regen_schnee,
    x='Region',
    y='Correlation',
    title='Korrelation Regen und Neuschnee',
)
fig.show()

### Korrelation Neuschnee und Temperatur

In [None]:
# Reload both data sets, then show the per-region correlation as a bar chart.
df_jahrestemperatur = pd.read_csv('Jahrestemperatur.csv')
df_neuschnee = pd.read_csv('Neuschnee.csv')
corr_temp_schnee = calculate_corr(df1=df_jahrestemperatur, df2=df_neuschnee)
fig = px.bar(
    corr_temp_schnee,
    x='Region',
    y='Correlation',
    title='Korrelation Temperatur und Neuschnee',
)
fig.show()
