# Imports

In [1]:
# IMPORTS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import statsmodels.api as sm
import math 
import pylab 

from matplotlib.dates import DateFormatter
from scipy.optimize import curve_fit
from sklearn.metrics import mean_absolute_error
from glob import glob
from os import path
from collections import defaultdict
from virocon import (
    GlobalHierarchicalModel,
    ExponentiatedWeibullDistribution,
    WeibullDistribution,
    DependenceFunction,
    WidthOfIntervalSlicer,
    plot_marginal_quantiles,
    plot_dependence_functions,
    _fitting
)

from virocon._fitting import (
    fit_function,
    fit_constrained_function,
    convert_bounds_for_curve_fit,
)

import sys
sys.path.insert(0, path.abspath(path.join(path.curdir, '../src')))

%matplotlib notebook

# Data

In [2]:
# Read data

data = pd.read_csv('/Volumes/MASTERTHESE/MasterThese/data_unsorted.csv')
data= data.drop(columns=['Unnamed: 0'])
data= data.rename(columns={"peakDeflection": "peakDeflectionSBIT"})

In [3]:
#print(len(data))
#print(data)

In [4]:
#plt.figure(figsize=(10,7))
#plt.plot(data.windSpeed, label='wind speed')
#plt.plot(data.waveHeight, label='wave height')
#plt.plot(data.waveTp, label='wave peak period')
#plt.plot(data.waveTz, label='Wave zero up-crossing period')
#plt.legend()

In [5]:
plt.figure()
plt.plot(data.peakDeflectionSBIT, label='SBIT')
plt.plot(data.peakDeflectionTower, label='Tower')
plt.xlabel('Time series data points')
plt.ylabel('Deflection (cm)')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0a4799d0>

In [6]:
data = data.drop(data[(data.peakDeflectionTower- data.peakDeflectionSBIT) < 5].index)
data = data[data['windSpeed'].notna()]
data= data.reset_index(drop=True)
print(data)

     windSpeed  waveHeight  waveTp  waveTz  peakDeflectionSBIT  \
0     4.064444        1.39    7.14   5.263              2.7032   
1     3.959574        1.39    7.14   5.263              2.7870   
2     3.863043        1.39    7.14   5.263              3.6654   
3     3.744186        1.39    7.14   5.263              3.7484   
4     3.872340        1.39    7.14   5.263              4.8689   
..         ...         ...     ...     ...                 ...   
795   3.237931        0.54    4.35   3.361              2.8250   
796   3.827119        0.54    4.35   3.361              2.9406   
797   3.351724        0.54    4.35   3.361              1.3385   
798   3.532203        0.54    4.35   3.361              2.5891   
799   4.589655        0.54    4.35   3.361              2.8813   

     peakDeflectionTower  
0                15.6301  
1                17.3423  
2                14.7588  
3                22.1884  
4                15.9363  
..                   ...  
795              1

In [7]:
print(max(data.windSpeed))

11.192982456140356


In [8]:
plt.figure()
plt.plot(data.peakDeflectionSBIT, label='SBIT')
plt.plot(data.peakDeflectionTower, label='Tower')
plt.xlabel('Data points')
plt.ylabel('Deflection (cm)')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0f4b4fd0>

# SBIT RESPONSE EMULATOR

# Distributions

In [9]:
# set up the histogramm, 60 steps= bin size of 0,5 cm deflection
bins = np.linspace(0, 30, 60)
bin_centers = 0.5*(bins[1:] + bins[:-1])

# fit GEV and weibull distribution to data
gev_fit = stats.genextreme.fit(data.peakDeflectionSBIT)
gev_pdf= stats.genextreme.pdf(bin_centers, gev_fit[0], loc= gev_fit[1], scale= gev_fit[2])
gev_ppf= stats.genextreme.ppf(bin_centers, gev_fit[0], loc= gev_fit[1], scale= gev_fit[2])
gev_cdf= stats.genextreme.cdf(bin_centers, gev_fit[0], loc= gev_fit[1], scale= gev_fit[2])

#weib_fit = stats.weibull_min.fit(data.deflection)
#weib_cdf= stats.weibull_min.cdf(bin_centers, weib_fit[0], loc= weib_fit[1], scale= weib_fit[2])

#print(weib_fit)
print(gev_fit)

(-0.1280961498390108, 1.7931853339550328, 0.7194948859860504)


In [10]:
fig, ax1 = plt.subplots()

ax1.hist(data.peakDeflectionSBIT, bins=bins, label='Observations SBIT', color='blue', density= 'true', stacked='true', edgecolor='w')
ax1.plot(bin_centers, gev_pdf, label="PDF of GEV", color='r')
ax1.set_xlabel('Deflection (cm)')
ax1.set_ylabel('Probability density')

ax1.legend()

plt.show()

<IPython.core.display.Javascript object>

In [11]:
fig, ax1 = plt.subplots(1)
ax1.scatter(data.windSpeed, data.peakDeflectionSBIT, alpha= 0.5)
ax1.set_xlabel('1- min mean windspeed (m/s)')
ax1.set_ylabel('Independent peak deflection(cm)')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Independent peak deflection(cm)')

In [12]:
#r = np.corrcoef(data.windSpeed, data.waveHeight)
#print(r)
data.corr()

Unnamed: 0,windSpeed,waveHeight,waveTp,waveTz,peakDeflectionSBIT,peakDeflectionTower
windSpeed,1.0,0.077811,-0.128155,-0.17824,0.147795,0.204713
waveHeight,0.077811,1.0,0.826796,0.874009,0.246625,0.096135
waveTp,-0.128155,0.826796,1.0,0.940959,0.1302,0.04269
waveTz,-0.17824,0.874009,0.940959,1.0,0.227835,0.030987
peakDeflectionSBIT,0.147795,0.246625,0.1302,0.227835,1.0,0.094591
peakDeflectionTower,0.204713,0.096135,0.04269,0.030987,0.094591,1.0


## GEV Fit

In [13]:
plt.figure()
stats.probplot(data.peakDeflectionSBIT, (gev_fit[0], gev_fit[1], gev_fit[2]), dist='genextreme', plot=pylab)
pylab.show()

<IPython.core.display.Javascript object>

In [14]:
#sm.qqplot(data.deflection, line ='45')
#plt.show()

In [15]:
#stats.kstest(data.deflection, gev_cdf, gev_fit)

In [16]:
#print(data.waveTz)

# Intervals

In [17]:
# Slice the data set of deflection into intervals of the wind speed.

width_slicer_V = WidthOfIntervalSlicer(width=1, reference="right", min_n_points=1)
wind_slices, wind_references, wind_boundaries = width_slicer_V.slice_(data.windSpeed)

deflection_intervals= []
for i in range (len(wind_references)):
    deflection_intervals.append([])


for i in range(len(wind_slices)):
    for j in range(len(wind_slices[i])):
        if (data.windSpeed[j] >= wind_boundaries[i][0]) & (data.windSpeed[j] < wind_boundaries[i][1]):
            deflection_intervals[i].append(data.peakDeflectionSBIT[j])


# Number of points per Interval
#for i in range(len(deflection_intervals)):
    #print(len(deflection_intervals[i]))

In [18]:
parameter_list= []
for i in range (len(gev_fit)):
    parameter_list.append([])

for i in range(len(deflection_intervals)):
    fit= stats.genextreme.fit(deflection_intervals[i])
    parameter_list[0].append(fit[0])
    parameter_list[1].append(fit[1])
    parameter_list[2].append(fit[2])

print(len(parameter_list[0]))

11


In [19]:
# shape parameter
mean= np.mean(parameter_list[0])
print(mean)

-0.11472415007115067


In [20]:
fig, (ax1, ax2, ax3) = plt.subplots(3, sharey=True, sharex= True, figsize=(7,7))
ax1.scatter(wind_references, parameter_list[0])
ax1.set_xlabel('Wind speed (m/s)')
ax1.set_ylabel('Shape parameter')
ax2.scatter(wind_references, parameter_list[1])
ax2.set_xlabel('Wind speed (m/s)')
ax2.set_ylabel('Location parameter')
ax3.scatter(wind_references, parameter_list[2])
ax3.set_xlabel('Wind speed (m/s)')
ax3.set_ylabel('Scale parameter')

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Scale parameter')

In [21]:
# WIND

r_shape_wind = np.corrcoef(parameter_list[0], wind_references)
r_location_wind = np.corrcoef(parameter_list[1], wind_references)
r_scale_wind = np.corrcoef(parameter_list[2], wind_references)
print(r_shape_wind)
print(r_location_wind)
print(r_scale_wind)

[[1.         0.59743541]
 [0.59743541 1.        ]]
[[1.         0.47638591]
 [0.47638591 1.        ]]
[[ 1.         -0.53373724]
 [-0.53373724  1.        ]]


# Dependence function of response

In [22]:
# Define dependence functions

def _power3(x, a, b, c):
    return a + b * x ** c
def _linear(x, a, b):
    return x * a + b
def _exp3(x, a, b, c):
    return a + b * np.exp(c * x)


bounds_3p = [(None, None), (None, None), (None, None)]
bounds_2p = [(None, None), (None, None)]
exp_bounds = [(0, None), (0, None), (None, None)]

power3 = DependenceFunction(_power3, bounds_3p, latex="$a + b * x^c$")
linear = DependenceFunction(_linear, latex="$a + b * x$")
exp3 = DependenceFunction(_exp3, bounds=exp_bounds)

#print(tuple(exp3.parameters.values()))
#print(tuple(linear.parameters.values()))
#print(tuple(power3.parameters.values()))

In [23]:
shape_param_fit= linear.fit(wind_references, parameter_list[0])
print(shape_param_fit)

#linear.fit(wind_references, parameter_list[0])
#print(linear)

None


In [24]:
x= np.asarray(wind_references)
y1= np.asarray(parameter_list[0]) #shape
y2= np.asarray(parameter_list[1]) #location
y3= np.asarray(parameter_list[2]) #scale

## linear fit

In [25]:
my_linear_param1 = fit_function(linear, x, y1, (1, 1), "lsq", None, None)
my_linear_param2 = fit_function(linear, x, y2, (1, 1), "lsq", None, None)
my_linear_param3 = fit_function(linear, x, y3, (1, 1), "lsq", None, None)
print(my_linear_param1)
print(my_linear_param2)
print(my_linear_param3)

[ 0.01917579 -0.2297789 ]
[0.08919855 1.67636924]
[-0.04915738  1.01615747]


In [26]:
fig, (ax1, ax2, ax3) = plt.subplots(3, sharey=True, sharex= True, figsize=(7,7))
ax1.scatter(x, y1, marker="x", c="k")
ax1.plot(x, linear(x, *my_linear_param1), label="linear fit", linewidth=3)
ax1.set_xlabel('Wind speed (m/s)')
ax1.set_ylabel('Shape parameter')
ax2.scatter(x, y2, marker="x", c="k")
ax2.plot(x, linear(x, *my_linear_param2), label="linear fit", linewidth=3)
ax2.set_xlabel('Wind speed (m/s)')
ax2.set_ylabel('Location parameter')
ax3.scatter(x, y3, marker="x", c="k")
ax3.plot(x, linear(x, *my_linear_param3), label="linear fit", linewidth=3)
ax3.set_xlabel('Wind speed (m/s)')
ax3.set_ylabel('Scale parameter')

ax1.legend()
ax2.legend()
ax3.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0f76a310>

In [27]:
#plt.figure()
#plt.scatter(x, y1, marker="x", c="k", label= 'shape parameter GEV')
#plt.plot(x, linear(x, *my_linear_param1), label="linear fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of shape parameter GEV')
#plt.legend()

In [28]:
#plt.figure()
#plt.scatter(x, y2, marker="x", c="k", label= 'location parameter GEV')
#plt.plot(x, linear(x, *my_linear_param2), label="linear fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of location parameter GEV')
#plt.legend()

In [29]:
#plt.figure()
#plt.scatter(x, y3, marker="x", c="k", label= 'scale parameter GEV')
#plt.plot(x, linear(x, *my_linear_param3), label="linear fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of scale parameter GEV')
#plt.legend()

## exponential fit

In [30]:
exp_p0 = tuple(exp3.parameters.values())
my_exp_param1 = fit_function(exp3, x, y1, exp_p0, "lsq", exp3.bounds)
my_exp_param2 = fit_function(exp3, x, y2, exp_p0, "lsq", exp3.bounds)
my_exp_param3 = fit_function(exp3, x, y3, exp_p0, "lsq", exp3.bounds)
print(my_exp_param1)
print(my_exp_param2)
print(my_exp_param3)

[ 5.81910985e-13  1.05594945e-04 -2.74911438e+01]
[1.90408092 0.00519133 0.50532382]
[ 0.55565319  1.24669043 -0.52017369]


In [31]:
fig, (ax1, ax2, ax3) = plt.subplots(3, sharey=True, sharex= True, figsize=(7,7))
ax1.scatter(x, y1, marker="x", c="k")
ax1.plot(x, exp3(x, *my_exp_param1), label="exponential fit", linewidth=3)
ax1.set_xlabel('Wind speed (m/s)')
ax1.set_ylabel('Shape parameter')
ax2.scatter(x, y2, marker="x", c="k")
ax2.plot(x, exp3(x, *my_exp_param2), label="exponential fit", linewidth=3)
ax2.set_xlabel('Wind speed (m/s)')
ax2.set_ylabel('Location parameter')
ax3.scatter(x, y3, marker="x", c="k")
ax3.plot(x, exp3(x, *my_exp_param3), label="exponential fit", linewidth=3)
ax3.set_xlabel('Wind speed (m/s)')
ax3.set_ylabel('Scale parameter')

ax1.legend()
ax2.legend()
ax3.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0f93a5e0>

In [32]:
#plt.figure()
#plt.scatter(x, y1, marker="x", c="k", label= 'shape parameter GEV')
#plt.plot(x, exp3(x, *my_exp_param1), label="exponential fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of shape parameter GEV')
#plt.legend()

In [33]:
#plt.figure()
#plt.scatter(x, y2, marker="x", c="k", label= 'location parameter GEV')
#plt.plot(x, exp3(x, *my_exp_param2), label="exponential fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of location parameter GEV')
#plt.legend()

In [34]:
#plt.figure()
#plt.scatter(x, y3, marker="x", c="k", label= 'scale parameter GEV')
#plt.plot(x, exp3(x, *my_exp_param3), label="exponential fit", linewidth=3)
#plt.xlabel('Wind speed (m/s)')
#plt.ylabel('Values of scale parameter GEV')
#plt.legend()

## power 3 fit

In [35]:
#power3_p0 = tuple(power3.parameters.values())
#my_power_param1 = fit_function(power3, x, y1, power3_p0, "lsq", power3.bounds)
#my_power_param2 = fit_function(power3, x, y2, power3_p0, "lsq", power3.bounds)
#my_power_param3 = fit_function(power3, x, y3, power3_p0, "lsq", power3.bounds)
#print(my_power_param1)
#print(my_power_param2)
#print(my_power_param3)

# Alternative Dependency

In [36]:
#mean_absolute_error(data.deflection,)
#Korrelationskoeffizient
# Intervall von 0-2 anschauen, Maxima anschauen

# Mean deflection per interval
mean_deflection = []
for i in range(len(wind_references)):
    mean_deflection.append(np.mean(deflection_intervals[i]))

# Min deflection per interval
min_deflection = []
for i in range(len(wind_references)):
    min_deflection.append(min(deflection_intervals[i]))
    
# Max deflection per interval
max_deflection = []
for i in range(len(wind_references)):
    max_deflection.append(max(deflection_intervals[i]))
    
r_min = np.corrcoef(min_deflection, wind_references)
r_max = np.corrcoef(max_deflection, wind_references)
r_mean = np.corrcoef(mean_deflection, wind_references)
print(r_min)
print(r_mean)
print(r_max)

[[1.         0.76440649]
 [0.76440649 1.        ]]
[[1.         0.18594993]
 [0.18594993 1.        ]]
[[ 1.         -0.45415571]
 [-0.45415571  1.        ]]


In [37]:
my_linear_param_min = fit_function(linear, x, min_deflection, (1, 1), "lsq", None, None)
print(my_linear_param_min)

[0.13182091 0.50983818]


In [38]:
fig, ax1 = plt.subplots(1)
ax1.scatter(data.windSpeed, data.peakDeflectionSBIT, alpha= 0.1, label='Peak independent deflections')
ax1.scatter(wind_references, max_deflection, marker="v", c="k",alpha=0.2, label='Max deflection per interval')
ax1.scatter(wind_references, mean_deflection, marker="o", c="k",alpha=0.5, label='Mean deflection per interval')
ax1.scatter(wind_references, min_deflection, marker="x", c="k", label='Min deflection per interval')
ax1.plot(x, linear(x, *my_linear_param_min), linestyle='--', color= 'r',label="linear fit", linewidth=3)
ax1.set_xlabel('1- min mean windspeed (m/s)')
ax1.set_ylabel('Independent peak deflection(cm)')
ax1.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0f945100>

# Modell 1: R

In [39]:
dist_R= stats.genextreme(gev_fit[0], gev_fit[1], gev_fit[2])
#cdf=joint_dist.cdf(r_grid)
#pdf=joint_dist.pdf(r_grid)
#rvs=joint_dist.rvs(size=(20,20))
rvs= dist_R.rvs(size=800)
#print(rvs)

In [40]:
peak_defl_SBIT= pd.DataFrame.to_numpy(data.peakDeflectionSBIT)
#print(peak_defl_SBIT)
model_error= peak_defl_SBIT-rvs
model_error= abs(model_error)

model_error_mean= np.nanmean(model_error)
print(model_error_mean)
print(min(data.peakDeflectionSBIT))
print(max(data.peakDeflectionSBIT))

1.1671152670427907
0.6434
8.1888


In [41]:
plt.figure()
plt.scatter(data.windSpeed, data.peakDeflectionSBIT, label='real data')
plt.scatter(data.windSpeed, rvs, alpha=0.3, color= 'r', label='model data')
plt.xlabel('Wind speed (m/s)')
plt.ylabel('Peak deflection SBIT (cm)')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0f997400>

## Goodness of fit

In [42]:
print(data.peakDeflectionTower.mean())
print(np.mean(rvs))

print(data.peakDeflectionTower.median())
print(np.median(rvs))

13.894361000000002
2.3576505553751743
12.77685
2.0844067561415534


# Modell 2: R(V)

In [43]:
def mu_func(v):
    return 0.089*v +1.676
def sigma_func(v):
    return -0.049*v +1.016

In [44]:
print(len(data))

800


In [45]:
mu= mu_func(data.windSpeed)
sigma= sigma_func(data.windSpeed)

In [47]:
con_dist_V= stats.genextreme(gev_fit[0], mu, gev_fit[2])
#cdf=joint_dist.cdf(r_grid)
#pdf=joint_dist.pdf(r_grid)
#rvs=joint_dist.rvs(size=(20,20))
rvs2D= con_dist_V.rvs(size=800)
#print(rvs)

In [48]:
peak_defl_SBIT= pd.DataFrame.to_numpy(data.peakDeflectionSBIT)
#print(peak_defl_SBIT)
model_error2D= peak_defl_SBIT-rvs2D
model_error2D= abs(model_error2D)

model_error2D_mean= np.nanmean(model_error2D)
print(model_error2D_mean)

1.187045432589364


## Comparison real data and model

In [49]:
plt.figure()
plt.scatter(data.windSpeed, data.peakDeflectionSBIT, label='real data')
plt.scatter(data.windSpeed, rvs, alpha=0.3, color= 'r', label='model data')
plt.xlabel('Wind speed (m/s)')
plt.ylabel('Peak deflection SBIT (cm)')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ffd0fb0f880>

## Goodness of fit

In [50]:
print(data.peakDeflectionTower.mean())
print(np.mean(rvs2D))

print(data.peakDeflectionTower.median())
print(np.median(rvs2D))

13.894361000000002
2.749982448347794
12.77685
2.4764512329609287


In [53]:
fig = plt.figure()
ax = fig.add_subplot(111)
stats.probplot(data.peakDeflectionTower, (gev_fit[0], gev_fit[1], gev_fit[2]), dist='genextreme', plot=ax)
stats.probplot(data.peakDeflectionTower, (gev_fit[0], mu, gev_fit[2]), dist='genextreme', plot=ax)

#ax.set_title("Probability plot of sample data against the quantiles of a specified theoretical distribution.")

ax.get_lines()[0].set_markerfacecolor('b')
ax.get_lines()[0].set_alpha(0.3)
ax.get_lines()[2].set_alpha(0.3)
ax.get_lines()[3].set_color('black')
ax.get_lines()[3].set_linestyle('-')
ax.get_lines()[0].set_label('Measurement data')
ax.get_lines()[0].set_label('Measurement data')
ax.get_lines()[1].set_label('1D Probability model R')
ax.get_lines()[3].set_label('2D probability model R(V)')


ax.legend()
plt.show()

<IPython.core.display.Javascript object>