In [1]:
from math import sqrt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
from sklearn.preprocessing import StandardScaler
from dataset import DatasetGenerator

## Data reading

In [2]:
# Read every raw measurement workbook from data/ in one pass; the names on the
# left are bound in the same order as the paths listed below.
olb_df, msa_df, rsam_df, displ_df, bg_seism_df = map(
    pd.read_excel,
    (
        'data/olb.xlsx',
        'data/msa.xlsx',
        'data/rsam.xlsx',
        'data/displacement.xlsx',
        'data/background_seismicity.xlsx',
    ),
)

In [3]:
# Keep the 'date' column of each station table as a standalone NumPy array.
olb_date, msa_date = (np.array(table['date']) for table in (olb_df, msa_df))

In [4]:
# Join the two station tables on 'date'; the outer join keeps dates that are
# present in only one of them.
df = olb_df.merge(msa_df, on='date', how='outer')
# Positional rename: assumes the merged frame has exactly these nine columns in
# this order (date, then four per station) -- TODO confirm against the inputs.
df.columns = ['date','T_olb', 'Ru_olb','P_olb','Rn_olb','T_msa','Ru_msa','P_msa','Rn_msa']

# Attach the remaining tables one after another, again as outer joins on 'date'.
full_df = (
    rsam_df
    .merge(df, on='date', how='outer')
    .merge(displ_df, on='date', how='outer')
    .merge(bg_seism_df, on='date', how='outer')
)

In [5]:
# Order the merged rows chronologically and renumber them 0..n-1, discarding
# the index left over from the merges.
full_df = full_df.sort_values(by='date').reset_index(drop=True)

In [6]:
# Rebind `df` to the fully merged frame. This is an alias, not a copy: both
# names now refer to the same DataFrame object, so in-place edits made through
# `df` are visible through `full_df` as well.
df = full_df

In [7]:
# Pull the columns of interest out of the merged frame as pandas Series.
dates, rsam, rn_olb, rn_msa, displ, bg = (
    df[column]
    for column in (
        'date',
        'RSAM (m/s)',
        'Rn_olb',
        'Rn_msa',
        'displacement (cm)',
        'background seismicity',
    )
)

In [8]:
# Show the rows that have an Rn_olb reading but no RSAM value.
has_rn_olb = df['Rn_olb'].notnull()
lacks_rsam = df['RSAM (m/s)'].isnull()
df[has_rn_olb & lacks_rsam]

In [64]:
# rows where both the olb and msa values exist (currently disabled)
#filtered_df=df[~df['T_msa'].isnull().values & ~df['T_olb'].isnull().values]
#filtered_df.to_csv('olb_msa_notnull.csv')

In [11]:
# Full dataset: write the merged frame to CSV, leaving out the row index.
df.to_csv('data/olb_msa_full.csv', index=False)

In [10]:
# Display the column labels of the merged frame.
df.columns

## Data Plotting

### Filtered df

In [67]:
# `filtered_df` must exist before it is read here; build it in this cell so the
# notebook survives Restart & Run All. The mask keeps only the rows where both
# stations have a T value, i.e. where OLB and MSA measurements coexist.
filtered_df = df[~df['T_msa'].isnull().values & ~df['T_olb'].isnull().values]

# Plot inputs for the filtered subset: dates, the two Rn series, and their
# absolute point-wise difference.
date_range = filtered_df['date'].values
y_olb= filtered_df['Rn_olb'].values
y_msa = filtered_df['Rn_msa'].values
y_diff = np.abs(y_olb-y_msa)

In [13]:
# Rn at both stations over the filtered date range, drawn on one explicit Axes.
fig, ax = plt.subplots(figsize=(20, 6), dpi=80)
ax.plot(date_range, y_olb, label='olb')
ax.plot(date_range, y_msa, label='msa')
ax.legend()
plt.show()

In [14]:
# Absolute OLB-MSA Rn difference over the filtered date range.
figure(figsize=(20,6), dpi = 80)
plt.plot(date_range,y_diff, label = 'diff')
# The label is only rendered once a legend is requested; plt.show() also keeps
# the Line2D repr out of the cell output.
plt.legend()
plt.show()

### Full Df

In [10]:
# Display the merged frame.
df

In [16]:
# NumPy views of the merged frame's columns, grouped by quantity
# (OLB first, MSA second where a quantity exists for both stations).
date = df['date'].values
rn_olb, rn_msa = df['Rn_olb'].values, df['Rn_msa'].values
ru_olb, ru_msa = df['Ru_olb'].values, df['Ru_msa'].values
t_olb, t_msa = df['T_olb'].values, df['T_msa'].values
p_olb, p_msa = df['P_olb'].values, df['P_msa'].values
rsam = df['RSAM (m/s)'].values
displacement = df['displacement (cm)'].values
background = df['background seismicity'].values


In [17]:

# Rn at both stations over every date in the merged frame.
fig, ax = plt.subplots(figsize=(20, 6), dpi=80)
ax.plot(date, rn_olb, label='olb')
ax.plot(date, rn_msa, label='msa')
ax.legend()
plt.show()


In [24]:
# Five stacked panels of OLB-related series against date. The panels are
# described as data and drawn in one loop instead of five copy-pasted
# plot/legend pairs; ax1..ax5 stay bound for any later cell that wants them.
fig, (ax1, ax2, ax3,ax4, ax5) = plt.subplots(5,figsize=(20,10))
fig.suptitle('Olibano plots')
panels = [
    (ax1, rn_olb, 'RN'),
    (ax2, t_olb, 'T'),
    (ax3, p_olb, 'P'),
    (ax4, rsam, 'rsam'),
    (ax5, ru_olb, 'ru'),
]
for panel_ax, series, label in panels:
    panel_ax.plot(date, series, label=label)
    panel_ax.legend()
# Render explicitly so the cell output is the figure, not a Legend repr.
plt.show()


#### Statistics

In [54]:
# Display the merged frame.
df

In [39]:
# Summary statistics of the merged frame's columns.
df.describe()

## Data Cleaning

In [16]:
# Replace missing Rn_msa readings with 0, selecting rows by boolean mask rather
# than by index labels recovered through chained indexing (label-based
# assignment would also overwrite non-null rows if index labels were duplicated).
# NOTE(review): the zeros become part of the series that is standardized later,
# so they influence its mean and variance -- confirm 0 is the intended fill value.
df.loc[df['Rn_msa'].isnull(), 'Rn_msa'] = 0

In [17]:
# Replace missing Rn_olb readings with 0, selecting rows by boolean mask rather
# than by index labels recovered through chained indexing (label-based
# assignment would also overwrite non-null rows if index labels were duplicated).
# NOTE(review): the zeros become part of the series that is standardized later,
# so they influence its mean and variance -- confirm 0 is the intended fill value.
df.loc[df['Rn_olb'].isnull(), 'Rn_olb'] = 0

In [18]:
#full df 

full_df_date = df['date'].values
full_y_olb = df['Rn_olb'].values
full_y_msa = df['Rn_msa'].values
full_y_diff = np.abs(full_y_olb-full_y_msa)

In [19]:
# Both Rn series over the full date range, drawn on one explicit Axes.
fig, ax = plt.subplots(figsize=(20, 6), dpi=80)
ax.plot(full_df_date, full_y_olb, label='olb')
ax.plot(full_df_date, full_y_msa, label='msa')
ax.legend()
plt.show()

In [20]:
# Absolute OLB-MSA Rn difference over the full date range.
figure(figsize=(20,6), dpi = 80)

plt.plot(full_df_date,full_y_diff,label='diff')
# The label is only rendered once a legend is requested; plt.show() also keeps
# the Line2D repr out of the cell output.
plt.legend()
plt.show()


### Standardize the OLB and MSA time series to compare their trends

In [22]:
# Turn the OLB series into a single-column 2-D array (one row per sample).
full_y_olb = full_y_olb.reshape(-1, 1)

In [31]:
# Turn the MSA series into a single-column 2-D array (one row per sample).
full_y_msa = full_y_msa.reshape(-1, 1)

In [33]:
# Fit a standardizer on the single-column OLB array.
olb_scaler = StandardScaler()
olb_scaler = olb_scaler.fit(full_y_olb)
# mean_ and var_ are per-feature arrays; index the one entry explicitly instead
# of relying on implicit array-to-scalar conversion, which NumPy has deprecated.
print('Mean: %f, StandardDeviation: %f' % (olb_scaler.mean_[0], sqrt(olb_scaler.var_[0])))
# Standardize the OLB series with the fitted parameters.
olb_normalized = olb_scaler.transform(full_y_olb)

In [34]:
# Fit a standardizer on the single-column MSA array.
msa_scaler = StandardScaler()
msa_scaler = msa_scaler.fit(full_y_msa)
# mean_ and var_ are per-feature arrays; index the one entry explicitly instead
# of relying on implicit array-to-scalar conversion, which NumPy has deprecated.
print('Mean: %f, StandardDeviation: %f' % (msa_scaler.mean_[0], sqrt(msa_scaler.var_[0])))
# Standardize the MSA series with the fitted parameters.
msa_normalized = msa_scaler.transform(full_y_msa)

In [36]:
# Standardized OLB and MSA series on a common axis. Each line gets its own
# label (both were previously 'normalized_val') and the legend is drawn so the
# two curves can be told apart.
figure(figsize=(20,6), dpi = 80)
plt.plot(full_df_date,olb_normalized,label='olb_normalized')
plt.plot(full_df_date,msa_normalized,label='msa_normalized')
plt.legend()
plt.show()

In [40]:
# Absolute difference between the two standardized series.
figure(figsize=(20,6), dpi = 80)
plt.plot(full_df_date,np.abs(olb_normalized-msa_normalized),label='diff')
# The label is only rendered once a legend is requested; plt.show() also keeps
# the Line2D repr out of the cell output.
plt.legend()
plt.show()


In [41]:
# Pass the absolute difference of the two standardized series through the OLB
# scaler's inverse transform.
# NOTE(review): the input is a difference of standardized values, not a
# standardized OLB reading, so the result presumably carries the OLB mean as an
# additive offset -- confirm this is the intended quantity.
full_inversed = olb_scaler.inverse_transform(np.abs(olb_normalized-msa_normalized))

In [73]:
# Back-transformed difference series over the full date range.
figure(figsize=(20,6), dpi = 80)
plt.plot(full_df_date,full_inversed, label='full_inversed')
# The label is only rendered once a legend is requested; plt.show() also keeps
# the Line2D repr out of the cell output.
plt.legend()
plt.show()


In [83]:
# Overlay the back-transformed difference, shifted down by a fixed amount, on
# the OLB series.
# NOTE(review): 25000 is a hand-picked constant -- presumably close to the
# offset added by the scaler's inverse transform; confirm or derive it.
shift = 25000
fig, ax = plt.subplots(figsize=(20, 6), dpi=80)
ax.plot(full_df_date, full_inversed - shift, label='full_inversed')
ax.plot(full_df_date, full_y_olb, label='olb')
ax.legend()
#plt.plot(full_df_date,full_y_msa, label='msa')
