In [1]:
import os 
import sys
import random
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import lognorm
from scipy.stats import norm 
from scipy import integrate
from scipy.stats import gmean

import statsmodels.api as sm 
import statsmodels.formula.api as smf

### src; https://github.com/arkottke/pyrotd
import pyrotd

%load_ext autoreload
%autoreload 2

In [2]:
with open('BuildingNames.txt', 'r') as f:
    BuildingList = f.read() 
BuildingList = BuildingList.split('\n')
BuildingList

['s1_48x32_high',
 's1_48x32_veryhigh',
 's1_96x48_high',
 's1_96x48_veryhigh',
 's2_48x32_high',
 's2_48x32_veryhigh',
 's2_96x48_high',
 's2_96x48_veryhigh',
 's4_96x48_high',
 's4_96x48_veryhigh']

In [3]:
baseDir = r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/IM_study'

## time period of the buildings
T = np.array([0.13, 0.12, 0.22, 0.22, 0.16, 0.15, 0.26, 0.25, 0.49, 0.49])

numGM = 826
g = 980.665 ## converts GM record to cm/sec^2

In [4]:
# heresi_GM_dir = os.path.join(baseDir, *['References', 'Heresi GMs'])
# os.chdir(heresi_GM_dir)
# heresiGM = pd.read_csv('Ground Motion Records.csv')
# RSNs = heresiGM['Record Sequence Number\n(NGA-West2)'].values
# np.savetxt('Heresi_831_RSNs.txt',[RSNs], delimiter=',', fmt='%d')

In [5]:
# gmDir = r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/SurrogateModeling/Data/MirandaGMs/ProcessedGroundMotion'
gmDir = r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/GM_Selection/Heresi_826_GMs/preProcessed '
os.chdir(os.path.join(gmDir, 'GroundMotionInfo'))
gmNames = open('GMFileNames.txt', 'r').read().splitlines()
gmNumPoints = np.loadtxt('GMNumPoints.txt')
gmSteps = np.loadtxt('GMtimeSteps.txt')


def find_nearest(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx]

In [6]:
gmHistoryDir = os.path.join(gmDir, 'Histories')
os.chdir(gmHistoryDir)
PGA = []
PGV = []
DS_5to75 = []
DS_5to95 = []
CAV = []
gmID = []
for i in range(0, len(gmNames)):
    accl = np.loadtxt("%s"%gmNames[i]) * g 
    PGA.append(max(np.abs(accl)))
    gmID.append('GM%s'%(i+1))
    time = np.arange(0, gmNumPoints[i] * gmSteps[i], gmSteps[i])
    if len(time) > gmNumPoints[i]: #takes care of odd GMNumPoints
        time = time[1:]
    #PGA
    velocity = integrate.cumtrapz(accl, time, initial = 0)
    PGV.append(max(np.abs(velocity)))
    #Arias Intensity
    ariasIntensity = (math.pi / 2 * g) * integrate.cumtrapz(accl ** 2, time, initial = 0)
    AI_normalized = ariasIntensity / max(ariasIntensity)
    ##interval(time) at with 5% of AI is reached
    int_at5 = time[np.where(AI_normalized == find_nearest(AI_normalized, 0.05))][0] 
    int_at75 = time[np.where(AI_normalized == find_nearest(AI_normalized, 0.75))][0]
    int_at95 = time[np.where(AI_normalized == find_nearest(AI_normalized, 0.95))][0]
    DS_5to75.append(int_at75 - int_at5)
    DS_5to95.append(int_at95 - int_at5)
    #Cumulative absolute velocity
    CAV.append(np.trapz(abs(accl), time))

d = {'GMID':gmID,
    'PGA': PGA, 
    'PGV': PGV, 
    'DS_5to70': DS_5to75,
    'DS_5to95' : DS_5to95, 
    'CAV' : CAV}
df_gm = pd.DataFrame(d)
df_gm = df_gm.set_index('GMID')
df_gm.head()

Unnamed: 0_level_0,PGA,PGV,DS_5to70,DS_5to95,CAV
GMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GM1,338.426217,41.100682,6.36,12.98,1099.61937
GM2,301.84192,22.985907,5.66,13.46,954.213665
GM3,226.90107,18.212685,6.4,12.02,844.604753
GM4,351.411692,27.369871,5.8,10.64,1183.699743
GM5,105.588004,9.254588,12.28,25.72,569.929722


In [7]:
df_gm.shape

(240, 5)

In [8]:
# spectral_acc = pd.read_csv(r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/SurrogateModeling/Data/MirandaGMs/Miranda240GM5PercentSpectra.csv', 
#                        index_col = 0)
# spectral_acc = spectral_acc.T
# # spectral_acc = spectral_acc.set_index('Time')
# spectral_acc['Sa_avg'] = gmean(spectral_acc.loc[:, 0.1:3], axis = 1)
# # spectral_acc['Sa_avg'] = spectral_acc.loc[:, 0.1:3].apply(gmean, axis=1)
# spectral_acc.head()

Time,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,...,7.92,7.93,7.94,7.95,7.96,7.97,7.98,7.99,8.0,Sa_avg
GM1,0.339825,0.361133,0.363163,0.396594,0.450216,0.355248,0.411722,0.65934,0.665346,0.604042,...,0.010611,0.010615,0.010619,0.010622,0.010626,0.010629,0.010631,0.010632,0.010633,0.24202
GM2,0.307881,0.297817,0.317829,0.307808,0.324563,0.45252,0.41904,0.45232,0.393258,0.476768,...,0.010427,0.010398,0.010369,0.01034,0.010311,0.010282,0.010254,0.010225,0.010197,0.230157
GM3,0.235589,0.240269,0.238789,0.265133,0.277307,0.406657,0.373434,0.468871,0.494232,0.456395,...,0.003832,0.003816,0.003801,0.003786,0.003771,0.003756,0.003741,0.003726,0.003711,0.167386
GM4,0.355774,0.366203,0.382934,0.395595,0.454592,0.496565,0.649458,0.606338,0.735975,1.002575,...,0.001953,0.001948,0.001943,0.001938,0.001933,0.001928,0.001923,0.001918,0.001913,0.150906
GM5,0.10696,0.115329,0.119738,0.125706,0.166744,0.142788,0.162358,0.197241,0.195511,0.215484,...,0.013706,0.013679,0.013652,0.013623,0.013593,0.013564,0.013534,0.013503,0.013472,0.072547


In [9]:
df_IMs = pd.merge(left = df_gm, right = spectral_acc, on = df_gm.index)
df_IMs = df_IMs.set_index('key_0')
df_IMs.head()

Unnamed: 0_level_0,PGA,PGV,DS_5to70,DS_5to95,CAV,0.01,0.02,0.03,0.04,0.05,...,7.92,7.93,7.94,7.95,7.96,7.97,7.98,7.99,8.0,Sa_avg
key_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GM1,338.426217,41.100682,6.36,12.98,1099.61937,0.339825,0.361133,0.363163,0.396594,0.450216,...,0.010611,0.010615,0.010619,0.010622,0.010626,0.010629,0.010631,0.010632,0.010633,0.24202
GM2,301.84192,22.985907,5.66,13.46,954.213665,0.307881,0.297817,0.317829,0.307808,0.324563,...,0.010427,0.010398,0.010369,0.01034,0.010311,0.010282,0.010254,0.010225,0.010197,0.230157
GM3,226.90107,18.212685,6.4,12.02,844.604753,0.235589,0.240269,0.238789,0.265133,0.277307,...,0.003832,0.003816,0.003801,0.003786,0.003771,0.003756,0.003741,0.003726,0.003711,0.167386
GM4,351.411692,27.369871,5.8,10.64,1183.699743,0.355774,0.366203,0.382934,0.395595,0.454592,...,0.001953,0.001948,0.001943,0.001938,0.001933,0.001928,0.001923,0.001918,0.001913,0.150906
GM5,105.588004,9.254588,12.28,25.72,569.929722,0.10696,0.115329,0.119738,0.125706,0.166744,...,0.013706,0.013679,0.013652,0.013623,0.013593,0.013564,0.013534,0.013503,0.013472,0.072547


In [10]:
gm_info = pd.read_csv(r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/SurrogateModeling/Data/MirandaGMs/Miranda240GroundMotionInfo.csv', 
                     index_col = 0, encoding='latin-1')
## replacing distance with value 0 with 1, to bypass error while taking log of the Rjb
gm_info['Distance 1 (km)'] = gm_info['Distance 1 (km)'].replace(0, 1)
# print(gm_info['Distance 1 (km)'].min())
gm_info.columns


Index(['Earthquake name', 'Earthquake time', 'Fault type', 'AS Input',
       'Sadigh Input', 'Station name', 'Magnitude', 'NEHRP site class',
       'Distance 1 (km)', 'Distance 2 (km)', 'Component Degree',
       'PGA from Miranda(cm/s^2)', 'PGA from GUAN (cm/sec^2)',
       'PGV from GUAN (cm/sec)', 'epsilon (Abrahamson and Silva 1997)',
       'epsilon (Sadigh et al 1997)'],
      dtype='object')

In [11]:
gmDir = r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/SurrogateModeling/Data/MirandaGMs/ProcessedGroundMotion'
os.chdir(os.path.join(gmDir, 'GroundMotionInfo'))
distance_rjb = gm_info['Distance 1 (km)'].values
magnitude = gm_info['Magnitude'].values
# np.savetxt('Rjb.txt', distance_rjb, fmt = '%.3f')
# np.savetxt('Magnitude.txt', magnitude, fmt = '%.2f')

## Defining Classes for efficiency and sufficiency

In [15]:
class OLS:
    def __init__(self, EDP, IM):
        self.EDP = EDP
        self.IM = IM
        self.Y = np.log(EDP)
        self.X = sm.add_constant(np.log(IM))
        
        self.residual_mean = None
        self.residual_std = None
        self.xmin = None
        self.xmax = None
        
        self.fitModel()
        self.get_summary()
        
#         self.plot_residuals_hist()
#         self.plot_model_fit()
        
    def fitModel(self):
        model = sm.OLS(self.Y, self.X)
        self.result = model.fit()
        
    def get_summary(self):
        return self.result.summary()
    
    def plot_residuals_hist(self):
        fig, ax = plt.subplots()
        sns.histplot(x = self.result.resid, ax=ax, stat = 'density', linewidth = 0, kde=True)
        ax.set(title = 'Distribution of residuals', xlabel = 'residuals')
        
        self.residual_mean, self.residual_std = norm.fit(self.result.resid)
        ##plotting normal pdf
        self.xmin, self.xmax = plt.xlim() # using maxi/min values from histogram
        x = np.linspace(self.xmin, self.xmax, 200)
        pdf = norm.pdf(x, self.residual_mean, self.residual_std)
        sns.lineplot(x = x, y = pdf, color = 'red', ax = ax)
        plt.show()
    
    def get_efficiency(self):
        return np.std(self.result.resid)
            
    def qqplot(self):
        sm.qqplot(self.result.resid, line = 's')


    def fitplot(self):
        sm.graphics.plot_fit(self.result, 1, vlines = False);
        
    def plot_model_fit(self):
        Ymin = self.Y.min()
        Ymax = self.Y.max()
#         ax = sns.subplots()
        ax = sns.scatterplot(x = self.result.fittedvalues, y = self.Y)
#         ax.set_ylim(Ymin, Ymax)
#         ax.set_xlim(self.xmin, self.xmax)
        ax.set_xlabel('Predicted Values')
        ax.set_ylabel('Observed Values')
        
        X_ref = Y_ref = np.linspace(Ymin, Ymax, 200)
        plt.plot(X_ref, Y_ref, color = 'red', linewidth = 1.3)
        plt.show()
        
class Sufficiency():
    def __init__(self, EDP, IM, Rjb, Mag):
        self.EDP = EDP
        self.IM = IM
        self.Y = np.log(EDP)
        self.X = sm.add_constant(np.log(IM))
        
        temp = {'I': np.ones(shape = len(Mag)),
                'lnEDP': np.log(self.EDP),
                'lnIM': np.log(self.IM),
                'lnRjb': np.log(Rjb),
                'M': Mag}
        self.dummydf = pd.DataFrame(temp, index = None)
        
        self.sufficiency_against_R()
        self.sufficiency_against_M()
        self.sufficiency_against_M_and_R()
        
    def sufficiency_against_R(self):
        self.model_IM_vs_R = smf.ols('lnEDP ~ lnIM + lnRjb', data = self.dummydf)
        model_res = self.model_IM_vs_R.fit()
        self.summary_against_R = model_res.summary()
        return model_res.pvalues['lnRjb']
        
    def sufficiency_against_M(self):
        self.model_IM_vs_M = smf.ols('lnEDP ~ lnIM + M', data = self.dummydf)
        model_res = self.model_IM_vs_M.fit()
        self.summary_against_R = model_res.summary()
        return model_res.pvalues['M']

    def sufficiency_against_M_and_R(self):
        self.model_IM_vs_R_and_M = smf.ols('lnEDP ~ lnIM + M + lnRjb', data = self.dummydf)
        model_res = self.model_IM_vs_R_and_M.fit()
        self.summary_against_R = model_res.summary()
        return model_res.pvalues
    
    def qqplot(self, model):
        sm.qqplot(model.fit(), line = 's')
      

In [None]:
def SummaryResutls_efficiency(buildingIndex, IMs=['SaT1', 'PGA', 'PGV', 'Sa_avg'], separate_direction = False):
    dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])
    os.chdir(dataDir)
    sdr = pd.read_csv('SDR.csv', header = None)
    pfa = pd.read_csv('PFA.csv', header = None)
    
    numStory = int(BuildingList[buildingIndex].split('_')[0][1])
#     IMs = ['SaT1', 'PGA', 'PGV']
    tempdf = []
    d = []
    keys = ['1st Story', '2nd Story', '3rd Story', '4th Story']
    temp_sdr = {}
    
    if separate_direction:
        start = 0
        increment = 2
    else:
        start = 0
        increment = 1
    
    for i in range(len(IMs)):
        if IMs[i] == 'SaT1':
            IM = df_IMs[T[buildingIndex]].values[start::increment]
        else:
            IM = df_IMs[IMs[i]][start::increment]

        for j in range(numStory):
                sdrX = sdr[3+j].values[:numGM][start::increment]
                sdrZ = sdr[3+j].values[numGM:numGM*2][start::increment]

                pfaX = pfa[4+j].values[:numGM][start::increment]
                pfaZ = pfa[4+j].values[numGM:numGM*2][start::increment]

                ols_sdrX = OLS(sdrX, IM)
                ols_sdrZ = OLS(sdrZ, IM)
                ols_pfaX = OLS(pfaX, IM)
                ols_pfaZ = OLS(pfaZ, IM)
                temp_sdr[keys[j]] = {'SDR_X':ols_sdrX.get_efficiency(),
                                     'SDR_Z':ols_sdrZ.get_efficiency(),
                                     'PFA_X': ols_pfaX.get_efficiency(),
                                     'PFA_Z': ols_pfaZ.get_efficiency()}
        reform = {(outerKey, innerKey): values for outerKey, innerDict in temp_sdr.items() for innerKey, values in innerDict.items()}
        df = pd.DataFrame.from_dict(reform, orient='index').transpose()
        df.columns = pd.MultiIndex.from_tuples(df.columns)
        df['IM'] = IMs[i]
        df = df.set_index('IM')
        tempdf.append(df)

    return pd.concat(tempdf)
IM_list = ['SaT1', 'PGA', 'PGV', 'Sa_avg', 'CAV']
s1_48x32_high= SummaryResutls_efficiency(2, IM_list, separate_direction=False)
s1_48x32_high

In [None]:
s1_48x32_high= SummaryResutls_efficiency(2, IM_list, separate_direction=True)
s1_48x32_high

In [None]:
def SummaryResutls_sufficiency(buildingIndex, IMs=['SaT1', 'PGA', 'PGV', 'Sa_avg'], separate_direction = False):
    dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])
    os.chdir(dataDir)
    sdr = pd.read_csv('SDR.csv', header = None)
    pfa = pd.read_csv('PFA.csv', header = None)
    
    numStory = int(BuildingList[buildingIndex].split('_')[0][1])
    tempdf = []
    d = []
    keys = ['1st Story', '2nd Story', '3rd Story', '4th Story']
    temp_sdr = {}
    
    if separate_direction:
        start = 0
        increment = 2
    else:
        start = 0
        increment = 1
    
    for i in range(len(IMs)):
        if IMs[i] == 'SaT1':
            IM = df_IMs[T[buildingIndex]].values
        else:
            IM = df_IMs[IMs[i]]

        for j in range(numStory):
                
            sdrX = sdr[3+j].values[:numGM][::increment]
            sdrZ = sdr[3+j].values[numGM:numGM*2][1::increment]

            pfaX = pfa[4+j].values[:numGM][::increment]
            pfaZ = pfa[4+j].values[numGM:numGM*2][1::increment]
            
            suff_sdrX = Sufficiency(sdrX, IM[::increment], distance_rjb[::increment], magnitude[::increment])
            suff_sdrZ = Sufficiency(sdrZ, IM[1::increment], distance_rjb[1::increment], magnitude[1::increment])
            suff_pfaX = Sufficiency(pfaX, IM[::increment], distance_rjb[::increment], magnitude[::increment])
            suff_pfaZ = Sufficiency(pfaZ, IM[1::increment], distance_rjb[1::increment], magnitude[1::increment])
            #### sufficiency against R
            p_sdrX_R = np.round(suff_sdrX.sufficiency_against_R(), 4)
            p_sdrZ_R = np.round(suff_sdrZ.sufficiency_against_R(), 4)
            p_pfaX_R = np.round(suff_pfaX.sufficiency_against_R(), 4)
            p_pfaZ_R = np.round(suff_pfaZ.sufficiency_against_R(), 4)
            #### sufficiency against M 
            p_sdrX_M = np.round(suff_sdrX.sufficiency_against_M(), 4)
            p_sdrZ_M = np.round(suff_sdrZ.sufficiency_against_M(), 4)
            p_pfaX_M = np.round(suff_pfaX.sufficiency_against_M(), 4)
            p_pfaZ_M = np.round(suff_pfaZ.sufficiency_against_M(), 4)
            temp_sdr[keys[j]] = {'SDR_X vs R':'YES(%s)'%p_sdrX_R if p_sdrX_R >= 0.05 else 'NO(%s)'%p_sdrX_R,
                                 'SDR_Z vs R':'YES(%s)'%p_sdrZ_R if p_sdrZ_R >= 0.05 else 'NO(%s)'%p_sdrZ_R,
                                 'PFA_X vs R':'YES(%s)'%p_pfaX_R if p_pfaX_R >= 0.05 else 'NO(%s)'%p_pfaX_R,
                                 'PFA_Z vs R':'YES(%s)'%p_pfaZ_R if p_pfaZ_R >= 0.05 else 'NO(%s)'%p_pfaZ_R,
                                 'SDR_X vs M':'YES(%s)'%p_sdrX_M if p_sdrX_M >= 0.05 else 'NO(%s)'%p_sdrX_M,
                                 'SDR_Z vs M':'YES(%s)'%p_sdrZ_M if p_sdrZ_M >= 0.05 else 'NO(%s)'%p_sdrZ_M,
                                 'PFA_X vs M':'YES(%s)'%p_pfaX_M if p_pfaX_M >= 0.05 else 'NO(%s)'%p_pfaX_M,
                                 'PFA_Z vs M':'YES(%s)'%p_pfaZ_M if p_pfaZ_M >= 0.05 else 'NO(%s)'%p_pfaZ_M
                                }
        reform = {(outerKey, innerKey): values for outerKey, innerDict in temp_sdr.items() for innerKey, values in innerDict.items()}
        df = pd.DataFrame.from_dict(reform, orient='index').transpose()
        df.columns = pd.MultiIndex.from_tuples(df.columns)
        df['IM'] = IMs[i]
        df = df.set_index('IM')
        tempdf.append(df)

    return pd.concat(tempdf)
IM_list = ['SaT1', 'PGA', 'PGV', 'Sa_avg', 'CAV']
s1_48x32_high= SummaryResutls_sufficiency(2, IM_list, separate_direction=False)
s1_48x32_high

In [None]:
def plot_efficiency(buildingIndex, IM=['SaT1', 'PGA', 'PGV', 'Sa_avg', 'CAV'], savefig = False):
    numStory = int(BuildingList[buildingIndex].split('_')[0][1])
    floor = np.arange(1, numStory + 1)
    
    summaryResult = SummaryResutls_efficiency(buildingIndex, IM, separate_direction=True)
    markerList = ['.', '*', '+', 'o', 's', 'p', 'd', 'X']
    
    fig, axs = plt.subplots(2, 2, figsize=(10,12))
    fig.suptitle('%s'%BuildingList[buildingIndex], fontsize = 16)
    for i in range(len(IM)):
        axs[0, 0].plot(summaryResult.loc['%s'%IM[i]][::4], floor, label = IM[i], marker = markerList[i])
        axs[0, 0].set_yticks(floor)
        axs[0, 0].set_title('Dispersion SDR X')
        axs[0,0].legend()
        axs[1, 0].plot(summaryResult.loc['%s'%IM[i]][1::4], floor, label = IM[i], marker = markerList[i])
        axs[1, 0].set_yticks(floor)
        axs[1, 0].set_title('Dispersion SDR Z')
        axs[0, 1].plot(summaryResult.loc['%s'%IM[i]][2::4], floor, label = IM[i], marker = markerList[i])
        axs[0, 1].set_yticks(floor)
        axs[0, 1].set_title('Dispersion PFA X')
        axs[1, 1].plot(summaryResult.loc['%s'%IM[i]][3::4], floor, label = IM[i], marker = markerList[i])
        axs[1, 1].set_yticks(floor)
        axs[1, 1].set_title('Dispersion PFA Z')
    dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])
    os.chdir(dataDir)
    if savefig:
        plt.savefig('efficiency.png')
    else:
        plt.show()

for i in range(len(BuildingList)):
    plot_efficiency(i, savefig=True)


In [None]:
np.arange(1, 5)

In [None]:
### Not used for now as the focus is dispersion not the median value
def plot_empirical_EDP(buildingIndex, EDP = 'SDR', separate_direction = False):
    dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])
    os.chdir(dataDir)
    if EDP == 'SDR':
        edp = pd.read_csv('SDR.csv', header = None)
        start_index = 3 #for dataframe
    elif EDP == 'PFA':
        edp = pd.read_csv('PFA.csv', header = None)
        start_index = 4
    else:
        print('Enter Valid EDP')
        sys.exit(1)
    
    if separate_direction:
        start = 0
        increment = 2
    else:
        start = 0
        increment = 1
        
        
    edpX = edp[:numGM][start::increment]
    edpY = edp[numGM:numGM*2][start::increment]
    
    numStory = int(BuildingList[buildingIndex].split('_')[0][1])
    floor = np.arange(1, numStory + 1)
    
    median_edpX = np.zeros(shape = numStory)
    median_edpY = []
    logstd_edpX = []
    logstd_edpY = []
    
    for i in range(numStory):
        median_edpX[i] = np.median(edpX[start_index + i].values[:numGM])
        median_edpY.append(np.median(edpY[start_index + i].values[:numGM]))
        logstd_edpX.append(np.std(np.log(edpX[start_index + i].values[:numGM])))
        logstd_edpY.append(np.std(np.log(edpY[start_index + i].values[:numGM])))
    
    fig, axs = plt.subplots(2, 2)
    axs[0, 0].plot(median_edpX[::-1], floor)
    axs[0, 0].set_title('Median X')
    axs[0, 1].plot(logstd_edpX, floor, 'tab:green')
    axs[0, 1].set_title('Axis [1, 0]')
    
    axs[1, 0].plot(median_edpY[::-1], floor, 'tab:orange')
    axs[1, 0].set_title('Axis [0, 1]')
    
    axs[1, 1].plot(logstd_edpY[::-1], floor, 'tab:red')
    axs[1, 1].set_title('Axis [1, 1]')

    for ax in axs.flat:
        ax.set(xlabel='x-label', ylabel='y-label')

    # Hide x labels and tick labels for top plots and y ticks for right plots.
    for ax in axs.flat:
        ax.label_outer()
#     print(median_edpX)

# plot_efficiency(9, 'SDR')
    

In [None]:
s4_96x48_high_suff = SummaryResutls_sufficiency(8, separate_direction=True)
s4_96x48_high_suff.T

In [None]:
s1_48x32_high = SummaryResutls_efficiency(2, separate_direction=True)
s1_48x32_high

In [None]:
def save_data_for_R(buildingIndex, IMs=['SaT1', 'Sa_avg', 'PGA', 'PGV'], separate_direction = False):
    dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])
    os.chdir(dataDir)
    sdr = pd.read_csv('SDR.csv', header = None)
    pfa = pd.read_csv('PFA.csv', header = None)
    
    if separate_direction:
        start = 0
        increment = 2
    else:
        start = 0
        increment = 1
    
    numStory = int(BuildingList[buildingIndex].split('_')[0][1])
    temp = {}
    for i in range(numStory):
        sdrX = sdr[3+i].values[:numGM][start::increment]
        sdrZ = sdr[3+i].values[numGM:numGM*2][start::increment]

        pfaX = pfa[4+i].values[:numGM][start::increment]
        pfaZ = pfa[4+i].values[numGM:numGM*2][start::increment]
        
        temp['story_%s_sdrX'%(i+1)] = sdrX
        temp['story_%s_sdrZ'%(i+1)] = sdrZ
        temp['story_%s_pfaX'%(i+1)] = pfaX
        temp['story_%s_pfaZ'%(i+1)] = pfaZ
    
    for j in range(len(IMs)):
        if IMs[j] == 'SaT1':
            temp[IMs[j]] = df_IMs[T[buildingIndex]].values
        else:
            temp[IMs[j]] = df_IMs[IMs[j]]
    final_df = pd.DataFrame(temp)
    final_df.to_csv('Inputs_for_%s.csv'%BuildingList[buildingIndex])
    return final_df
    
for ii in range(len(BuildingList)):
    save_data_for_R(ii)


In [None]:
s2_48x32_high= SummaryResutls_efficiency(4, ['SaT1', 'PGA', 'PGV'])
s2_48x32_high

In [None]:
s4_96x48_high= SummaryResutls_efficiency(8, ['SaT1', 'PGA', 'PGV', 'Sa_avg'])
s4_96x48_high

In [None]:
s4_96x48_veryhigh= SummaryResutls_efficiency(9, ['SaT1', 'PGA', 'PGV', 'Sa_avg'])
s4_96x48_veryhigh

Combining all the data efficiency into a big data frame.

In [None]:
d = []
for i in range(len(BuildingList)):
    d.append(SummaryResutls_efficiency(i, ['SaT1', 'PGA', 'PGV']))

finaldf = pd.concat(d, keys = BuildingList)
finaldf.head(50)

## Debugging

In [28]:
buildingIndex = 9

start_idx = 0
step_size = 2

baseDir = r'/Users/laxmandahal/Desktop/UCLA/Phd/Research/IM_study'
dataDir = os.path.join(baseDir, *['Results', 'IM_study_240GMs', BuildingList[buildingIndex]])

os.chdir(dataDir)
sdr = pd.read_csv('SDR.csv', header = None)
pfa = pd.read_csv('PFA.csv', header = None)

sdrX = sdr[3].values[:numGM][start_idx::step_size]
sdrZ = sdr[3].values[numGM:numGM*2][start_idx::step_size]

pfaX = pfa[4].values[:numGM][start_idx::step_size]
pfaZ = pfa[4].values[numGM:numGM*2][start_idx::step_size]

sa_T013 = spectral_acc[T[buildingIndex]].values[start_idx::step_size]
X = sm.add_constant(np.log(sa_T013))
Xx = sm.add_constant(np.log(sa_T013)[::2])

In [17]:
testEff = OLS(sdrX, sa_T013)


In [19]:
testEff.result.resid.shape


(120,)

In [21]:
distance_rjb.shape[::2]

(240,)

In [26]:
testSuff = sm.OLS(testEff.result.resid, sm.add_constant(np.log(distance_rjb[::2])))
testSuff.fit().summary()


0,1,2,3
Dep. Variable:,y,R-squared:,0.002
Model:,OLS,Adj. R-squared:,-0.006
Method:,Least Squares,F-statistic:,0.2594
Date:,"Fri, 11 Mar 2022",Prob (F-statistic):,0.611
Time:,18:07:00,Log-Likelihood:,-15.08
No. Observations:,120,AIC:,34.16
Df Residuals:,118,BIC:,39.74
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0397,0.082,0.485,0.629,-0.123,0.202
x1,-0.0122,0.024,-0.509,0.611,-0.060,0.035

0,1,2,3
Omnibus:,18.924,Durbin-Watson:,1.785
Prob(Omnibus):,0.0,Jarque-Bera (JB):,60.524
Skew:,-0.431,Prob(JB):,7.2e-14
Kurtosis:,6.371,Cond. No.,11.9


In [36]:
sa_T013.shape

(240,)

In [37]:
##### Using residual from efficiency to compute sufficiency instead of direct linear model
sa_T013 = spectral_acc[T[buildingIndex]].values
temp = {'I': np.ones(shape = int(numGM/2)),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013[::2]),
        'M': gm_info['Magnitude'].values[::2]}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + M', data = dummydf)
model_res = model.fit()
model_res.summary()

0,1,2,3
Dep. Variable:,lnEDP,R-squared:,0.734
Model:,OLS,Adj. R-squared:,0.729
Method:,Least Squares,F-statistic:,161.4
Date:,"Fri, 11 Mar 2022",Prob (F-statistic):,2.31e-34
Time:,18:18:35,Log-Likelihood:,-10.663
No. Observations:,120,AIC:,27.33
Df Residuals:,117,BIC:,35.69
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.0125,0.342,-17.560,0.000,-6.691,-5.334
lnSaT1,0.4562,0.027,16.788,0.000,0.402,0.510
M,0.1492,0.049,3.036,0.003,0.052,0.247

0,1,2,3
Omnibus:,15.653,Durbin-Watson:,1.744
Prob(Omnibus):,0.0,Jarque-Bera (JB):,61.756
Skew:,-0.02,Prob(JB):,3.89e-14
Kurtosis:,6.514,Cond. No.,99.1


In [None]:
# I = np.ones(shape = numGM)
# Y = np.log()
sa_T013 = spectral_acc[T[4]].values
temp = {'I': np.ones(shape = numGM),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013),
        'M': gm_info['Magnitude'].values}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + M', data = dummydf)
model_res = model.fit()
model_res.summary()

In [None]:
# I = np.ones(shape = numGM)
# Y = np.log()
sa_T013 = spectral_acc[T[4]].values
temp = {'I': np.ones(shape = numGM),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013),
        'lnRjb': np.log(gm_info['Distance 1 (km)'].values)}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + lnRjb', data = dummydf)
model_res = model.fit()
model_res.summary()

In [None]:
model_res.pvalues

In [None]:
sa_T013 = spectral_acc[T[4]].values
temp = {'I': np.ones(shape = numGM),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013),
        'lnRjb': np.log(gm_info['Distance 1 (km)'].values),
        'M':gm_info['Magnitude'].values}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + lnRjb + M', data = dummydf)
model_res = model.fit()
model_res.summary()

In [None]:
model_res.pvalues

In [None]:
sdrXx = sdr[3].values[:numGM][1::2]
len(sdrXx)

In [None]:
linear_fit = sm.OLS(np.log(sdrX), X)
fit_result = linear_fit.fit()
fit_result.summary()

In [None]:
np.std(fit_result.resid)

In [None]:
linear_fit = sm.OLS(np.log(sdrXx), Xx)
fit_result = linear_fit.fit()
fit_result.summary()

In [None]:
np.std(fit_result.resid)

In [None]:
temp = {'I': np.ones(shape = 240),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013),
        'lnRjb': np.log(gm_info['Distance 1 (km)'].values),
        'M':gm_info['Magnitude'].values}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + M', data = dummydf)
model_res = model.fit()
model_res.summary()

In [None]:
model_res.pvalues['lnRjb']

a = 'Yes' if model_res.pvalues['lnRjb'] >= 0.05 else 'No'

In [None]:
np.round(model_res.pvalues['lnRjb'], 2)



In [None]:
temp = {'I': np.ones(shape = 240),
        'lnEDP': np.log(sdrX),
        'lnSaT1': np.log(sa_T013),
        'lnRjb': np.log(gm_info['Distance 1 (km)'].values),
        'M':gm_info['Magnitude'].values}
dummydf = pd.DataFrame(temp, index = None)
model = smf.ols('lnEDP ~ lnSaT1 + lnRjb', data = dummydf)
model_res = model.fit()
model_res.summary()

In [None]:
sns.histplot(fit_result.resid)

In [None]:
mu, std = norm.fit(fit_result.resid)
mu, std

In [None]:
np.std(fit_result.resid)

In [None]:
np.std(fit_result.resid)

In [None]:
np.std(fit_result.resid)

In [None]:
ols_sdrX_saT1 = OLS(sdrX, sa_T013)

In [None]:
ols_sdrX_saT1.get_summary()

In [None]:
ols_sdrX_saT1.plot_model_fit()

In [None]:
ols_sdrX_saT1.plot_residuals_hist()