该脚本使用线性回归方法，筛选合适的走时数据

This script uses the linear regression method to select appropriate traveltime data

In [None]:
# Load the data-processing helper functions by running the shared utilities
# notebook; the helpers called in the cells below (reading, filtering, plotting
# and writing of traveltime data) are presumably defined there -- TODO confirm.
%run ../utils/functions_for_data.ipynb

In [None]:
# Source-receiver data file (.dat) that this notebook filters.
fname = "output_data/alg1b_src_rec.dat"

# Read the file into the event and station information used by the later cells.
ev_info_obs, st_info_obs = read_src_rec_file(fname)

# Plot the data distribution.
fig_ev_st_distribution_dep(ev_info_obs, st_info_obs)


In [None]:
# Visualize traveltime with respect to source-receiver distance and discard
# outliers.

# Selection parameters. Data are retained when they satisfy
#     slope * dis + intercept + down < time < slope * dis + intercept + up
slope, intercept = 0.16, 0
down, up = -10, 10

# Range of distance from source to receiver.
dis_min, dis_max = 0, 500

ev_info_obs = fig_data_plot_remove_outliers(
    ev_info_obs, st_info_obs,
    slope, intercept, up, down,
    dis_min, dis_max,
)

In [None]:
# Plot the distance-time scatter of the given phases and do a linear regression.

# Phases to plot; color_list gives the plotting color for each phase, in the
# same order.
phase_list = ["Pn", "P", "Pg", "Pb"]
color_list = ["k", "b", "r", "g"]

# Range of distance from source to receiver.
dis_min, dis_max = 0, 500

# Plot.
fig_data_plot_phase(
    ev_info_obs, st_info_obs,
    phase_list, color_list,
    dis_min, dis_max,
)

In [None]:
# Keep only data with epicentral distance <= 100 km (discard data farther away).

# epi_dis1 is the 100 km epicentral-distance threshold; epi_dis2 is a very
# large value, presumably the upper end of the discarded distance range --
# confirm against limit_epi_dis.
epi_dis1, epi_dis2 = 100, 1000000
ev_info_obs = limit_epi_dis(ev_info_obs, st_info_obs, epi_dis1, epi_dis2)

# Phases to plot and the color used for each of them.
phase_list = ["Pn", "P", "Pg", "Pb"]
color_list = ["k", "b", "r", "g"]

# Range of distance from source to receiver.
dis_min, dis_max = 0, 110

# Plot.
fig_data_plot_phase(
    ev_info_obs, st_info_obs,
    phase_list, color_list,
    dis_min, dis_max,
)

In [None]:
# Retain only the given phases (P, Pg, Pb).

# Phases to keep, and the plotting color for each of them.
phase_list = ["P", "Pg", "Pb"]
color_list = ["b", "r", "g"]

ev_info_obs = limit_data_phase(ev_info_obs, phase_list)

# Range of distance from source to receiver.
dis_min, dis_max = 0, 110

# Plot the retained phases.
fig_data_plot_phase(
    ev_info_obs, st_info_obs,
    phase_list, color_list,
    dis_min, dis_max,
)

In [None]:
# Do a linear regression of all data and retain data whose residual lies within
# 3*SEE of the fitted line. SEE is the third value returned by
# linear_regression (presumably the standard error of the estimate -- confirm
# against the helper).

# Fit traveltime against distance on the current data set.
dis_obs, time_obs = data_dis_time(ev_info_obs, st_info_obs)
slope, intercept, SEE = linear_regression(dis_obs, time_obs)

# Band around the fitted line; data are retained when
#     slope * dis + intercept + down < time < slope * dis + intercept + up
up = 3 * SEE
down = -3 * SEE

# Range of distance from source to receiver.
dis_min = 0
dis_max = 110

ev_info_obs = fig_data_plot_remove_outliers(
    ev_info_obs, st_info_obs,
    slope, intercept, up, down,
    dis_min, dis_max,
)

# Fit again on the filtered data so both fits can be compared.
dis_obs, time_obs = data_dis_time(ev_info_obs, st_info_obs)
slope_2, intercept_2, SEE_2 = linear_regression(dis_obs, time_obs)

# Apply the format with the % operator. Passing the values as extra print()
# arguments would print the raw "%6.3f" placeholders followed by the values.
print("The (slope,intercept,SEE) of original data is (%6.3f,%6.3f,%6.3f)" % (slope, intercept, SEE))
print("The (slope,intercept,SEE) of filtered data is (%6.3f,%6.3f,%6.3f)" % (slope_2, intercept_2, SEE_2))

In [None]:
# Output the filtered data.
out_path = "output_data"

# Output file names: the data file for the TomoATT inversion, plus the
# earthquake list and the station list used for plotting.
out_fname = "%s/alg2_src_rec.dat" % out_path
out_fname_ev = "%s/alg2_ev_list.dat" % out_path
out_fname_st = "%s/alg2_st_list.dat" % out_path

# Save the data file for TomoATT.
write_src_rec_file(out_fname, ev_info_obs, st_info_obs)

# Save the earthquake list for plotting.
write_src_list_file(out_fname_ev, ev_info_obs)

# Save the station list for plotting.
write_rec_list_file(out_fname_st, ev_info_obs, st_info_obs)