# Component 1: Tohoku Earthquake Dataset

## 1. Tohoku Earthquake Location Data
We used here the backend **matplotlib nbagg** instead of **matplotlib inline** because'inline' does not support some interactive functions we need later on. __[(1)](https://stackoverflow.com/questions/27704490/interactive-pixel-information-of-an-image-in-python)__ __[(2)](https://matplotlib.org/faq/usage_faq.html)__ 

In [1]:
%matplotlib nbagg

In [3]:
#import the libaries we need to use

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime

##  Import display 
from IPython.display import display


### ipywidget libraries
from ipywidgets import HBox, VBox, IntSlider, Play, jslink
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

### bqplot libraries
from bqplot import (
    Axis, ColorAxis, LinearScale, DateScale, DateColorScale, OrdinalScale, Mercator, Orthographic,
    OrdinalColorScale, ColorScale, Scatter, Lines, Figure, Tooltip, Map, ColorAxis, AlbersUSA, topo_load
)

In [4]:
#reads into the location.txt file, provides headers
locations=pd.read_table("data/location.txt",names=["longitude","latitude","default1","default2"],sep="\t")

By viewing the tables in the above step, we found out that the two columns 'default1' and 'default2' are irrelevant for our analysis, so we decided to cut them out.

In [5]:
#drop the irrelevant columns default1, defulat 2
locations.drop(["default1","default2"],inplace=True,axis=1)

The number 1000 to 1437 corresponds to the **station**, so we define it here:

In [6]:
locations["station"]=np.arange(1000,1438)

In [7]:
#resetting the index to 'station'
locations.set_index("station", inplace=True)

In [8]:
#A sanity check to see if our index worked, by locating index number 1
locations.loc[1001]

longitude   -98.102
latitude     26.938
Name: 1001, dtype: float64

## Location of Tohoku earthquake  

According to NASA's __[Earth Observertory website](https://earthobservatory.nasa.gov/IOTD/view.php?id=49621)__, the Tohoku earthquake struck Japan at "at 38.3 degrees North latitude and 142.4 degrees East longitude". Based on this information, we set the center location of Tohoku accordingly (Longitude, Latitude).

In [9]:
#center point of the tohoku earthquake
tohoku_location=(-142.4,38.3)

In [10]:
locations.index

Int64Index([1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
            ...
            1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437],
           dtype='int64', name='station', length=438)

In [11]:
#calculate the distance from tohoku location to each station
from haversine import haversine
locations["distance"]=[haversine(locations.loc[i],tohoku_location) for i in locations.index]

In [12]:
locations.head()

Unnamed: 0_level_0,longitude,latitude,distance
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1000,-98.683,27.065,4882.202882
1001,-98.102,26.938,4945.643921
1002,-98.068,26.463,4951.01387
1003,-117.11,32.889,2836.018544
1004,-107.79,32.532,3862.182187


In [13]:
#sort the location by the distances from the center point
locations=locations.sort_values("distance")

In [14]:
locations.index

Int64Index([1211, 1193, 1228, 1244, 1194, 1288, 1257, 1165, 1272, 1151,
            ...
            1063, 1050, 1121, 1359, 1375, 1242, 1286, 1269, 1304, 1287],
           dtype='int64', name='station', length=438)

## 2. Tohoku Earthquake time and magnitude data

In [15]:
#read into the time & magnitude file
array_vals=pd.read_csv("data/data_tohoku_norm_transpose.csv",header=None)

As each row in the Tohoku dataset corresponds to the magnitude data for each second increment after the Earthquake happened, we create a date range to get time for 4 hours with the frequency 1 second each according to the 14401 seconds of recording earthquake data. 

In [16]:
v = pd.date_range("2:46PM", "6:46PM", freq="1s")
v -= v[0]
array_vals["time"] = v
array_vals.set_index("time", inplace=True)

array_vals.columns=np.arange(1000,1438)

### Data Normalization: We normalize the magnitude in range [0,1]

In [18]:
#normalize magnitude in range [0.1] 
min_val=array_vals.min().min()
max_val=array_vals.max().max()
norm_array_vals=(array_vals-min_val)/(max_val-min_val)

### With the use of new json file using the station id in 4 digit. 
norm_array_vals.columns = np.arange(1000,1438)

In [19]:
#reorder the columns in norm_array_vals and array_vals using the locations index 
#so that columns in norm_array_vals and array_vals will be sorted by the distance from the station to Tohoku EQ center 
norm_array_vals=norm_array_vals[locations.index]
array_vals=array_vals[locations.index]

In [20]:
#checking how the tables look like now 
norm_array_vals.head()
norm_array_vals.transpose().head()

time,0 days 00:00:00,0 days 00:00:01,0 days 00:00:02,0 days 00:00:03,0 days 00:00:04,0 days 00:00:05,0 days 00:00:06,0 days 00:00:07,0 days 00:00:08,0 days 00:00:09,...,0 days 03:59:51,0 days 03:59:52,0 days 03:59:53,0 days 03:59:54,0 days 03:59:55,0 days 03:59:56,0 days 03:59:57,0 days 03:59:58,0 days 03:59:59,0 days 04:00:00
1211,0.623412,0.623189,0.622979,0.622786,0.622615,0.622467,0.622345,0.622248,0.622175,0.622125,...,0.618684,0.618954,0.619258,0.61959,0.619947,0.620323,0.620714,0.621114,0.621514,0.621904
1193,0.624967,0.625078,0.625176,0.62526,0.62533,0.625384,0.625421,0.625443,0.62545,0.625445,...,0.621636,0.621831,0.62203,0.622227,0.62242,0.622607,0.622785,0.622952,0.623107,0.623248
1228,0.624088,0.623944,0.623804,0.623673,0.623554,0.623448,0.623355,0.623276,0.623212,0.623163,...,0.6206,0.620829,0.621076,0.621333,0.621592,0.621845,0.62209,0.622323,0.622544,0.62389
1244,0.623702,0.623781,0.623859,0.623933,0.624003,0.624066,0.624122,0.62417,0.62421,0.624241,...,0.616177,0.616465,0.616844,0.617301,0.617824,0.618397,0.619002,0.619622,0.620242,0.62389
1194,0.62413,0.624215,0.624296,0.624369,0.624433,0.624488,0.624534,0.62457,0.624597,0.624616,...,0.626709,0.626415,0.626131,0.625859,0.625601,0.625357,0.625127,0.624911,0.62471,0.624527


### Check both array_vals and norm_array_vals if any null values

In [19]:
#station number is in numerical order
### Check for the null values 
array_vals.isnull().any().any()
norm_array_vals.isnull().any().any()

True

### Replacing the NaN values of station 1063 with the average values of two adjaction stations.

In [34]:
# There are no data for station 1063 we fill its values with the mean values of stations 1049 and 1050
# 1049 and 1050 are selected because they are two adjaction stations by the order of distance from Tohoku Earthquake center.
avg=norm_array_vals.mean()
avg2=array_vals.mean()
norm_array_vals= norm_array_vals.fillna((avg[1049]+avg[1050] )/2)
array_vals=array_vals.fillna((avg2[1049]+avg2[1050] )/2)

In [30]:
array_vals.head()

Unnamed: 0_level_0,1211,1193,1228,1244,1194,1288,1257,1165,1272,1151,...,1063,1050,1121,1359,1375,1242,1286,1269,1304,1287
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00:00:00,-0.001273,0.002861,0.000524,-0.000501,0.000636,0.000211,-9e-05,0.000501,-0.000291,-0.000892,...,-1.816115e-07,-0.000249,-0.000137,7.7e-05,-0.000191,0.002128,4.9e-05,2.5e-05,-0.0004,6.3e-05
00:00:01,-0.001865,0.003157,0.000142,-0.000291,0.000863,0.000425,-0.000125,0.000159,-0.000324,-0.001083,...,-1.816115e-07,-0.000256,-8.8e-05,0.000217,0.000294,0.001515,9e-06,-0.000152,0.000151,-0.000126
00:00:02,-0.002425,0.003419,-0.000229,-8.4e-05,0.001077,0.000629,-0.000157,-0.000176,-0.000352,-0.001258,...,-1.816115e-07,-0.000261,-3.9e-05,0.000351,0.000764,0.000905,-2.9e-05,-0.000323,0.000685,-0.000309
00:00:03,-0.002937,0.003642,-0.000577,0.000114,0.001272,0.000818,-0.000186,-0.000493,-0.000377,-0.001414,...,-1.816115e-07,-0.000263,8e-06,0.000476,0.001206,0.000318,-6.7e-05,-0.000482,0.001187,-0.00048
00:00:04,-0.003392,0.003827,-0.000895,0.000298,0.001443,0.000985,-0.000212,-0.000787,-0.000397,-0.001546,...,-1.816115e-07,-0.000263,5.2e-05,0.000588,0.00161,-0.000232,-0.000103,-0.000627,0.001645,-0.000635


In [22]:
#Station numbers are in order by distance to the center location 
norm_array_vals.head()

Unnamed: 0_level_0,1211,1193,1228,1244,1194,1288,1257,1165,1272,1151,...,1063,1050,1121,1359,1375,1242,1286,1269,1304,1287
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00:00:00,0.623412,0.624967,0.624088,0.623702,0.62413,0.62397,0.623856,0.624079,0.623781,0.623555,...,0.62389,0.623797,0.623839,0.623919,0.623819,0.624691,0.623909,0.6239,0.62374,0.623914
00:00:01,0.623189,0.625078,0.623944,0.623781,0.624215,0.62405,0.623844,0.62395,0.623769,0.623483,...,0.62389,0.623794,0.623857,0.623972,0.624001,0.62446,0.623894,0.623833,0.623947,0.623843
00:00:02,0.622979,0.625176,0.623804,0.623859,0.624296,0.624127,0.623831,0.623824,0.623758,0.623417,...,0.62389,0.623792,0.623876,0.624023,0.624178,0.624231,0.623879,0.623769,0.624148,0.623774
00:00:03,0.622786,0.62526,0.623673,0.623933,0.624369,0.624198,0.62382,0.623705,0.623749,0.623359,...,0.62389,0.623791,0.623894,0.624069,0.624344,0.62401,0.623865,0.623709,0.624337,0.62371
00:00:04,0.622615,0.62533,0.623554,0.624003,0.624433,0.624261,0.623811,0.623595,0.623741,0.623309,...,0.62389,0.623791,0.62391,0.624111,0.624496,0.623803,0.623852,0.623655,0.624509,0.623652


In [23]:
norm_array_vals.transpose().head()

time,0 days 00:00:00,0 days 00:00:01,0 days 00:00:02,0 days 00:00:03,0 days 00:00:04,0 days 00:00:05,0 days 00:00:06,0 days 00:00:07,0 days 00:00:08,0 days 00:00:09,...,0 days 03:59:51,0 days 03:59:52,0 days 03:59:53,0 days 03:59:54,0 days 03:59:55,0 days 03:59:56,0 days 03:59:57,0 days 03:59:58,0 days 03:59:59,0 days 04:00:00
1211,0.623412,0.623189,0.622979,0.622786,0.622615,0.622467,0.622345,0.622248,0.622175,0.622125,...,0.618684,0.618954,0.619258,0.61959,0.619947,0.620323,0.620714,0.621114,0.621514,0.621904
1193,0.624967,0.625078,0.625176,0.62526,0.62533,0.625384,0.625421,0.625443,0.62545,0.625445,...,0.621636,0.621831,0.62203,0.622227,0.62242,0.622607,0.622785,0.622952,0.623107,0.623248
1228,0.624088,0.623944,0.623804,0.623673,0.623554,0.623448,0.623355,0.623276,0.623212,0.623163,...,0.6206,0.620829,0.621076,0.621333,0.621592,0.621845,0.62209,0.622323,0.622544,0.62389
1244,0.623702,0.623781,0.623859,0.623933,0.624003,0.624066,0.624122,0.62417,0.62421,0.624241,...,0.616177,0.616465,0.616844,0.617301,0.617824,0.618397,0.619002,0.619622,0.620242,0.62389
1194,0.62413,0.624215,0.624296,0.624369,0.624433,0.624488,0.624534,0.62457,0.624597,0.624616,...,0.626709,0.626415,0.626131,0.625859,0.625601,0.625357,0.625127,0.624911,0.62471,0.624527


# Create spectrogram using imshow

In matplotlib, **imshow()** function can be used to create the spectrogram with x axis of the time (in seconds) and 
y axis of the detector position (not the real detector name but in order of distance from the Tohoku earthquake from smallest to largest.)

We can use orginal array (before normalization) **array_vals** with vmin and vmax papameters set to 0 and 1 to normalize. 
The other way is we can use directly the normalization 2D array - **norm_array_vals** without specifying values for vmin and vmax.

In [85]:
# Define function make_spect() to create the spectrogram
def make_spect():
    fig, ax = plt.subplots(figsize=(6,4))
    plt.imshow(array_vals.transpose(), aspect = 'auto', cmap = 'viridis',vmin=0,vmax=1)
    plt.colorbar(label="Tohoku Earthquake Magnitude")
    plt.xlabel('Time (Seconds)')
    plt.ylabel('Detector')
    ax.set_xlim(0,len(array_vals)-1)
    ax.set_ylim(0,437)
    ann = ax.annotate("", xy=(0,0),xytext=(0,15),textcoords="offset points",
                        bbox=dict(boxstyle="square", fc="w"))
    ann.set_visible(False)

    def hover(event):
        if event.inaxes == ax:
            #base on the location of the mouse, set the place to put tooltip so that 
            #it can't be covered by other images or hidden on screen
            if event.xdata<(ax.get_xlim()[1]-ax.get_xlim()[0])/2:
                if event.ydata <(ax.get_ylim()[1]-ax.get_ylim()[0])/2:
                    ann.xy=(event.xdata+100,event.ydata)
                else:
                    ann.xy=(event.xdata+100,event.ydata-100)
            else:
                if event.ydata <(ax.get_ylim()[1]-ax.get_ylim()[0])/2:
                    ann.xy=(event.xdata-8000,event.ydata)
                else:
                    ann.xy=(event.xdata-8000,event.ydata-100)
            ### The use of new json for map needed 4 digit station id, 
            ### so replaced the station id from 4 digit to 3.
            
            #Content of the Tooltip
            ann.set_text("detector#=%s\ntime=%s\nmagnitude=%s" %(str(locations.index.values[int(event.ydata)]),
                                                                str(datetime.timedelta(seconds=int(event.xdata))),
                                                                array_vals[int(event.ydata)+1000][int(event.xdata)]))
            ann.set_visible(True)
            
        else:
            ann.set_visible(False)

    fig.canvas.mpl_connect('motion_notify_event', hover)

In [86]:
make_spect()

<IPython.core.display.Javascript object>

## Defining the call back function for the interactivity of the map and waveform.

In [59]:
## get the station id of station based on selection 

def get_station_id():
     
    if(len(states_map.selected )> 0):
        station_id = states_map.selected[0]
    else:
        station_id = initial_station
    return station_id
### Get the waveform for a station from the starting to 
### the selected interval 

def wave_form_detect(station, time):
    x = range(0, time)
    y = array_vals.iloc[:time][station]
    return x, y

### Update the wave whenever the station is changed. 

def upd_wave_det(self, target):
    #print(states_map.selected)
    #print(target['data'])
    new_x, new_y = wave_form_detect(get_station_id(), slider.value)
    wave.x = new_x
    wave.y = new_y


### Update the wave whenever the time  is changed.     
def upd_wave_time(change):
    #print(states_map.selected)
    #print(target['data'])
    new_x, new_y = wave_form_detect(get_station_id(), change['new'])
    wave.x = new_x
    wave.y = new_y
    
    
### Define the color in the linear scale of the stations based on the time. 

def get_col(time): 
    #temp = np.array(norm_array_vals.iloc[time].values.flatten())
    #c_map = np.log10(np.nan_to_num(temp))
    temp = norm_array_vals.iloc[time]
    c_map = np.log10(temp)
    return c_map

## update the detector colors whenever the time is changed. 
   
def upd_col_lat(change): 
    #scat_plot.color=get_col(slider.value)
    states_map.color=get_col(slider.value).to_dict()
    #rint(change.new)
    
def upd_wf_title_det(self, target):
    waveform.title = 'Waveform for detector: ' + str(get_station_id()-1000 ) +  ' for duration : 0 - ' + str(slider.value) +' s.'
    
    
def upd_wf_title_time(change):
    #print(change)
    
    waveform.title = 'Waveform for detector: ' + str(get_station_id()-1000) +  ' for duration : 0 - ' + str(slider.value) +' s.'

### Create a slider for selecting the time between 0 to 4hrs: 

In [60]:
time = pd.Series(range(0,array_vals.shape[0]))
#slider = interactive(get_time, interval=(time.min()+1, time.max()+1, 1))
slider =  widgets.IntSlider(min=time.min(), max=time.max(), value=1500, description='Time(s):')
slider.layout = {'min_width':'100%'}
display(slider)

In [87]:
### Creating Maps using bqplot

### projecttion for the USA states map. 
sc_geo = AlbersUSA()
sc_geo.scale_factor=1080


states_map = Map(map_data=topo_load('map_data/TransportableArrMap.json'),
                #map_data=topo_load('map_data/USStatesMap.json'),
                 scales={'projection': sc_geo,'color': ColorScale(scheme='PuRd')},
                 color = get_col(slider.value).to_dict(),
                 interactions = {'click': 'select'},
                 selected_style={'opacity':5, 'fill': 'Green', 'stroke': 'white'},
                 unselected_style={'opacity': 1.0},
                 hovered_styles={'hovered_fill':'Orange'})

## setting the hover highlight to false: 
states_map.hover_highlight=False
states_map.selected_styles = {'selected_fill': 'Red', 'selected_stroke': 'Orange', 'selected_stroke_width': 10.0}

def sel_one_state(self, target):
    if(len(states_map.selected) == 0):
           states_map.selected=[]          
    if (len(states_map.selected)>0):
        if(target['data']['id'] < 1000):
            states_map.selected=[]
        else:
            states_map.selected=[]
            states_map.selected=[target['data']['id']]    
        
### Callback function call for selecting one point 
states_map.on_element_click(sel_one_state)


### setup the axes for the color scale 
col_sc = ColorScale(scheme='PuRd', scale_type='linear', min = array_vals.min().min(),max =array_vals.max().max() )
ax_c = ColorAxis(scale=col_sc, label='Tohoku Earthquake Magnitude',side='left',tick_format='0.3f')

### 
detector_loc = Figure(marks=[states_map],axes=[ax_c], title='Location of earthquake detectors in USA.')
#map_fig = Figure(marks=[states_map],axes=[ax_c] ,title='US States Map Example')

detector_loc

In [88]:
###  SEt the scales for the waveform 

x = LinearScale()
y = LinearScale(min=-1.7, max=1.7)


### create a animation time variable so that the transformation is smooth. 

try:
    ani_time = int(slider.value/10)
except TypeError:
    ani_time = 500


### Create a line plot using the X and Y values. 

wave = Lines(scales={'x': x, 'y': y}, colors=['red'],
               enable_move=False)

ax_x = Axis(scale=x, tick_format='0.f', label = 'Time (seconds)')
ax_y = Axis(scale=y, tick_format='0.3f', label = 'Magnitude', orientation='vertical')

waveform = Figure(marks=[wave], axes=[ax_x, ax_y], 
                title='Waveform:',
                animation_duration=ani_time)

# Calculate the waveform for the station for default values...

initial_station = np.random.randint(1000,1000 + len(locations))
initial_timeinterval= np.random.randint(10, len(array_vals))

### CFill the plot for the first time 
wave.x, wave.y = wave_form_detect(initial_station, initial_timeinterval)
waveform.title = 'Waveform for detector: ' + str(initial_station - 1000) +  ' for duration : 0 - ' + str(slider.value) +' s.'


#waveform

In [89]:
### Function calls for the callbacks. 

## The below function updates the wave on 
## selection of the detector. 
states_map.on_element_click(upd_wave_det)

## For updating the title with time and station 
states_map.on_element_click(upd_wf_title_det)

## For updating the color of the station based on slider valus 
slider.observe(upd_col_lat, names='value')

## For updating the title of the wavefor  based on slider valus 
slider.observe(upd_wf_title_time, names='value')

## For updating the waveform based on slider valus 
slider.observe(upd_wave_time, names='value')

In [90]:
def display_canvas():
    ## Plot the spectogram 
    make_spect()

    ## display the sliders 
    #H1 = widgets.HBox([ipywidgets.HTML("00:00:00"), slider, ipywidgets.HTML("04:00:00")])
    states_map.selected=[]

    display(slider)

    wave.x, wave.y = wave_form_detect(initial_station, initial_timeinterval)

    waveform.title = 'Waveform for detector: ' + str(initial_station-1000) + ' for duration : 0 - ' + str(slider.value) +' s.'

In [91]:
## make waveform and detector plot side by side 
plots = widgets.HBox( children=[waveform,detector_loc ])
#plots

In [92]:
### For Part 2 creation of Movie. 

## Create a play button
## step up the min and max values with the step of 100s 
time_interval = 60

play_button = Play(min=1000, max=14400,step=100, interval=time_interval)
jslink((play_button, 'value'), (slider, 'value'))

play_button.interval

60

In [93]:
display_canvas()
VBox([HBox([play_button]), plots])

<IPython.core.display.Javascript object>