In [33]:
# statistical libraries
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx

# visualization libraries
import matplotlib.pyplot as plt
from matplotlib import cm
import plotly
import plotly.plotly as py
from plotly.graph_objs import Scatter, Layout

# import own library functionality
import paths
from TRMM import TRMM
from Visualization import Visualization

# force autoreload of external modules on save
%load_ext autoreload
%autoreload 2
%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
YEARS = range(1998, 2017)
MONTHS = [3, 4, 5]
AGGREGATION_RESOLUTION = 6.125

In [35]:
df = TRMM.load_dataset(
    [1998, 1999],
    [3],
    aggregation_resolution=AGGREGATION_RESOLUTION,
    timestamp=True,
    invalidate=False,
    lon_slice=slice(61.125, 97.625),
    lat_slice=slice(4.125, 40.625),
    version='v3')

df

> Loading from cache...


Unnamed: 0_level_0,Unnamed: 1_level_0,888710400,888796800,888883200,888969600,889056000,889142400,889228800,889315200,889401600,889488000,...,922060800,922147200,922233600,922320000,922406400,922492800,922579200,922665600,922752000,922838400
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
7.0625,64.0625,17.519999,37.979999,0.0,3.12,13.77,40.71,2.43,126.989996,17.31,2.4,...,22.349999,0.15,2.61,8.46,0.18,132.329996,95.339997,15.03,0.06,0.09
7.0625,70.1875,62.069998,132.749995,60.899998,309.239991,59.849999,163.229994,0.09,32.579999,0.0,1.23,...,261.35999,290.189994,87.239997,4.68,0.15,13.859999,0.0,0.09,0.27,11.79
7.0625,76.3125,18.766308,406.731051,381.768621,305.470296,129.686964,96.375979,5.896414,14.139805,1.77,98.171943,...,728.081333,201.245418,429.425794,51.737041,16.852562,175.298025,1173.312872,765.352465,192.40758,125.021775
7.0625,82.4375,10.965764,1584.294771,1070.678084,540.545089,602.719107,965.044956,77.829585,327.843446,33.430342,3.875963,...,137.303224,36.495898,296.368789,104.852074,121.749747,131.307561,1237.759318,654.566401,478.459535,1715.077897
7.0625,88.5625,0.03,2192.699962,989.519978,349.979993,41.939999,6.96,6.9,10.44,31.41,29.519999,...,3.63,219.149995,533.579983,378.899989,427.019987,187.199994,745.079972,188.909994,1863.809947,3106.88993
7.0625,94.6875,40.127661,432.045043,765.144575,365.105595,221.634823,77.496575,65.745736,184.723788,238.366374,91.833446,...,211.101068,2215.821856,1663.424394,7538.982306,1090.911808,887.150378,3063.389917,5127.563519,1726.224865,3818.133584
13.1875,64.0625,53.459999,0.03,0.0,0.03,0.09,0.0,0.06,0.27,0.0,0.36,...,0.0,0.06,0.0,0.09,0.0,0.36,0.0,0.06,0.0,0.0
13.1875,70.1875,0.03,0.0,0.09,0.15,0.162,0.0,0.03,0.006,0.03,0.0,...,0.0,0.03,0.239932,0.097975,4.92,0.0,0.03,0.023986,0.06,0.0
13.1875,76.3125,6.779792,11.418573,6.86309,5.550914,4.350656,13.86487,0.745503,2.803146,3.115347,1.925135,...,27.127935,14.282321,210.949447,45.748709,20.262048,10.026435,1.794897,10.671747,1.362776,2.675273
13.1875,82.4375,0.0,0.0,0.21,43.475724,0.0,0.0,1.452584,1.00257,0.883523,0.06,...,0.0,12.51,28.32,8.007215,0.006,0.09,0.0,6.239481,0.0,0.09


In [84]:
def calculate_time_lag(i, j):
        """
        Calculate the time lag between two three-tuples

        :param i: Tuple like (l-1, l, l+1)
        :param j: Tuple like (m-1, m, m+1)

        :return: Time lag (number)
        """
        
        return 0.5 * min(i[2] - i[1], i[1] - i[0], j[2] - j[1], j[1] - j[0])

In [100]:
def calculate_synchronization(row1, row2):
        """
        Calculate the number of synchronous events between two rows

        :param row1: First row
        :param row2: Second row

        :return: The number of synchronous events (integer)
        """

        # initialize the total number of synchronous events for the two grid points
        num_sync_events = 0

        # iterate over all windows of size three in the first row
        for i_prev, i_current, i_next in TRMM.sliding_window(row1, window_size=3, padded=True):
            # calculate the max timestamp of row2 that could be synchronous
            earliest = i_current - 0.5 * min(i_current - i_prev, i_next - i_current)

            # print(row2.index)
            before_earliest = row2.index.get_loc(earliest, method='nearest') - 1
            after_current = row2.index.get_loc(i_next, method='nearest') + 1
            
            if before_earliest < 0:
                before_earliest = 0
            if after_current > len(row2.index) - 1:
                after_current = len(row2.index) - 1
                
            print(before_earliest, row2.iloc[before_earliest], after_current, row2.iloc[after_current])

            # pass through all events at the same or at a later time at the second location
            # for j_prev, j_current, j_next in TRMM.sliding_window(row2, window_size=3):
            for j_prev, j_current, j_next in TRMM.sliding_window(row2, window_size=3, padded=True):
                # the following optimizations reduce runtime from ~0.3s to 0.02s

                # calculate the difference
                current_diff = i_current - j_current
                # print('current diff', current_diff)

                # if the difference gets negative, break
                # the second pass will encompass these combinations
                # if current_diff < 0:
                    # break

                # if the events occur at the same time, they will be counted twice
                # thus only add half the value
                if current_diff == 0:
                    num_sync_events += 0.5
                    continue

                # continue for timestamps that cannot possibly be synchronous
                # i.e. are much too early
                # if j_current < earliest:
                    # continue

                # calculate the time lag based on the current state of the two sliding windows
                time_lag = TRMM.calculate_time_lag((i_prev, i_current, i_next), (j_prev, j_current, j_next))
                # print('time lag', i_current, j_current, time_lag)
                
                # if the second event lies within the time lag, it is fully synchronous
                if 0 < current_diff <= time_lag:
                    num_sync_events += 1.0
                    
        return num_sync_events

In [90]:
df.iloc[0].quantile(0.9)

1671.3299574583768

In [40]:
df.iloc[0][df.iloc[0] >= df.iloc[0].quantile(0.9)]

890179200    1677.089958
890265600    5501.519918
890438400    1723.919950
891043200    3226.709923
891129600    4252.499904
921715200    2439.539940
921801600    3157.889906
Name: (7.0625, 64.0625), dtype: float64

In [65]:
extreme_events = TRMM.extract_extreme_events(df, quantile=0.90, pad=False)
extreme_events

Unnamed: 0_level_0,Unnamed: 1_level_0,888710400,888796800,888883200,888969600,889056000,889142400,889228800,889315200,889401600,889488000,...,922060800,922147200,922233600,922320000,922406400,922492800,922579200,922665600,922752000,922838400
latitude,longitude,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
7.0625,64.0625,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7.0625,70.1875,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7.0625,76.3125,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
7.0625,82.4375,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7.0625,88.5625,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
7.0625,94.6875,False,False,False,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,True,False,False
13.1875,64.0625,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
13.1875,70.1875,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
13.1875,76.3125,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
13.1875,82.4375,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [66]:
row_0 = extreme_events.iloc[0]
row_1 = extreme_events.iloc[1]
row_10 = extreme_events.iloc[10]
row_30 = extreme_events.iloc[30]

In [67]:
true_0 = row_0[row_0]
true_0

890179200    True
890265600    True
890438400    True
891043200    True
891129600    True
921715200    True
921801600    True
Name: (7.0625, 64.0625), dtype: bool

In [68]:
true_1 = row_1[row_1]
true_1

890006400    True
890092800    True
890179200    True
890438400    True
891129600    True
921110400    True
921628800    True
Name: (7.0625, 70.1875), dtype: bool

In [69]:
true_10 = row_10[row_10]
true_10

889747200    True
889833600    True
890265600    True
890352000    True
921456000    True
921542400    True
922406400    True
Name: (13.1875, 88.5625), dtype: bool

In [70]:
true_30 = row_30[row_30]
true_30

890179200    True
891216000    True
920419200    True
920592000    True
922406400    True
922579200    True
922665600    True
Name: (37.6875, 64.0625), dtype: bool

In [101]:
true_t1 = pd.Series([True, True, True, True, True, True], index=[0, 7, 9, 10, 12, 999999999])
true_t2 = pd.Series([True, True, True, True, True, True], index=[0, 1, 5, 7, 10, 999999999])

calculate_synchronization(true_t1, true_t1)

0 True 2 True
0 True 3 True
1 True 4 True
2 True 5 True
3 True 5 True
4 True 5 True


3.0

In [102]:
true_t1 = pd.Series([True, True, True, True, True, True], index=[0, 7, 9, 10, 12, 999999999])
true_t2 = pd.Series([True, True, True, True, True, True], index=[0, 1, 5, 7, 10, 999999999])

calculate_synchronization(true_t1, true_t1)

0 True 2 True
0 True 3 True
1 True 4 True
2 True 5 True
3 True 5 True
4 True 5 True


3.0

In [104]:
for row1, row2 in [(true_0, true_0), (true_0, true_1), (true_0, true_10), (true_0, true_30), (true_t1, true_t2)]:
    sync1 = calculate_synchronization(row1, row2)
    sync2 = calculate_synchronization(row2, row1)    
    sync, count = TRMM.calculate_sync_strength(row1, row2)
    print(float(sync1), float(sync2), sync, float(count))

0 True 2 True
0 True 3 True
1 True 4 True
2 True 5 True
3 True 6 True
4 True 6 True
5 True 6 True
0 True 2 True
0 True 3 True
1 True 4 True
2 True 5 True
3 True 6 True
4 True 6 True
5 True 6 True
3.5 3.5 1.0 5.0
1 True 3 True
1 True 4 True
2 True 5 True
3 True 5 True
3 True 6 True
5 True 6 True
5 True 6 True
0 True 1 True
0 True 1 True
0 True 3 True
0 True 5 True
2 True 6 True
4 True 6 True
4 True 6 True
1.5 1.5 0.4 2.0
1 True 3 True
1 True 4 True
2 True 4 True
2 True 4 True
2 True 6 True
4 True 6 True
4 True 6 True
0 True 1 True
0 True 2 True
0 True 3 True
0 True 6 True
4 True 6 True
4 True 6 True
5 True 6 True
0.5 0.5 0.2 1.0
0 True 1 True
0 True 1 True
0 True 2 True
0 True 2 True
0 True 5 True
3 True 5 True
3 True 6 True
0 True 5 True
1 True 6 True
4 True 6 True
4 True 6 True
5 True 6 True
5 True 6 True
5 True 6 True
0.5 0.5 0.0 0.0
0 True 4 True
2 True 5 True
3 True 5 True
3 True 5 True
3 True 5 True
4 True 5 True
0 True 1 True
0 True 2 True
0 True 2 True
0 True 4 True
1 True 5 Tru

In [123]:
for row1, row2 in [(true_0, true_0), (true_0, true_1), (true_0, true_10), (true_0, true_30), (true_t1, true_t2)]:
    sync1 = calculate_synchronization(row1, row2)
    sync2 = calculate_synchronization(row2, row1)    
    sync, count = TRMM.calculate_sync_strength(row1, row2)
    print(float(sync1), float(sync2), sync, float(count))

3.5 3.5 1.0 7.0
1.5 1.5 0.42857142857142855 3.0
0.5 0.5 0.14285714285714285 1.0
0.5 0.5 0.14285714285714285 1.0
1.0 1.0 0.5 2.0


In [16]:
extreme_events.shape[0] * extreme_events.shape[1]

2232

In [12]:
extreme_events.sum().sum()

252

In [63]:
TRMM.calculate_synchronization(first_true, second_true)

1.0

In [64]:
TRMM.calculate_synchronization(second_true, first_true)

1.0

In [58]:
TRMM.calculate_synchronization(extreme_events.iloc[0], extreme_events.iloc[30])

30.0

In [41]:
sync_strength, sync_count = TRMM.calculate_sync_strength(extreme_events.iloc[0], extreme_events.iloc[1])

print(sync_strength, sync_count)

0.4 2.0


0.4