# Import library

In [None]:
import os, zipfile
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from google.colab import files

# Read Datasets

In [None]:
# Download datasets
!wget --no-check-certificate \https://gitlab.com/iameko/earthquake-prediction/-/raw/main/dataset/earthquake.zip \-O /tmp/earthquake.zip

--2020-12-15 08:48:23--  https://gitlab.com/iameko/earthquake-prediction/-/raw/main/dataset/earthquake.zip
Resolving gitlab.com (gitlab.com)... 172.65.251.78, 2606:4700:90:0:f22e:fbec:5bed:a9b9
Connecting to gitlab.com (gitlab.com)|172.65.251.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 188975372 (180M) [application/zip]
Saving to: ‘/tmp/earthquake.zip’


2020-12-15 08:48:29 (35.0 MB/s) - ‘/tmp/earthquake.zip’ saved [188975372/188975372]



In [None]:
# Extract the downloaded archive into /tmp.
# The context manager closes the archive even if extraction raises.
local_zip = '/tmp/earthquake.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/tmp')

In [None]:
# Path of the CSV file read below (note: a file path, despite the name `base_dir`)
base_dir = '/tmp/dataset/1900-2019.csv'

In [None]:
# Read the CSV into a DataFrame, using its first column as the index
df = pd.read_csv(base_dir, index_col=0)
# Preview the first rows
df.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,1900-02-16T22:10:00.000Z,60.0,-140.0,,,,,,,,ushis,ushis387,2018-06-04T20:43:44.000Z,Southeastern Alaska,earthquake,,,,,reviewed,ushis,ushis
1,1900-04-09T14:00:00.000Z,41.4,-81.9,,3.4,fa,,,,,ushis,ushis388,2018-06-04T20:43:44.000Z,"Cleveland urban area, Ohio",earthquake,,,,,reviewed,ushis,bar
2,1900-04-30T22:41:14.000Z,36.9,-121.6,,4.5,ml,,,,,ushis,ushis389,2018-06-04T20:43:44.000Z,Central California,earthquake,,,,,reviewed,ushis,cdmg
3,1900-07-29T06:59:00.000Z,-10.0,165.0,0.0,7.6,mw,,,,,iscgem,iscgem16957713,2020-07-09T21:50:21.653Z,Santa Cruz Islands,earthquake,,,,,automatic,iscgem,iscgem
4,1900-08-01T07:45:00.000Z,40.0,-112.1,,,,,,,,ushis,ushis390,2018-06-04T20:43:44.000Z,Utah,earthquake,,,,,reviewed,ushis,ushis


In [None]:
# Number of rows loaded
len(df)

3801805

In [None]:
df.tail()  # preview the last rows of the raw data

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
3801800,2019-12-31T23:53:22.516Z,45.0723,-74.7236,18.07,1.6,ml,,106.0,0.397,0.3,us,us70006ulk,2020-03-14T22:22:47.040Z,"6km N of Cornwall, Canada",earthquake,1.9,7.1,0.115,10.0,reviewed,us,us
3801801,2019-12-31T23:53:33.540Z,17.9996,-66.9188,2.0,1.51,md,3.0,209.0,0.0466,0.15,pr,pr2019365201,2020-01-02T20:30:07.142Z,"1km NNW of Fuig, Puerto Rico",earthquake,1.71,10.84,0.29,2.0,reviewed,pr,pr
3801802,2019-12-31T23:54:01.780Z,17.9241,-66.8786,6.0,2.19,md,6.0,221.0,0.051,0.07,pr,pr2019365202,2020-01-02T20:29:47.798Z,"6km SSE of Guanica, Puerto Rico",earthquake,0.74,0.36,0.3,6.0,reviewed,pr,pr
3801803,2019-12-31T23:56:59.933Z,-20.5382,168.9571,21.46,4.3,mb,,147.0,1.276,0.5,us,us7000709t,2020-03-14T22:22:47.040Z,"114km SSW of Isangel, Vanuatu",earthquake,5.6,7.7,0.16,11.0,reviewed,us,us
3801804,2019-12-31T23:59:40.580Z,17.8571,-66.9298,4.0,1.9,md,4.0,293.0,0.1603,0.14,pr,pr2019365203,2020-01-03T01:31:23.482Z,"12km S of Guanica, Puerto Rico",earthquake,2.86,2.46,0.29,2.0,reviewed,pr,pr


# Data Preprocessing

In [None]:
# Filter rows by year range, minimum magnitude and event type
def select_datasets(df, year_from=1900, year_to=2019, mag=0, types='earthquake'):
  """Return the rows of `df` that satisfy all four filters.

  Parameters
  ----------
  df : DataFrame with 'year', 'mag' and 'type' columns.
  year_from, year_to : inclusive bounds on the 'year' column.
  mag : inclusive lower bound on the 'mag' column.
  types : value the 'type' column must equal.
  """
  in_period = df['year'].between(year_from, year_to)
  strong_enough = df['mag'] >= mag
  right_kind = df['type'] == types
  keep = in_period & strong_enough & right_kind
  return df[keep]

# Select columns
def select_columns(df, columns):
  """Return `df[columns]`, i.e. `df` indexed by the given column label(s)."""
  return df[columns]

# Add column year and month for filtering or grouping
def add_column_year_month(df):
  """Return a copy of `df` with integer 'year' and 'month' columns.

  Both columns are derived from the 'time' column, which is parsed once
  with `pd.to_datetime`. The input frame is not modified, so the function
  is safe to call on a column-sliced frame (no chained-assignment warning)
  and safe to re-run.

  Parameters
  ----------
  df : DataFrame with a 'time' column parseable by `pd.to_datetime`.

  Returns
  -------
  DataFrame with the original columns followed by 'year' and 'month'.
  """
  parsed = pd.to_datetime(df['time'])
  return df.assign(year=parsed.dt.year, month=parsed.dt.month)

# Quick textual summary of a frame
def stat_df(df):
  """Print the row count, `df.info()` and the per-column NaN counts.

  Each section is preceded by a banner line. `df.info()` prints its own
  report and returns None, so a literal `None` follows the info section.
  """
  sections = (
    ('===== LENGTH =====', lambda: len(df)),
    ('===== INFO =====', lambda: df.info()),
    ('===== CHECK NaN =====', lambda: df.isna().sum()),
  )
  for banner, compute in sections:
    print(banner)
    print(compute())
  print('===== END =====')

In [None]:
# Work on a copy so later steps do not modify the raw frame `df`
dataset = df.copy()

In [None]:
# Keep only the columns time, latitude, longitude, mag and type
dataset = select_columns(dataset, ['time', 'latitude', 'longitude', 'mag', 'type'])

# Add column year and month
dataset = add_column_year_month(dataset)

# Keep events from 1966-2016 with magnitude >= 4.5 (type defaults to 'earthquake')
dataset = select_datasets(dataset, 1966, 2016, 4.5)

# Stats of dataset
stat_df(dataset)

# Check head
dataset.head()

===== LENGTH =====
222529
===== INFO =====
<class 'pandas.core.frame.DataFrame'>
Int64Index: 222529 entries, 27282 to 3297157
Data columns (total 7 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   time       222529 non-null  object 
 1   latitude   222529 non-null  float64
 2   longitude  222529 non-null  float64
 3   mag        222529 non-null  float64
 4   type       222529 non-null  object 
 5   year       222529 non-null  int64  
 6   month      222529 non-null  int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 13.6+ MB
None
===== CHECK NaN =====
time         0
latitude     0
longitude    0
mag          0
type         0
year         0
month        0
dtype: int64
===== END =====


Unnamed: 0,time,latitude,longitude,mag,type,year,month
27282,1966-01-05T17:21:29.000Z,13.248,95.42,5.8,earthquake,1966,1
27300,1966-01-11T03:10:54.000Z,0.586,120.141,5.9,earthquake,1966,1
27302,1966-01-11T14:06:17.000Z,33.643,137.232,5.3,earthquake,1966,1
27303,1966-01-11T14:16:32.000Z,33.675,137.244,6.0,earthquake,1966,1
27312,1966-01-13T10:41:14.000Z,52.829,172.048,5.7,earthquake,1966,1


In [None]:
# Baseline data select from Mainland China

# Select area
def select_areas(df, lats, lons):
  """Return the rows of `df` that lie inside a latitude/longitude box.

  Parameters
  ----------
  df : DataFrame with 'latitude' and 'longitude' columns.
  lats : (min_lat, max_lat), both bounds inclusive.
  lons : (min_lon, max_lon), both bounds inclusive.

  All four conditions are combined into one boolean mask aligned with
  `df`. Filtering in two steps with masks built on the unfiltered frame
  forces pandas to reindex the second mask and emits a UserWarning.
  """
  inside_lat = (df['latitude'] >= lats[0]) & (df['latitude'] <= lats[1])
  inside_lon = (df['longitude'] >= lons[0]) & (df['longitude'] <= lons[1])
  return df[inside_lat & inside_lon]

# Split a coordinate range into equally wide consecutive bands
def divide_areas(coordinates, n_rectangle):
    """Return `n_rectangle` (lower, upper) tuples covering the range.

    `coordinates[0]` is the start and `coordinates[1]` the end of the range;
    every band has width (end - start) / n_rectangle.
    """
    start, stop = coordinates[0], coordinates[1]
    width = (stop - start) / n_rectangle
    bands = []
    for k in range(n_rectangle):
        lower = start + (width * k)
        upper = start + (width * (k + 1))
        bands.append((lower, upper))
    return bands

# Classify the area into n_rectangle [1, 2, ..., N]
# Example, 9 rectangles
def classify_areas(row, lats, lons):
  """Return the 1-based grid cell number that contains the row's location.

  Cells are numbered row-major: all longitude bands of the first latitude
  band, then those of the second latitude band, and so on. The cell number
  is therefore `i * len(lons) + j + 1` for latitude band i and longitude
  band j.

  Parameters
  ----------
  row : mapping with 'latitude' and 'longitude' entries (e.g. a DataFrame row).
  lats, lons : lists of (lower, upper) band tuples in ascending order.

  Returns
  -------
  int cell number, or None when the location is outside the grid.

  Bands are half-open [lower, upper), except the last band of each axis,
  which also includes its upper edge. Without that, a point lying exactly
  on the outer boundary of the grid would belong to no cell.
  """
  latitude, longitude = row['latitude'], row['longitude']
  last_lat, last_lon = len(lats) - 1, len(lons) - 1

  def _contains(band, value, is_last):
    # Half-open interval; the final band is closed on its upper edge too
    if band[0] <= value < band[1]:
      return True
    return is_last and value == band[1]

  for i, lat_band in enumerate(lats):
    if not _contains(lat_band, latitude, i == last_lat):
      continue
    for j, lon_band in enumerate(lons):
      if _contains(lon_band, longitude, j == last_lon):
        return i * len(lons) + j + 1
  return None

In [None]:
# Bounding box of the baseline area: [min, max] latitude and [min, max] longitude
lats = [23, 45]
lons = [75, 119]

# Keep only the events inside the bounding box
dataset_areas = select_areas(dataset, lats, lons)

# Stats of dataset
stat_df(dataset_areas)

# Check head
dataset_areas.head()

===== LENGTH =====
5603
===== INFO =====
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5603 entries, 27374 to 3294371
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   time       5603 non-null   object 
 1   latitude   5603 non-null   float64
 2   longitude  5603 non-null   float64
 3   mag        5603 non-null   float64
 4   type       5603 non-null   object 
 5   year       5603 non-null   int64  
 6   month      5603 non-null   int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 350.2+ KB
None
===== CHECK NaN =====
time         0
latitude     0
longitude    0
mag          0
type         0
year         0
month        0
dtype: int64
===== END =====


Unnamed: 0,time,latitude,longitude,mag,type,year,month
27374,1966-02-05T15:12:31.000Z,26.246,103.187,6.4,earthquake,1966,2
27397,1966-02-13T10:44:40.000Z,26.12,103.222,5.9,earthquake,1966,2
27460,1966-03-06T02:10:56.000Z,31.525,80.487,6.7,earthquake,1966,3
27461,1966-03-06T02:15:56.000Z,31.422,80.554,6.3,earthquake,1966,3
27465,1966-03-07T21:29:19.000Z,37.156,114.875,6.5,earthquake,1966,3


In [None]:
# Split each axis into three equal bands (3 x 3 = 9 rectangles)
divided_lats = divide_areas(lats, 3)
divided_lons = divide_areas(lons, 3)
print(divided_lats)
print(divided_lons)

[(23.0, 30.333333333333332), (30.333333333333332, 37.666666666666664), (37.666666666666664, 45.0)]
[(75.0, 89.66666666666667), (89.66666666666667, 104.33333333333333), (104.33333333333333, 119.0)]


In [None]:
# Assign every event its rectangle number (1-9) via a row-wise apply
dataset_areas['classify'] = dataset_areas.apply(lambda row: classify_areas(row, divided_lats, divided_lons), axis=1)

# Check head
dataset_areas.head()

Unnamed: 0,time,latitude,longitude,mag,type,year,month,classify
27374,1966-02-05T15:12:31.000Z,26.246,103.187,6.4,earthquake,1966,2,2
27397,1966-02-13T10:44:40.000Z,26.12,103.222,5.9,earthquake,1966,2,2
27460,1966-03-06T02:10:56.000Z,31.525,80.487,6.7,earthquake,1966,3,4
27461,1966-03-06T02:15:56.000Z,31.422,80.554,6.3,earthquake,1966,3,4
27465,1966-03-07T21:29:19.000Z,37.156,114.875,6.5,earthquake,1966,3,6


In [None]:
# In the output above, 1966-01 is missing because no earthquake occurred in the selected area during that month
# To bring such months back, build the list of (year, month) pairs and left-merge the area dataset onto it

# Create df for time
def create_df_time(df):
  """Return the distinct (year, month) pairs of `df` in order of first appearance.

  Parameters
  ----------
  df : DataFrame with 'year' and 'month' columns.

  Returns
  -------
  DataFrame with columns 'year' and 'month' and a fresh 0..n-1 index.

  Uses `drop_duplicates` instead of a round trip through a MultiIndex and
  Python lists; the column dtypes of the input are preserved.
  """
  return df[['year', 'month']].drop_duplicates().reset_index(drop=True)

# Merge dataset_areas and dataset unique time
def merge_time_areas(df_area, df_time):
  """Left-merge the area events onto the list of (year, month) pairs.

  Parameters
  ----------
  df_area : DataFrame of events with 'year', 'month' and 'classify' columns.
  df_time : DataFrame with 'year' and 'month' columns; every one of its
    rows is kept in the result.

  Returns
  -------
  DataFrame in which a (year, month) pair without any event appears once,
  with NaN in the event columns and 0 in 'classify'. The 'classify' column
  is cast to int64.
  """
  merged = pd.merge(df_time, df_area, on=['year', 'month'], how='left')
  # Missing class -> 0; vectorised cast instead of a per-row int() call
  merged['classify'] = merged['classify'].fillna(0).astype('int64')
  return merged

In [None]:
# Distinct (year, month) pairs taken from the filtered `dataset`
df_time = create_df_time(dataset)

# Left-merge the area events onto those pairs; pairs without an event get classify = 0
merged_areas = merge_time_areas(dataset_areas, df_time)

# Check head
merged_areas.head()

Unnamed: 0,year,month,time,latitude,longitude,mag,type,classify
0,1966,1,,,,,,0
1,1966,2,1966-02-05T15:12:31.000Z,26.246,103.187,6.4,earthquake,2
2,1966,2,1966-02-13T10:44:40.000Z,26.12,103.222,5.9,earthquake,2
3,1966,3,1966-03-06T02:10:56.000Z,31.525,80.487,6.7,earthquake,4
4,1966,3,1966-03-06T02:15:56.000Z,31.422,80.554,6.3,earthquake,4


In [None]:
# Pivot merged_areas: one row per (year, month), one column per classify value,
# each cell holding the number of rows with that value (0 where there are none)
merged_areas_agg = pd.pivot_table(merged_areas, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_agg

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,0,1,2,3,4,5,6,7,8,9
0,1966,1,1,0,0,0,0,0,0,0,0,0
1,1966,2,0,0,2,0,0,0,0,0,0,0
2,1966,3,0,0,0,0,2,0,4,0,0,2
3,1966,4,1,0,0,0,0,0,0,0,0,0
4,1966,5,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
607,2016,8,0,0,3,0,7,0,0,1,1,0
608,2016,9,0,1,8,0,0,0,0,0,1,1
609,2016,10,0,0,5,0,0,5,0,0,0,0
610,2016,11,0,1,2,0,0,1,0,1,2,0


In [None]:
# List the region labels that have no column yet
def not_in_columns(df, n_dims):
  """Return the labels '1'..str(n_dims) that are absent from `df.columns`.

  Column labels are compared by their string form, so integer and string
  labels are treated alike. The result is a list of strings in ascending
  numeric order.
  """
  present = df.columns.values.astype(str)
  wanted = (str(number) for number in range(1, n_dims + 1))
  return [label for label in wanted if label not in present]

# Select only the classified count columns as baseline freq
def set_baseline_freq(df, n_dims):
  """Build the per-region frequency table from a pivoted count frame.

  Parameters
  ----------
  df : pivot result whose columns carry 'year' and 'month' on the first
    level and the class numbers on the last level.
  n_dims : number of regions; the output always has exactly the columns
    'region_1' .. 'region_<n_dims>', in that order.

  Returns
  -------
  DataFrame of counts, one column per region. Regions that never occur in
  `df` are filled with 0. A class-0 column, if present, is dropped because
  it is outside the 1..n_dims range.

  The count columns are selected by label, not by position. A positional
  `iloc[:, 3:]` assumes a class-0 column is always present; when it is
  absent, the counts of region 1 are discarded and replaced by zeros.
  """
  # Everything that is not a key column is a class count column
  top_level = df.columns.get_level_values(0)
  counts = df.loc[:, ~top_level.isin(['year', 'month'])].copy()
  # Flatten to plain integer class labels (also drops the columns' axis name)
  counts.columns = [int(label) for label in counts.columns.get_level_values(-1)]
  # Keep exactly 1..n_dims in order; absent regions become all-zero columns
  counts = counts.reindex(columns=list(range(1, n_dims + 1)), fill_value=0)
  return counts.add_prefix('region_')

# Turn a count into a presence flag: 1 = at least one earthquake, 0 = none
def multihot(x):
  """Return 1 when `x` is greater than 0, otherwise 0."""
  if x > 0:
    return 1
  return 0

# Baseline multihot
def set_baseline_multihot(df, n_dims): 
  """Replace the first `n_dims` columns of `df` by their multihot flags.

  Each of those columns is overwritten, by position, with the result of
  applying `multihot` to every cell. `df` is modified in place and also
  returned.
  """
  column = 0
  while column < n_dims:
    flags = df.iloc[:, column].apply(lambda x: multihot(x))
    df.iloc[:, column] = flags
    column += 1
  return df

In [None]:
# Frequency table for the 9 baseline regions
baseline_freq = set_baseline_freq(merged_areas_agg, 9)
baseline_freq

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9
0,0,0,0,0,0,0,0,0,0
1,0,2,0,0,0,0,0,0,0
2,0,0,0,2,0,4,0,0,2
3,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
607,0,3,0,7,0,0,1,1,0
608,1,8,0,0,0,0,0,1,1
609,0,5,0,0,5,0,0,0,0
610,1,2,0,0,1,0,1,2,0


In [None]:
# Multihot table for the 9 baseline regions, built from a freshly computed frequency table
baseline_multihot = set_baseline_multihot(set_baseline_freq(merged_areas_agg, 9), 9)
baseline_multihot

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9
0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0
2,0,0,0,1,0,1,0,0,1
3,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
607,0,1,0,1,0,0,1,1,0
608,1,1,0,0,0,0,0,1,1
609,0,1,0,0,1,0,0,0,0
610,1,1,0,0,1,0,1,1,0


# Saving to files

In [None]:
# Write both baseline tables to CSV files in the current working directory
baseline_freq.to_csv('baseline_freq.csv')
baseline_multihot.to_csv('baseline_multihot.csv')

# Hand the CSV files to `files.download` (imported from google.colab)
files.download('baseline_freq.csv')
files.download('baseline_multihot.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Proposed Feature Engineering

The dataset will be divided into 6 parts, all using the range 1966-2016 and magnitudes from 6.0 (inclusive) up to 8.0 (exclusive).

- part 1: 12 regions
- part 2: 24 regions
- part 3: 48 regions
- part 4: 72 regions
- part 5: 96 regions
- part 6: 120 regions

In [None]:
# Copy datasets
df_proposed = df.copy()

# Keep only the columns time, latitude, longitude, mag and type
df_proposed = select_columns(df_proposed, ['time', 'latitude', 'longitude', 'mag', 'type'])

# Add column year and month
df_proposed = add_column_year_month(df_proposed)

# Select df_proposed from 1966-2016 with 6.0 <= magnitude < 8.0
df_proposed = select_datasets(df_proposed, 1966, 2016, 6.0)
df_proposed = df_proposed[df_proposed['mag'] < 8.0]

# Stats of df_proposed
stat_df(df_proposed)

# Check head
df_proposed.head()

===== LENGTH =====
7099
===== INFO =====
<class 'pandas.core.frame.DataFrame'>
Int64Index: 7099 entries, 27303 to 3295924
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   time       7099 non-null   object 
 1   latitude   7099 non-null   float64
 2   longitude  7099 non-null   float64
 3   mag        7099 non-null   float64
 4   type       7099 non-null   object 
 5   year       7099 non-null   int64  
 6   month      7099 non-null   int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 443.7+ KB
None
===== CHECK NaN =====
time         0
latitude     0
longitude    0
mag          0
type         0
year         0
month        0
dtype: int64
===== END =====


Unnamed: 0,time,latitude,longitude,mag,type,year,month
27303,1966-01-11T14:16:32.000Z,33.675,137.244,6.0,earthquake,1966,1
27341,1966-01-22T14:27:09.000Z,55.956,-153.942,6.2,earthquake,1966,1
27355,1966-01-28T05:42:17.000Z,-17.114,168.559,6.0,earthquake,1966,1
27371,1966-02-04T10:39:12.000Z,-15.899,167.914,6.4,earthquake,1966,2
27372,1966-02-05T02:01:45.000Z,39.093,21.777,6.2,earthquake,1966,2


In [None]:
# Extent of the event locations
def min_max_locations(df):
  """Return (min latitude, max latitude, min longitude, max longitude) of `df`."""
  latitudes = df['latitude']
  longitudes = df['longitude']
  return latitudes.min(), latitudes.max(), longitudes.min(), longitudes.max()

In [None]:
# Get the latitude / longitude extent of the selected events
lat_min, lat_max, lon_min, lon_max = min_max_locations(df_proposed)
print(lat_min, lat_max, lon_min, lon_max)

-66.44800000000001 84.948 -179.997 179.998


In [None]:
# Latitude spans [-90, 90] and longitude spans [-180, 180],
# so the full area is 180x360 degrees and a fine grid means many rectangles
# Example: 15-degree cells give 180/15 = 12 by 360/15 = 24, i.e. 12x24 = 288 rectangles
# The grids below are coarser: 12, 24, 48, 72, 96 and 120 rectangles
# Full latitude and longitude ranges
lats_proposed = [-90, 90]
lons_proposed = [-180, 180]

# 12 parts: 3 latitude bands x 4 longitude bands
divided_lats_proposed1 = divide_areas(lats_proposed, 3)
divided_lons_proposed1 = divide_areas(lons_proposed, 4)

# 24 parts: 4 latitude bands x 6 longitude bands
divided_lats_proposed2 = divide_areas(lats_proposed, 4)
divided_lons_proposed2 = divide_areas(lons_proposed, 6)

# 48 parts: 6 latitude bands x 8 longitude bands
divided_lats_proposed3 = divide_areas(lats_proposed, 6)
divided_lons_proposed3 = divide_areas(lons_proposed, 8)

# 72 parts: 6 latitude bands x 12 longitude bands
divided_lats_proposed4 = divide_areas(lats_proposed, 6)
divided_lons_proposed4 = divide_areas(lons_proposed, 12)

# 96 parts: 8 latitude bands x 12 longitude bands
divided_lats_proposed5 = divide_areas(lats_proposed, 8)
divided_lons_proposed5 = divide_areas(lons_proposed, 12)

# 120 parts: 8 latitude bands x 15 longitude bands
divided_lats_proposed6 = divide_areas(lats_proposed, 8)
divided_lons_proposed6 = divide_areas(lons_proposed, 15)

print(divided_lats_proposed1)
print(divided_lons_proposed1)


[(-90.0, -30.0), (-30.0, 30.0), (30.0, 90.0)]
[(-180.0, -90.0), (-90.0, 0.0), (0.0, 90.0), (90.0, 180.0)]


In [None]:
# Classify the events once per grid resolution (12, 24, 48, 72, 96 and 120 parts)
def classify_proposed(divided_lats, divided_lons):
  """Return a copy of `df_proposed` with a 'classify' column for one grid.

  Parameters
  ----------
  divided_lats, divided_lons : band lists as produced by `divide_areas`.

  Prints 'Finish <N> parts' when done, where N is the number of grid cells
  (latitude bands x longitude bands).
  """
  classified = df_proposed.copy()
  classified['classify'] = classified.apply(lambda row: classify_areas(row, divided_lats, divided_lons), axis=1)
  print(f'Finish {len(divided_lats) * len(divided_lons)} parts')
  return classified

df_dataset1 = classify_proposed(divided_lats_proposed1, divided_lons_proposed1)
df_dataset2 = classify_proposed(divided_lats_proposed2, divided_lons_proposed2)
df_dataset3 = classify_proposed(divided_lats_proposed3, divided_lons_proposed3)
df_dataset4 = classify_proposed(divided_lats_proposed4, divided_lons_proposed4)
df_dataset5 = classify_proposed(divided_lats_proposed5, divided_lons_proposed5)
df_dataset6 = classify_proposed(divided_lats_proposed6, divided_lons_proposed6)

Finish 12 parts
Finish 24 parts
Finish 48 parts
Finish 72 parts
Finish 96 parts
Finish 120 parts


In [None]:
# Distinct (year, month) pairs present in df_proposed
df_time_proposed = create_df_time(df_proposed)

# Left-merge each classified dataset onto those pairs (one result per grid resolution)
merged_areas_proposed1 = merge_time_areas(df_dataset1, df_time_proposed)
merged_areas_proposed2 = merge_time_areas(df_dataset2, df_time_proposed)
merged_areas_proposed3 = merge_time_areas(df_dataset3, df_time_proposed)
merged_areas_proposed4 = merge_time_areas(df_dataset4, df_time_proposed)
merged_areas_proposed5 = merge_time_areas(df_dataset5, df_time_proposed)
merged_areas_proposed6 = merge_time_areas(df_dataset6, df_time_proposed)

# Check head
merged_areas_proposed1.head()

Unnamed: 0,year,month,time,latitude,longitude,mag,type,classify
0,1966,1,1966-01-11T14:16:32.000Z,33.675,137.244,6.0,earthquake,12
1,1966,1,1966-01-22T14:27:09.000Z,55.956,-153.942,6.2,earthquake,9
2,1966,1,1966-01-28T05:42:17.000Z,-17.114,168.559,6.0,earthquake,8
3,1966,2,1966-02-04T10:39:12.000Z,-15.899,167.914,6.4,earthquake,8
4,1966,2,1966-02-05T02:01:45.000Z,39.093,21.777,6.2,earthquake,11


In [None]:
# Pivot each merged frame: one row per (year, month), one column per classify value,
# each cell holding the number of rows with that value (0 where there are none)
merged_areas_proposed_agg1 = pd.pivot_table(merged_areas_proposed1, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg2 = pd.pivot_table(merged_areas_proposed2, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg3 = pd.pivot_table(merged_areas_proposed3, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg4 = pd.pivot_table(merged_areas_proposed4, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg5 = pd.pivot_table(merged_areas_proposed5, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg6 = pd.pivot_table(merged_areas_proposed6, columns='classify',index=['year', 'month'], aggfunc={'classify': 'count'}, fill_value=0).reset_index()
merged_areas_proposed_agg1

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,1,2,3,4,5,6,7,8,9,10,11,12
0,1966,1,0,0,0,0,0,0,0,1,1,0,0,1
1,1966,2,0,2,1,0,1,0,1,7,0,0,2,0
2,1966,3,0,0,0,0,1,0,1,5,0,0,2,5
3,1966,4,0,2,0,1,0,0,0,3,1,0,1,2
4,1966,5,0,0,0,1,0,1,0,3,2,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,1,2,0,0,1,2,0,5,0,0,1,1
607,2016,9,0,0,0,3,2,2,0,3,0,0,0,3
608,2016,10,0,1,0,0,1,0,0,3,0,0,2,1
609,2016,11,0,3,0,5,0,1,0,1,0,0,1,2


In [None]:
merged_areas_proposed_agg2  # pivoted counts for the 24-part grid

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
0,1966,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0
1,1966,2,0,0,2,0,0,0,1,0,0,0,1,4,0,0,0,1,3,2,0,0,0,0,0,0
2,1966,3,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,7,4,0,0,0,0,0,0
3,1966,4,0,0,0,0,1,0,0,2,0,0,0,2,0,0,0,1,0,2,1,0,0,0,0,1
4,1966,5,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,2,2,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,1,0,2,0,0,0,1,1,1,0,0,3,0,0,0,1,1,2,0,0,0,0,0,0
607,2016,9,0,0,0,0,0,1,2,1,0,0,0,4,0,1,0,0,0,3,0,0,0,0,0,1
608,2016,10,0,0,0,0,0,0,1,1,0,0,1,2,0,0,0,2,0,1,0,0,0,0,0,0
609,2016,11,0,0,0,0,0,0,0,3,0,0,0,6,0,1,0,0,1,2,0,0,0,0,0,0


In [None]:
merged_areas_proposed_agg3  # pivoted counts for the 48-part grid

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,1,3,4,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,36,37,38,39,40,41,42,43,44,45,47,48
0,1966,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0
1,1966,2,0,0,0,0,0,0,0,2,0,1,0,0,1,0,0,0,0,0,1,3,0,0,0,0,0,1,2,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0
2,1966,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,3,1,0,0,0,0,2,5,0,0,0,0,0,0,0,0
3,1966,4,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,2,0,0,0,0,0,0,0
4,1966,5,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,2,0,0,1,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,0,0,0,0,0,1,0,2,0,0,0,0,1,0,1,1,0,0,1,2,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0
607,2016,9,0,0,0,0,0,0,0,0,0,0,0,3,2,0,1,0,0,0,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0
608,2016,10,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0
609,2016,11,0,0,0,0,0,0,3,0,0,0,0,5,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0


In [None]:
merged_areas_proposed_agg4  # pivoted counts for the 72-part grid

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,1,4,5,6,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33,34,35,36,37,39,40,41,42,43,44,45,46,47,49,50,51,53,54,55,56,57,58,59,60,61,62,64,66,67,70,71,72
0,1966,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1,1966,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,1,1,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0
2,1966,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,4,0,0,0,0,0,0,0,2,5,0,0,0,0,0,0,0,0,0,0
3,1966,4,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0
4,1966,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0
607,2016,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0
608,2016,10,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0
609,2016,11,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0


In [None]:
merged_areas_proposed_agg5  # pivoted counts for the 96-part grid

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,30,31,32,33,34,35,36,37,38,39,40,42,43,44,45,46,47,48,49,51,52,53,54,55,56,57,58,59,62,63,64,65,66,67,68,69,70,71,72,73,74,76,77,78,79,81,82,83,84,88,90,91,94,95
0,1966,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1966,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1966,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,2,5,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1966,4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,1966,5,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
607,2016,9,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,2,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
608,2016,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
609,2016,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
merged_areas_proposed_agg6  # pivoted counts for the 120-part grid

Unnamed: 0_level_0,year,month,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify,classify
classify,Unnamed: 1_level_1,Unnamed: 2_level_1,16,17,18,20,21,22,23,24,25,27,28,29,30,31,33,34,35,37,39,40,41,42,43,44,45,46,48,49,50,52,54,55,56,57,58,59,60,62,63,64,65,66,67,69,70,71,72,73,74,78,79,80,81,82,83,84,85,86,87,88,89,91,92,93,95,97,99,102,103,104,105,110,113,114,117,118,119
0,1966,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1966,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1966,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1966,4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,1966,5,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2016,8,0,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
607,2016,9,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
608,2016,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
609,2016,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Multihot table for the proposed 12-region grid
dataset_multihot1 = set_baseline_multihot(set_baseline_freq(merged_areas_proposed_agg1, 12), 12)
dataset_multihot1

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12
0,0,0,0,0,0,0,0,1,1,0,0,1
1,0,1,1,0,1,0,1,1,0,0,1,0
2,0,0,0,0,1,0,1,1,0,0,1,1
3,0,1,0,1,0,0,0,1,1,0,1,1
4,0,0,0,1,0,1,0,1,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,1,0,0,1,1,0,1,0,0,1,1
607,0,0,0,1,1,1,0,1,0,0,0,1
608,0,1,0,0,1,0,0,1,0,0,1,1
609,0,1,0,1,0,1,0,1,0,0,1,1


In [None]:
# Multihot table for the proposed 24-region grid
dataset_multihot2 = set_baseline_multihot(set_baseline_freq(merged_areas_proposed_agg2, 24), 24)
dataset_multihot2

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,region_13,region_14,region_15,region_16,region_17,region_18,region_19,region_20,region_21,region_22,region_23,region_24
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0
1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0
3,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1
4,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0
607,0,0,0,0,0,1,1,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1
608,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0
609,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0


In [None]:
# Multi-hot baseline for the 48-region aggregation: set_baseline_freq(...) runs
# first and its result is passed to set_baseline_multihot with the same count.
dataset_multihot3 = set_baseline_multihot(
    set_baseline_freq(merged_areas_proposed_agg3, 48),
    48,
)
dataset_multihot3

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,region_13,region_14,region_15,region_16,region_17,region_18,region_19,region_20,region_21,region_22,region_23,region_24,region_25,region_26,region_27,region_28,region_29,region_30,region_31,region_32,region_33,region_34,region_35,region_36,region_37,region_38,region_39,region_40,region_41,region_42,region_43,region_44,region_45,region_46,region_47,region_48
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
608,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
609,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0


In [None]:
# Multi-hot baseline for the 72-region aggregation: set_baseline_freq(...) runs
# first and its result is passed to set_baseline_multihot with the same count.
dataset_multihot4 = set_baseline_multihot(
    set_baseline_freq(merged_areas_proposed_agg4, 72),
    72,
)
dataset_multihot4

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,region_13,region_14,region_15,region_16,region_17,region_18,region_19,region_20,region_21,region_22,region_23,region_24,region_25,region_26,region_27,region_28,region_29,region_30,region_31,region_32,region_33,region_34,region_35,region_36,region_37,region_38,region_39,region_40,region_41,region_42,region_43,region_44,region_45,region_46,region_47,region_48,region_49,region_50,region_51,region_52,region_53,region_54,region_55,region_56,region_57,region_58,region_59,region_60,region_61,region_62,region_63,region_64,region_65,region_66,region_67,region_68,region_69,region_70,region_71,region_72
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
608,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
609,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Multi-hot baseline for the 96-region aggregation: set_baseline_freq(...) runs
# first and its result is passed to set_baseline_multihot with the same count.
dataset_multihot5 = set_baseline_multihot(
    set_baseline_freq(merged_areas_proposed_agg5, 96),
    96,
)
dataset_multihot5

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,region_13,region_14,region_15,region_16,region_17,region_18,region_19,region_20,region_21,region_22,region_23,region_24,region_25,region_26,region_27,region_28,region_29,region_30,region_31,region_32,region_33,region_34,region_35,region_36,region_37,region_38,region_39,region_40,...,region_57,region_58,region_59,region_60,region_61,region_62,region_63,region_64,region_65,region_66,region_67,region_68,region_69,region_70,region_71,region_72,region_73,region_74,region_75,region_76,region_77,region_78,region_79,region_80,region_81,region_82,region_83,region_84,region_85,region_86,region_87,region_88,region_89,region_90,region_91,region_92,region_93,region_94,region_95,region_96
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
608,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
609,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Multi-hot baseline for the 120-region aggregation: set_baseline_freq(...) runs
# first and its result is passed to set_baseline_multihot with the same count.
dataset_multihot6 = set_baseline_multihot(
    set_baseline_freq(merged_areas_proposed_agg6, 120),
    120,
)
dataset_multihot6

Unnamed: 0,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,region_13,region_14,region_15,region_16,region_17,region_18,region_19,region_20,region_21,region_22,region_23,region_24,region_25,region_26,region_27,region_28,region_29,region_30,region_31,region_32,region_33,region_34,region_35,region_36,region_37,region_38,region_39,region_40,...,region_81,region_82,region_83,region_84,region_85,region_86,region_87,region_88,region_89,region_90,region_91,region_92,region_93,region_94,region_95,region_96,region_97,region_98,region_99,region_100,region_101,region_102,region_103,region_104,region_105,region_106,region_107,region_108,region_109,region_110,region_111,region_112,region_113,region_114,region_115,region_116,region_117,region_118,region_119,region_120
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
607,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
608,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
609,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Keep the set_baseline_freq result of every aggregation level under its own
# name. The calls run in the listed order, one (input, region count) pair each.
(
    dataset_freq1,
    dataset_freq2,
    dataset_freq3,
    dataset_freq4,
    dataset_freq5,
    dataset_freq6,
) = (
    set_baseline_freq(areas, n_regions)
    for areas, n_regions in (
        (merged_areas_proposed_agg1, 12),
        (merged_areas_proposed_agg2, 24),
        (merged_areas_proposed_agg3, 48),
        (merged_areas_proposed_agg4, 72),
        (merged_areas_proposed_agg5, 96),
        (merged_areas_proposed_agg6, 120),
    )
)

In [None]:
# Save to csv
# NOTE: the multi-hot export and download below is disabled by wrapping it in
# a triple-quoted string, so none of these statements run. Because that string
# is the last expression of the cell, the notebook echoes it as the cell output.
"""
dataset_multihot1.to_csv('dataset_multihot1.csv')
dataset_multihot2.to_csv('dataset_multihot2.csv')
dataset_multihot3.to_csv('dataset_multihot3.csv')
dataset_multihot4.to_csv('dataset_multihot4.csv')
dataset_multihot5.to_csv('dataset_multihot5.csv')
dataset_multihot6.to_csv('dataset_multihot6.csv')

# Download csv files
files.download('dataset_multihot1.csv')
files.download('dataset_multihot2.csv')
files.download('dataset_multihot3.csv')
files.download('dataset_multihot4.csv')
files.download('dataset_multihot5.csv')
files.download('dataset_multihot6.csv')

"""

"\ndataset_multihot1.to_csv('dataset_multihot1.csv')\ndataset_multihot2.to_csv('dataset_multihot2.csv')\ndataset_multihot3.to_csv('dataset_multihot3.csv')\ndataset_multihot4.to_csv('dataset_multihot4.csv')\ndataset_multihot5.to_csv('dataset_multihot5.csv')\ndataset_multihot6.to_csv('dataset_multihot6.csv')\n\n# Download csv files\nfiles.download('dataset_multihot1.csv')\nfiles.download('dataset_multihot2.csv')\nfiles.download('dataset_multihot3.csv')\nfiles.download('dataset_multihot4.csv')\nfiles.download('dataset_multihot5.csv')\nfiles.download('dataset_multihot6.csv')\n\n"

In [None]:
# Each target file name is declared once and drives both phases below, so the
# name written by to_csv is always the name handed to files.download.
_freq_csv_exports = {
    'dataset_freq1_6.0_8.0m.csv': dataset_freq1,
    'dataset_freq2_6.0_8.0m.csv': dataset_freq2,
    'dataset_freq3_6.0_8.0m.csv': dataset_freq3,
    'dataset_freq4_6.0_8.0m.csv': dataset_freq4,
    'dataset_freq5_6.0_8.0m.csv': dataset_freq5,
    'dataset_freq6_6.0_8.0m.csv': dataset_freq6,
}

# Phase 1: write all six tables to CSV before any download is requested.
for _csv_name, _freq_table in _freq_csv_exports.items():
    _freq_table.to_csv(_csv_name)

# Download csv files
for _csv_name in _freq_csv_exports:
    files.download(_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>