In [100]:
import pandas as pd
import os
from math import radians, sin, cos, sqrt, atan2
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import accuracy_score
import pandas as pd
import datetime as dt
import numpy as np
from tqdm import tqdm

In [101]:
def subsample_of_num_stations(df, stations_glon_glat=None):
    if stations_glon_glat == None:
        return df

    condition = False

    for glon, glat in stations_glon_glat:
        condition = condition | (df['glon'] == glon) & (df['glat'] == glat)

    return df[condition]

In [102]:
BASE_PATH = "/Users/longnguyen/Documents/LongBm/UW_Engine_Capstone/"

def create_official_dataset(files):
    result = pd.DataFrame()

    total_entries = 0

    for file in tqdm(files):
        dataset = pd.read_csv(BASE_PATH + file)
        total_entries += len(dataset)
        result = pd.concat([result, dataset], ignore_index=True)

    assert(total_entries == len(result))

    print('Num entries:', total_entries)

    return result

In [103]:
def generate_file_names(start, end):
    return [f'dataset_{i}.csv' for i in range(start, end + 1)]

In [104]:
train_files = generate_file_names(2010, 2019)
test_files = generate_file_names(2020, 2023)

In [124]:
train_files + test_files

['dataset_2010.csv',
 'dataset_2011.csv',
 'dataset_2012.csv',
 'dataset_2013.csv',
 'dataset_2014.csv',
 'dataset_2015.csv',
 'dataset_2016.csv',
 'dataset_2017.csv',
 'dataset_2018.csv',
 'dataset_2019.csv',
 'dataset_2020.csv',
 'dataset_2021.csv',
 'dataset_2022.csv',
 'dataset_2023.csv']

In [127]:
stations_distinct_locations = pd.DataFrame()

for file in (train_files + test_files):
    if file != "stations_locations.csv":
        print(file)
        df = pd.read_csv(file)
        df = df[['glat', 'glon']].drop_duplicates()
        stations_distinct_locations = pd.concat([stations_distinct_locations, df], ignore_index=True)
        stations_distinct_locations = stations_distinct_locations[['glat', 'glon']].drop_duplicates()

dataset_2010.csv
dataset_2011.csv
dataset_2012.csv
dataset_2013.csv
dataset_2014.csv
dataset_2015.csv
dataset_2016.csv
dataset_2017.csv
dataset_2018.csv
dataset_2019.csv
dataset_2020.csv
dataset_2021.csv
dataset_2022.csv
dataset_2023.csv


In [129]:
stations_distinct_locations.to_csv("stations_locations.csv")

In [7]:
dir_list = os.listdir(BASE_PATH + "GICFolders")

In [10]:
gic_datas = [el for el in dir_list if ".csv" in el]

In [105]:
gic_datas

['20150317.csv',
 '20151220.csv',
 '20150919.csv',
 '20170927.csv',
 '20210512.csv',
 '20131002.csv',
 '20220409.csv',
 '20170907.csv',
 '20150622.csv',
 '20230423.csv',
 '20180825.csv',
 '20170527.csv',
 '20230226.csv',
 '20230323.csv',
 '20151006.csv',
 '20150911.csv',
 '20211103.csv']

In [99]:
len(gic_datas)

17

In [17]:
samples = pd.read_csv('GICFolders/20150317.csv')
samples

Unnamed: 0,SampleDateTime,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,1.444000,41.7,87.5
1,2015-03-17 03:05:00,1.457000,41.7,87.5
2,2015-03-17 03:10:00,-0.468333,41.7,87.5
3,2015-03-17 03:15:00,1.046333,41.7,87.5
4,2015-03-17 03:20:00,0.244000,41.7,87.5
...,...,...,...,...
36535,2015-03-18 05:40:00,3.823867,34.1,99.2
36536,2015-03-18 05:45:00,3.865600,34.1,99.2
36537,2015-03-18 05:50:00,3.818667,34.1,99.2
36538,2015-03-18 05:55:00,3.835467,34.1,99.2


In [30]:
import geopy.distance

def distance(lat1, lon1, lat2, lon2):
    """
    Calculate the distance between two points on Earth given their latitude and longitude
    using the Haversine formula.
    """
    # Convert latitude and longitude from degrees to radians
    return geopy.distance.geodesic((lat1, lon1), (lat2, lon2)).miles

# def records_within_radius(df, center_lat, center_lon, radius):
#     """
#     Return all records within the specified radius from a center point defined by latitude and longitude.
#     """
#     records_within_radius = []
#     for index, row in df.iterrows():
#         record_lat, record_lon = row['Lat'], row['Long']
#         if distance(center_lat, center_lon, record_lat, record_lon) <= radius:
#             records_within_radius.append(row)
#     return pd.DataFrame(records_within_radius)

In [130]:
devices_locations = pd.read_csv("devices_locations.csv", index_col=[0])
stations_locations = pd.read_csv("stations_locations.csv", index_col=[0])

In [145]:
devices_locations.Long = -devices_locations['Long'] + 360
devices_locations

Unnamed: 0,Lat,Long
0,41.7,272.5
1,41.8,272.0
2,40.2,284.7
3,42.8,277.5
4,46.6,272.6
...,...,...
288,27.9,262.6
289,32.8,265.5
290,42.5,272.1
291,41.6,273.7


In [142]:
stations_locations

Unnamed: 0,glat,glon
0,36.850,2.930
1,3.870,11.520
2,14.280,39.460
3,14.280,39.460
4,5.330,355.870
...,...,...
577,54.929,264.713
578,-77.850,166.670
579,47.500,242.300
580,47.500,242.300


In [143]:
def records_within_radius(stations, devices, radius):
    """
    Return all records within the specified radius from a center point defined by latitude and longitude.
    """
    stations2devices = {}
    for index, station in stations.iterrows():
        station_record_lat, station_record_lon = station['glat'], station['glon']
        temp = []
        for index, device in devices.iterrows():
            device_record_lat, device_record_lon = device['Lat'], device['Long']
            if distance(station_record_lat, station_record_lon, device_record_lat, device_record_lon) <= radius:
                temp.append((device_record_lat, device_record_lon))

        stations2devices[(station_record_lat, station_record_lon)] = temp

    return stations2devices

In [146]:
stations2devices = records_within_radius(stations_locations, devices_locations, 650)

In [135]:
list(stations2devices.keys())

[(36.85, 2.93),
 (3.87, 11.52),
 (14.279999999999998, 39.46),
 (14.28, 39.46),
 (5.33, 355.87),
 (5.33, 355.87000000000006),
 (7.0, 125.4),
 (7.0, 125.40000000000002),
 (43.25, 76.92),
 (9.03, 38.77),
 (18.62, 72.87),
 (68.35, 18.82),
 (-65.245, 295.742),
 (82.5, 297.5),
 (28.170000000000005, 129.33),
 (28.17, 129.33),
 (69.6, 61.21),
 (65.6, 322.37),
 (-37.8, 77.57),
 (-11.77, 282.85),
 (69.3, 16.03),
 (-13.8, 188.22),
 (-13.800000000000002, 188.22),
 (42.383, 13.317000000000002),
 (42.383, 13.317),
 (43.46, 142.17),
 (-7.949, 345.624),
 (-7.949000000000001, 345.624),
 (-23.77, 133.88),
 (-67.57, 291.88),
 (-70.7, 44.28),
 (-74.01, 42.99),
 (-77.32, 39.71),
 (-77.51, 336.58),
 (-79.08, 335.88),
 (-80.0, 77.0),
 (-80.89, 337.74),
 (-82.78, 347.06),
 (-84.35, 336.14),
 (-85.36, 2.06),
 (-85.36, 95.98),
 (-87.0, 28.41),
 (-87.0, 28.410000000000004),
 (-86.51, 68.172),
 (-88.03, 316.13),
 (-88.03000000000002, 316.13),
 (-81.0, 3.0),
 (49.07, 14.02),
 (51.83, 20.8),
 (66.9, 208.45),
 (55.6

In [147]:
stations2devices[(38.5, 263.7)]

[(41.7, 272.5),
 (41.8, 272.0),
 (44.0, 271.4),
 (44.3, 272.5),
 (44.8, 271.8),
 (44.4, 271.3),
 (43.7, 272.3),
 (43.1, 272.0),
 (43.3, 270.6),
 (42.3, 272.1),
 (42.0, 272.1),
 (40.8, 271.4),
 (45.1, 266.2),
 (37.2, 273.1),
 (36.5, 272.8),
 (34.9, 270.0),
 (35.3, 270.2),
 (36.2, 271.2),
 (34.8, 274.3),
 (30.5, 262.5),
 (32.6, 262.8),
 (42.6, 263.7),
 (39.4, 272.6),
 (38.9, 275.4),
 (29.8, 264.8),
 (29.4, 265.1),
 (31.4, 264.6),
 (34.1, 260.8),
 (41.5, 272.4),
 (34.1, 273.3),
 (35.8, 273.5),
 (46.8, 267.7),
 (42.1, 272.2),
 (41.2, 272.1),
 (41.3, 269.5),
 (38.3, 265.4),
 (41.1, 264.3),
 (41.8, 272.2),
 (42.2, 271.1),
 (41.8, 271.8),
 (41.6, 272.1),
 (42.1, 272.1),
 (39.5, 262.5),
 (43.6, 267.3),
 (45.0, 267.2),
 (44.8, 268.5),
 (45.0, 266.5),
 (45.5, 265.7),
 (33.7, 273.4),
 (45.1, 266.5),
 (44.4, 263.5),
 (33.3, 272.6),
 (42.0, 271.8),
 (43.4, 272.0),
 (44.4, 271.6),
 (43.7, 264.3),
 (44.8, 266.4),
 (45.4, 266.1),
 (45.9, 268.5),
 (42.0, 273.4),
 (41.7, 273.5),
 (41.0, 274.7),
 (39.2, 

In [138]:
# samples.Long = 360 - samples.Long
samples

Unnamed: 0,SampleDateTime,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,1.444000,41.7,272.5
1,2015-03-17 03:05:00,1.457000,41.7,272.5
2,2015-03-17 03:10:00,-0.468333,41.7,272.5
3,2015-03-17 03:15:00,1.046333,41.7,272.5
4,2015-03-17 03:20:00,0.244000,41.7,272.5
...,...,...,...,...
36535,2015-03-18 05:40:00,3.823867,34.1,260.8
36536,2015-03-18 05:45:00,3.865600,34.1,260.8
36537,2015-03-18 05:50:00,3.818667,34.1,260.8
36538,2015-03-18 05:55:00,3.835467,34.1,260.8


In [148]:
condition = False

for device in stations2devices[(38.5, 263.7)]:
    device_lat, device_lon = device
    condition = condition | ((samples['Lat'] == device_lat) & (samples['Long'] == device_lon))

In [149]:
condition

0        True
1        True
2        True
3        True
4        True
         ... 
36535    True
36536    True
36537    True
36538    True
36539    True
Length: 36540, dtype: bool

In [150]:
df_within = samples[condition]

In [151]:
# Written by Sean Jung
import numpy as np
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# print(distinct_locations)
# Define the radius of the circle in miles (adjust as needed)
temp_lats = [38.5]
temp_lons = [263.7]
# df.Long = -df.Long
# Plotting using Plotly
fig = px.scatter_geo(df_within, lat='Lat', lon='Long')
# fig.add_trace()
fig.add_scattergeo(lat=temp_lats, lon=temp_lons, mode='markers', name='Stations')
fig.update_layout(mapbox={"style": "carto-positron"})

# for station in stations_locations.iterrows():
#     fig.add_trace(
#         go.Scattergeo(
#             lon=[station[1]['glon']],
#             lat=[station[1]['glat']],
#             mode='markers',
#             marker=dict(size=15, color='red', opacity=0.1),
#             showlegend=False,
#         )
#     )
fig.show()

In [152]:
max_datetime = samples['SampleDateTime'].max()
min_datetime = samples['SampleDateTime'].min()
year = max_datetime[0:4]

print(min_datetime)
print(max_datetime)
print(year)

2015-03-17 03:00:00
2015-03-18 06:00:00
2015


In [153]:
mag_perb_dataset = pd.read_csv(f'dataset_{year}.csv')
mag_perb_dataset
mag_perb_dataset[(min_datetime <= mag_perb_dataset['datetime']) &  (mag_perb_dataset['datetime'] <= max_datetime)]

Unnamed: 0,datetime,glon,glat,mlon,mlat,mlt,dbn_nez,dbe_nez,BX_GSE,BY_GSM,BZ_GSM,flow_speed,proton_density,T,Pressure,SYM_H,CLOCK_ANGLE_GSM
21636,2015-03-17 03:00:00,2.930,36.85,77.884224,27.490238,3.241954,5.044789,12.178289,-5.83,-1.50,6.16,415.9,16.23,34695.0,5.61,13,345.38
21637,2015-03-17 03:05:00,2.930,36.85,77.884224,27.490238,3.327220,4.454782,11.908767,-6.41,-0.75,5.54,418.3,16.55,37155.0,5.79,13,352.24
21638,2015-03-17 03:10:00,2.930,36.85,77.884224,27.490238,3.412268,4.329567,10.443799,-5.67,-1.45,6.22,416.7,17.07,34732.0,5.93,12,346.98
21639,2015-03-17 03:15:00,2.930,36.85,77.884224,27.490238,3.497073,3.937102,9.872845,-4.75,-1.24,6.64,410.9,16.61,33844.0,5.61,11,349.70
21640,2015-03-17 03:20:00,2.930,36.85,77.884224,27.490238,3.581615,4.188336,9.495521,-4.09,-1.46,7.24,407.8,15.75,32376.0,5.24,11,348.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26818121,2015-03-18 05:40:00,245.518,62.48,-56.854180,69.424225,20.933499,-61.469942,-154.732654,5.54,-10.94,-2.03,569.1,3.84,233312.0,2.49,-104,262.70
26818122,2015-03-18 05:45:00,245.518,62.48,-56.854180,69.424225,21.017922,-99.267113,-125.631277,5.75,-11.28,0.26,573.8,2.92,155148.0,1.92,-107,266.58
26818123,2015-03-18 05:50:00,245.518,62.48,-56.854180,69.424225,21.102361,-105.466078,-102.581549,7.95,-9.46,2.22,539.4,2.90,171723.0,1.68,-109,281.68
26818124,2015-03-18 05:55:00,245.518,62.48,-56.854180,69.424225,21.186793,-72.762909,-88.610469,8.55,-8.42,-0.19,543.9,3.08,133553.0,1.83,-112,274.62


In [154]:
stations_lists = mag_perb_dataset[['glon', 'glat']].drop_duplicates()
stations_lists

Unnamed: 0,glon,glat
0,2.930,36.850
105120,11.520,3.870
210240,355.870,5.330
210241,355.870,5.330
266718,125.400,7.000
...,...,...
26455926,290.813,-12.586
26553243,283.124,-11.952
26651037,129.720,60.020
26754404,138.500,9.300


In [155]:
# station_rows = None
# device_rows = None
result = pd.DataFrame()
for idx, stat in stations_lists.iterrows():
    devices = stations2devices[(stat['glat'], stat['glon'])]
    condition = False

    if len(devices) != 0:
        for device in devices:
            device_lat, device_lon = device
            condition = condition | ((samples['Lat'] == device_lat) & (samples['Long'] == device_lon))

        station_rows = mag_perb_dataset[(mag_perb_dataset['glat'] == stat['glat']) & (mag_perb_dataset['glon'] == stat['glon'])]
        device_rows = samples[condition]

        output = pd.merge(station_rows ,device_rows, left_on='datetime',right_on='SampleDateTime')

        result = pd.concat([result, output], ignore_index=True)

        # break



In [156]:
result

Unnamed: 0,datetime,glon,glat,mlon,mlat,mlt,dbn_nez,dbe_nez,BX_GSE,BY_GSM,...,flow_speed,proton_density,T,Pressure,SYM_H,CLOCK_ANGLE_GSM,SampleDateTime,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,...,415.9,16.23,34695.0,5.61,13,345.38,2015-03-17 03:00:00,-0.088000,33.4,247.5
1,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,...,415.9,16.23,34695.0,5.61,13,345.38,2015-03-17 03:00:00,-0.179000,47.4,258.8
2,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,...,415.9,16.23,34695.0,5.61,13,345.38,2015-03-17 03:00:00,-0.610667,47.4,258.8
3,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,...,415.9,16.23,34695.0,5.61,13,345.38,2015-03-17 03:00:00,-0.183000,47.4,258.8
4,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,...,415.9,16.23,34695.0,5.61,13,345.38,2015-03-17 03:00:00,0.778333,47.4,258.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236690,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,...,530.0,3.16,188434.0,1.77,-112,290.62,2015-03-18 06:00:00,4.300000,47.6,245.4
236691,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,...,530.0,3.16,188434.0,1.77,-112,290.62,2015-03-18 06:00:00,0.400000,47.9,238.1
236692,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,...,530.0,3.16,188434.0,1.77,-112,290.62,2015-03-18 06:00:00,-1.360000,47.5,239.7
236693,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,...,530.0,3.16,188434.0,1.77,-112,290.62,2015-03-18 06:00:00,-2.360000,47.8,237.8


In [157]:
result[['Long', 'Lat']].drop_duplicates()

Unnamed: 0,Long,Lat
0,247.5,33.4
1,258.8,47.4
5,248.1,33.3
6,263.7,42.6
7,260.8,34.1
...,...,...
39142,238.1,47.9
39143,239.7,47.5
39144,240.1,46.9
42069,237.8,47.8


In [158]:
samples[['Long', 'Lat']].drop_duplicates()

Unnamed: 0,Long,Lat
0,272.5,41.7
325,272.0,41.8
650,284.7,40.2
1300,277.5,42.8
1625,272.6,46.6
...,...,...
34265,283.4,38.5
34915,284.9,39.5
35240,285.4,39.4
35890,264.6,31.4


In [92]:
result = result.drop(columns=['SampleDateTime'])

In [93]:
result.sort_values(by='datetime').reset_index(inplace=True)

In [94]:
result

Unnamed: 0,datetime,glon,glat,mlon,mlat,mlt,dbn_nez,dbe_nez,BX_GSE,BY_GSM,BZ_GSM,flow_speed,proton_density,T,Pressure,SYM_H,CLOCK_ANGLE_GSM,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.50,6.16,415.9,16.23,34695.0,5.61,13,345.38,0.833333,42.6,263.7
1,2015-03-17 03:05:00,254.76,40.14,-38.690166,48.520866,19.555594,7.297107,2.797410,-6.41,-0.75,5.54,418.3,16.55,37155.0,5.79,13,352.24,0.782000,42.6,263.7
2,2015-03-17 03:10:00,254.76,40.14,-38.690166,48.520866,19.640642,6.188830,2.997668,-5.67,-1.45,6.22,416.7,17.07,34732.0,5.93,12,346.98,0.808000,42.6,263.7
3,2015-03-17 03:15:00,254.76,40.14,-38.690166,48.520866,19.725447,6.219528,3.101161,-4.75,-1.24,6.64,410.9,16.61,33844.0,5.61,11,349.70,0.805333,42.6,263.7
4,2015-03-17 03:20:00,254.76,40.14,-38.690166,48.520866,19.809989,6.363662,3.004373,-4.09,-1.46,7.24,407.8,15.75,32376.0,5.24,11,348.26,0.844000,42.6,263.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139050,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,4.300000,47.6,245.4
139051,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,0.400000,47.9,238.1
139052,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-1.360000,47.5,239.7
139053,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-2.360000,47.8,237.8


In [161]:
result = None
for file in tqdm(gic_datas):
    gic_data = pd.read_csv(f'GICFolders/{file}')
    gic_data.Long = 360 - gic_data.Long

    max_datetime = gic_data['SampleDateTime'].max()
    min_datetime = gic_data['SampleDateTime'].min()
    year = max_datetime[0:4]

    print(min_datetime)
    print(max_datetime)
    print(year)

    mag_perb_dataset = pd.read_csv(f'dataset_{year}.csv')
    mag_perb_data = mag_perb_dataset[(min_datetime <= mag_perb_dataset['datetime']) &  (mag_perb_dataset['datetime'] <= max_datetime)]

    stations_lists = mag_perb_data[['glon', 'glat']].drop_duplicates()

    result = pd.DataFrame()
    for idx, stat in stations_lists.iterrows():
        devices = stations2devices[(stat['glat'], stat['glon'])]
        condition = False

        if len(devices) != 0:
            for device in devices:
                device_lat, device_lon = device
                condition = condition | ((gic_data['Lat'] == device_lat) & (gic_data['Long'] == device_lon))

            station_rows = mag_perb_data[(mag_perb_data['glat'] == stat['glat']) & (mag_perb_data['glon'] == stat['glon'])]
            device_rows = gic_data[condition]

            output = pd.merge(station_rows ,device_rows, left_on='datetime',right_on='SampleDateTime')

            result = pd.concat([result, output], ignore_index=True)

    if (len(result[['Long', 'Lat']].drop_duplicates()) != len(gic_data[['Long', 'Lat']].drop_duplicates())):
        print("Result:", len(result[['Long', 'Lat']].drop_duplicates()))
        print("GIC data:", len(gic_data[['Long', 'Lat']].drop_duplicates()))

    result = result.drop(columns=['SampleDateTime'])
    result.sort_values(by='datetime').reset_index(inplace=True)

    result.to_csv(f"GIC_dataset_{file}")
    # break

  0%|          | 0/17 [00:00<?, ?it/s]

2015-03-17 03:00:00
2015-03-18 06:00:00
2015


  6%|▌         | 1/17 [00:58<15:38, 58.69s/it]

2015-12-20 03:00:00
2015-12-21 09:00:00
2015


 12%|█▏        | 2/17 [01:52<13:53, 55.59s/it]

2015-09-19 18:00:00
2015-09-20 18:00:00
2015


 18%|█▊        | 3/17 [03:02<14:30, 62.18s/it]

2017-09-27 15:00:00
2017-09-29 00:00:00
2017


 24%|██▎       | 4/17 [04:08<13:51, 63.94s/it]

2021-05-12 00:00:00
2021-05-13 12:00:00
2021


 29%|██▉       | 5/17 [05:06<12:20, 61.73s/it]

2013-10-02 00:00:00
2013-10-03 03:00:00
2013


 35%|███▌      | 6/17 [06:25<12:23, 67.58s/it]

2022-04-09 00:00:00
2022-04-10 23:55:00
2022


 41%|████      | 7/17 [07:31<11:09, 66.99s/it]

2017-09-07 21:00:00
2017-09-09 03:00:00
2017


 47%|████▋     | 8/17 [08:35<09:55, 66.21s/it]

2015-06-22 03:00:00
2015-06-23 15:00:00
2015


 53%|█████▎    | 9/17 [09:44<08:55, 66.88s/it]

2023-04-23 00:00:00
2023-04-24 23:55:00
2023
Result: 146
GIC data: 148


 59%|█████▉    | 10/17 [10:12<06:24, 54.96s/it]

2018-08-25 18:00:00
2018-08-27 00:00:00
2018


 65%|██████▍   | 11/17 [11:14<05:42, 57.08s/it]

2017-05-27 15:00:00
2017-05-28 15:00:00
2017


 71%|███████   | 12/17 [12:31<05:16, 63.28s/it]

2023-02-26 12:00:00
2023-02-28 23:55:00
2023
Result: 252
GIC data: 254


 76%|███████▋  | 13/17 [13:07<03:39, 54.83s/it]

2023-03-23 00:00:00
2023-03-24 23:55:00
2023
Result: 255
GIC data: 257


 82%|████████▏ | 14/17 [13:34<02:19, 46.66s/it]

2015-10-06 18:00:00
2015-10-09 09:00:00
2015


 88%|████████▊ | 15/17 [14:44<01:46, 53.44s/it]

2015-09-11 03:00:00
2015-09-11 18:00:00
2015


 94%|█████████▍| 16/17 [15:46<00:56, 56.07s/it]

2021-11-03 15:00:00
2021-11-04 23:55:00
2021
Result: 101
GIC data: 102


100%|██████████| 17/17 [16:38<00:00, 58.76s/it]


In [162]:
print(len(gic_datas))

17


In [160]:
pd.read_csv("GIC_dataset_20150317.csv", index_col=[0])

Unnamed: 0,datetime,glon,glat,mlon,mlat,mlt,dbn_nez,dbe_nez,BX_GSE,BY_GSM,BZ_GSM,flow_speed,proton_density,T,Pressure,SYM_H,CLOCK_ANGLE_GSM,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,6.16,415.9,16.23,34695.0,5.61,13,345.38,-0.088000,33.4,247.5
1,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,6.16,415.9,16.23,34695.0,5.61,13,345.38,-0.179000,47.4,258.8
2,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,6.16,415.9,16.23,34695.0,5.61,13,345.38,-0.610667,47.4,258.8
3,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,6.16,415.9,16.23,34695.0,5.61,13,345.38,-0.183000,47.4,258.8
4,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.5,6.16,415.9,16.23,34695.0,5.61,13,345.38,0.778333,47.4,258.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236690,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,4.36,530.0,3.16,188434.0,1.77,-112,290.62,4.300000,47.6,245.4
236691,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,4.36,530.0,3.16,188434.0,1.77,-112,290.62,0.400000,47.9,238.1
236692,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-1.360000,47.5,239.7
236693,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.1,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-2.360000,47.8,237.8


In [114]:
result

Unnamed: 0,datetime,glon,glat,mlon,mlat,mlt,dbn_nez,dbe_nez,BX_GSE,BY_GSM,BZ_GSM,flow_speed,proton_density,T,Pressure,SYM_H,CLOCK_ANGLE_GSM,GICMeasured,Lat,Long
0,2015-03-17 03:00:00,254.76,40.14,-38.690166,48.520866,19.470329,8.752354,2.677541,-5.83,-1.50,6.16,415.9,16.23,34695.0,5.61,13,345.38,0.833333,42.6,263.7
1,2015-03-17 03:05:00,254.76,40.14,-38.690166,48.520866,19.555594,7.297107,2.797410,-6.41,-0.75,5.54,418.3,16.55,37155.0,5.79,13,352.24,0.782000,42.6,263.7
2,2015-03-17 03:10:00,254.76,40.14,-38.690166,48.520866,19.640642,6.188830,2.997668,-5.67,-1.45,6.22,416.7,17.07,34732.0,5.93,12,346.98,0.808000,42.6,263.7
3,2015-03-17 03:15:00,254.76,40.14,-38.690166,48.520866,19.725447,6.219528,3.101161,-4.75,-1.24,6.64,410.9,16.61,33844.0,5.61,11,349.70,0.805333,42.6,263.7
4,2015-03-17 03:20:00,254.76,40.14,-38.690166,48.520866,19.809989,6.363662,3.004373,-4.09,-1.46,7.24,407.8,15.75,32376.0,5.24,11,348.26,0.844000,42.6,263.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139050,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,4.300000,47.6,245.4
139051,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,0.400000,47.9,238.1
139052,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-1.360000,47.5,239.7
139053,2015-03-18 06:00:00,236.58,48.52,-62.046574,53.616430,8.231421,-15.277131,-1.853077,6.88,-8.10,4.36,530.0,3.16,188434.0,1.77,-112,290.62,-2.360000,47.8,237.8
