In [1]:
from goesaws import *
import datetime
import pandas as pd
import sys

date = str(datetime.date.today())

bucket_name = 'noaa-goes16'
product_name = 'ABI-L2-MCMIPM'
lightning_mapper = 'GLM-L2-LCFA'
yr = 2022
day_of_year =141
hr = 19
minutes = 30

In [2]:
abiprefix = gen_prefix(product=product_name,year = yr, day=day_of_year, hour = hr)
abifiles = gen_fn(bucket=bucket_name,prefix=abiprefix)
abidatasets = [gen_data(key=abifiles[i], bucket = bucket_name) for i in range(0,minutes)]
abiDS = xr.concat(abidatasets,dim = 't')

In [3]:
skyprefix = gen_prefix(product = "ABI-L2-ACMM", year = yr, day = day_of_year, hour = hr)
skyfiles = gen_fn(bucket = bucket_name, prefix=skyprefix)
skydatasets = [gen_data(key=skyfiles[i], bucket = bucket_name) for i in range(0,minutes)]
skyDS = xr.concat(skydatasets,dim = 't')

In [4]:
dataset2_resampled = skyDS.interp(
    x=abiDS["x"].data,
    y=abiDS["y"].data,
    t = abiDS["t"].data,
    method='linear',
    kwargs={'fill_value': "extrapolate"}
)

In [5]:
nfile = xr.merge([abiDS,dataset2_resampled])
nfile = nfile.coarsen(x=4,y=4).mean()
nfile = calc_latlon(nfile)
#nfile

In [6]:
df = nfile.to_dataframe()

In [7]:
features = ["CMI_C01", "CMI_C02","CMI_C03","CMI_C04","CMI_C05","CMI_C06","CMI_C07","CMI_C08","CMI_C09","CMI_C10","CMI_C11","CMI_C12","CMI_C13","CMI_C14","CMI_C15","CMI_C16","ACM", "BCM", "Cloud_Probabilities","lat","lon"]
ndf = df[features].copy()
ndf["time"] = ndf.index.get_level_values('t')
ndf.drop_duplicates(inplace=True)


In [8]:
### We want to get the lightning data from the GLM-L2-LCFA product
### We will use the same time period as the ABI data

#event_names = ["event_lat","event_lon","event_time_offset"]
names = ["group_lat","group_lon","group_time_offset"]

glmprefix = gen_prefix(product = "GLM-L2-LCFA", year = yr, day = day_of_year, hour = hr)
glmfiles = gen_fn(bucket = bucket_name, prefix=glmprefix)
#There should be 180 GLM files per hour (one every 20 seconds)
glmdatasets = [gen_data(key=glmfiles[i], bucket = bucket_name) for i in range(0,minutes*3)]

latitudes = [np.array(glmdatasets[i][names[0]].data) for i in range(0,minutes*3)]
lats = np.concatenate(latitudes)

# event_latitudes = [np.array(glmdatasets[i][event_names[0]].data) for i in range(0,minutes*3)]
# event_lats = np.concatenate(event_latitudes)


longitudes = [np.array(glmdatasets[i][names[1]].data) for i in range(0,minutes*3)]
lons= np.concatenate(longitudes)

# event_longitudes = [np.array(glmdatasets[i][event_names[1]].data) for i in range(0,minutes*3)]
# event_lons = np.concatenate(event_longitudes)

times = [np.array(glmdatasets[i][names[2]].data) for i in range(0,minutes*3)]
times = np.concatenate(times)

# event_times = [np.array(glmdatasets[i][event_names[2]].data) for i in range(0,minutes*3)]
# event_times = np.concatenate(event_times)

strikes = list(zip(lats,lons))
#event_stikes = list(zip(event_lats,event_lons))

In [9]:
from scipy.spatial import cKDTree
import pandas as pd

ndf = ndf.reset_index(drop=True)  # reset the index of the ndf dataframe
#ndf['time'] = ndf['time'].astype(np.int64)  # convert the time column to datetime objects
tree = cKDTree(ndf[['lat', 'lon']].values)
distances, indices = tree.query(strikes)

#event_distances, event_indices = tree.query(event_stikes)

# filter out the indices that are not present in the ndf dataframe
valid_indices = indices[indices < len(ndf)]

lightning_df = pd.DataFrame({
    'strike_lat': [strike[0] for strike in strikes],
    'strike_lon': [strike[1] for strike in strikes],

    # 'event_lat': [event_strike[0] for event_strike in event_stikes],
    # 'event_lon': [event_strike[1] for event_strike in event_stikes],

    'time': times, 
    #'event_time': event_times,
    'nearest_lat': ndf.loc[valid_indices, 'lat'].values,
    'nearest_lon': ndf.loc[valid_indices, 'lon'].values,

    # 'nearest_event_lat': ndf.loc[event_indices, 'lat'].values,
    # 'nearest_event_lon': ndf.loc[event_indices, 'lon'].values,


    'distance': distances[indices < len(ndf)],
    # 'event_distance': event_distances[event_indices < len(ndf)]
})


lightning_df["lightning"] = 0
lightning_df.loc[distances < 5, 'lightning'] = 1
lightning_df["Coordinates"] = list(zip(lightning_df["nearest_lat"],lightning_df["nearest_lon"]))
lightning_df 



Unnamed: 0,strike_lat,strike_lon,time,nearest_lat,nearest_lon,distance,lightning,Coordinates
0,34.454247,-80.839836,2022-05-21 18:59:59.092851772,34.460515,-80.798870,0.041443,1,"(34.4605150351257, -80.79886953428033)"
1,34.418884,-80.719597,2022-05-21 18:59:59.094759150,34.459560,-80.707742,0.042368,1,"(34.4595597904671, -80.70774201065737)"
2,34.400440,-80.717041,2022-05-21 18:59:59.098955382,34.361793,-80.700003,0.042237,1,"(34.36179266729644, -80.70000291869006)"
3,34.454361,-80.851654,2022-05-21 18:59:59.102770138,34.461486,-80.890022,0.039024,1,"(34.461485619633535, -80.89002226030158)"
4,34.407429,-80.712997,2022-05-21 18:59:59.104677516,34.361793,-80.700003,0.047450,1,"(34.36179266729644, -80.70000291869006)"
...,...,...,...,...,...,...,...,...
522093,26.564121,-86.408112,2022-05-21 19:29:59.403757437,27.878545,-86.364231,1.315156,1,"(27.87854469932127, -86.36423057674222)"
522094,26.564112,-86.408104,2022-05-21 19:29:59.406046290,27.878545,-86.364231,1.315165,1,"(27.87854469932127, -86.36423057674222)"
522095,26.539068,-86.405098,2022-05-21 19:29:59.407953669,27.878545,-86.364231,1.340100,1,"(27.87854469932127, -86.36423057674222)"
522096,26.525238,-86.426109,2022-05-21 19:29:59.409861047,27.878545,-86.364231,1.354721,1,"(27.87854469932127, -86.36423057674222)"


In [10]:
ndf['time_int'] = ndf['time'].astype(np.int64)

tree = cKDTree(ndf[['time_int']].values.reshape(-1, 1))

time_query = lightning_df['time'].astype(np.int64).values

distances, indices = tree.query(time_query.reshape(-1, 1))

lightning_df['nearest_time'] = ndf.loc[indices, 'time'].values



In [11]:
features = ["CMI_C01", "CMI_C02","CMI_C03","CMI_C04","CMI_C05","CMI_C06","CMI_C07","CMI_C08","CMI_C09","CMI_C10","CMI_C11","CMI_C12","CMI_C13","CMI_C14","CMI_C15","CMI_C16","ACM", "BCM", "Cloud_Probabilities","lat","lon","Coordinates","time","Lightning"]
ndf["Coordinates"] = list(zip(ndf["lat"],ndf["lon"]))





In [12]:
strike_df = lightning_df[lightning_df["lightning"] == 1]

# strike_df = strike_df.sort_values(by=['time'])

# ### Merge strike_df with ndf based on coordinate and nearest time
# def find_nearest_time(x):
#     return ndf.iloc[(np.abs(ndf['time'] - x['time'])).argmin()]['time']

# # Add a second column to df1 with the nearest time from df2
# strike_df['nearest_time'] = strike_df.apply(lambda x: find_nearest_time(x), axis=1)

# strike_df





In [13]:
## join Strike_df and ndf on nearest time and coordinates, filling lightning with 0 if na 

merged_df = strike_df.merge(ndf, left_on=['nearest_time','Coordinates'], right_on=['time','Coordinates'], how='left')
#merged_df = merged_df.fillna(0)
merged_df.drop_duplicates(inplace=True)
we_want = merged_df.groupby(['Coordinates','nearest_time']).count()["lightning"].reset_index()



In [14]:
## merge two dataframes on nearest time and coordinates, filling lightning with 0 if na

final_df = we_want.merge(ndf, left_on = ['nearest_time','Coordinates'], right_on = ['time','Coordinates'], how = 'right')
final_df = final_df.fillna(0)
final_df["Lightning"] = final_df["lightning"].apply(lambda x: 1 if x > 0 else 0)
final_df 

Unnamed: 0,Coordinates,nearest_time,lightning,CMI_C01,CMI_C02,CMI_C03,CMI_C04,CMI_C05,CMI_C06,CMI_C07,...,CMI_C15,CMI_C16,ACM,BCM,Cloud_Probabilities,lat,lon,time,time_int,Lightning
0,"(40.13103323474366, -93.38155072424266)",0,0.0,0.331051,0.283293,0.454821,0.001786,0.371012,0.296587,301.391052,...,274.793091,263.456177,1.734227,0.734227,0.505974,40.131033,-93.381551,2022-05-21 19:00:31.268224000,1653159631268224000,0
1,"(40.12698592712501, -93.2741436595977)",0,0.0,0.296647,0.242758,0.444345,0.001746,0.330734,0.259226,301.146332,...,275.392426,263.776367,1.375754,0.500453,0.446567,40.126986,-93.274144,2022-05-21 19:00:31.268224000,1653159631268224000,0
2,"(40.122965705716815, -93.16685328500078)",0,0.0,0.301825,0.247361,0.450754,0.001825,0.329881,0.260476,300.795197,...,275.280731,263.769440,1.593902,0.593902,0.484379,40.122966,-93.166853,2022-05-21 19:00:31.268224000,1653159631268224000,0
3,"(40.11897247748837, -93.05967862352706)",0,0.0,0.344345,0.293075,0.490972,0.002063,0.365119,0.283512,299.780243,...,275.146698,263.745361,1.828127,0.828127,0.638011,40.118972,-93.059679,2022-05-21 19:00:31.268224000,1653159631268224000,0
4,"(40.115006150366405, -92.95261870531907)",0,0.0,0.335952,0.285119,0.485000,0.002103,0.361805,0.283472,300.846741,...,275.355164,263.828003,1.796349,0.811898,0.613374,40.115006,-92.952619,2022-05-21 19:00:31.268224000,1653159631268224000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468745,"(27.80295961693488, -80.17422458142033)",0,0.0,0.269047,0.204861,0.211805,0.104127,0.113472,0.120476,273.945557,...,246.188995,239.389008,3.000000,1.000000,0.999214,27.802960,-80.174225,2022-05-21 19:29:28.393143040,1653161368393143040,0
468746,"(27.802322483769377, -80.09039916473711)",0,0.0,0.267758,0.204801,0.210972,0.111329,0.120794,0.126012,274.205017,...,244.551056,237.994751,3.000000,1.000000,0.999214,27.802322,-80.090399,2022-05-21 19:29:28.393143040,1653161368393143040,0
468747,"(27.801695861529403, -80.0065919270383)",0,0.0,0.210139,0.146647,0.146170,0.066865,0.090238,0.087520,283.027771,...,256.489288,246.983368,3.000000,1.000000,0.999204,27.801696,-80.006592,2022-05-21 19:29:28.393143040,1653161368393143040,0
468748,"(27.801079742704182, -79.92280256024858)",0,0.0,0.192599,0.132837,0.134742,0.030595,0.100397,0.090714,291.675354,...,268.844421,256.932465,3.000000,1.000000,0.999200,27.801080,-79.922803,2022-05-21 19:29:28.393143040,1653161368393143040,0


Right now, all of the lightning strikes might not necessarily be close to the assigned value. Also we have to deal with the time component. But this is much better than before. 

In [15]:
final_df["lightning"].sum()

433646.0

In [17]:
final_df[final_df["lightning"] > 0]

Unnamed: 0,Coordinates,nearest_time,lightning,CMI_C01,CMI_C02,CMI_C03,CMI_C04,CMI_C05,CMI_C06,CMI_C07,...,CMI_C15,CMI_C16,ACM,BCM,Cloud_Probabilities,lat,lon,time,time_int,Lightning
77,"(39.892965838717515, -85.38919277895775)",2022-05-21 19:00:31.268224,2.0,0.832559,0.748432,0.818491,0.167639,0.269365,0.242361,267.646301,...,235.438263,230.875458,3.0,1.0,0.999458,39.892966,-85.389193,2022-05-21 19:00:31.268224000,1653159631268224000,1
80,"(39.886507083263545, -85.08676360975025)",2022-05-21 19:00:31.268224,2.0,0.325535,0.260992,0.326647,0.060774,0.132083,0.123095,281.147644,...,260.695770,249.678925,3.0,1.0,0.999512,39.886507,-85.086764,2022-05-21 19:00:31.268224000,1653159631268224000,1
81,"(39.884398354519796, -84.98606588357747)",2022-05-21 19:00:31.268224,20.0,0.245516,0.182361,0.273115,0.045357,0.148234,0.112857,290.772522,...,266.123230,252.911514,3.0,1.0,0.999458,39.884398,-84.986066,2022-05-21 19:00:31.268224000,1653159631268224000,1
96,"(39.85538988889179, -83.48185081537908)",2022-05-21 19:00:31.268224,5.0,0.943610,0.845773,0.904067,0.565218,0.265754,0.372976,255.955521,...,214.271820,214.671158,3.0,1.0,0.999473,39.855390,-83.481851,2022-05-21 19:00:31.268224000,1653159631268224000,1
97,"(39.85362920331557, -83.3819590923714)",2022-05-21 19:00:31.268224,2.0,0.958392,0.844047,0.899007,0.654662,0.275535,0.397837,254.755585,...,204.544815,206.164505,3.0,1.0,0.999473,39.853629,-83.381959,2022-05-21 19:00:31.268224000,1653159631268224000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468697,"(27.84608393757221, -84.22541326743054)",2022-05-21 19:29:28.393143040,159.0,0.959801,0.864702,0.905535,0.528373,0.213036,0.316170,247.666443,...,212.283981,212.832809,3.0,1.0,0.998710,27.846084,-84.225413,2022-05-21 19:29:28.393143040,1653161368393143040,1
468698,"(27.84493076889334, -84.14034058334617)",2022-05-21 19:29:28.393143040,15.0,0.949146,0.858114,0.899702,0.558571,0.220436,0.327520,248.738693,...,209.812225,210.663971,3.0,1.0,0.998710,27.844931,-84.140341,2022-05-21 19:29:28.393143040,1653161368393143040,1
468699,"(27.84378863468443, -84.05530165587767)",2022-05-21 19:29:28.393143040,5.0,0.966547,0.871051,0.913690,0.568670,0.224841,0.332420,248.832825,...,210.735413,211.521194,3.0,1.0,0.998710,27.843789,-84.055302,2022-05-21 19:29:28.393143040,1653161368393143040,1
468701,"(27.841537412356363, -83.88532369465985)",2022-05-21 19:29:28.393143040,1.0,0.932360,0.840793,0.885594,0.586349,0.251666,0.360615,253.137390,...,207.507965,208.546783,3.0,1.0,0.998741,27.841537,-83.885324,2022-05-21 19:29:28.393143040,1653161368393143040,1


In [16]:
final_df.to_csv("/Users/robbiefeldstein/Documents/Programming/Research/Datasets/group_May_22.csv")
