# Evaluation of the prediction of solar storms in 2024

In [3]:
from magpy.stream import read
from magpy.core import plot as mp

import json
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib

# Kept at column 0: a leading indent on a top-level import raises
# "IndentationError: unexpected indent" and prevents the whole cell from running.
from pkg_resources import resource_filename


In [4]:
# Location of the CME evaluation table (absolute path on the author's machine).
CME_EVALUATION_CSV = '/home/christopher/geomagnetic_storms/cme4evaluation.csv'

# Load the table into a DataFrame and display it.
eval_graph = pd.read_csv(CME_EVALUATION_CSV)
eval_graph

Unnamed: 0.1,Unnamed: 0,index,start,arrival,KPrange,Kp,K,InRangeGlobal,InRangeLocal,timingKp,timingK,Onsetdst
0,0,CME-20240105T015300,2024-01-05T01:53:00,2024-01-08T09:39:00,2.66667 - 5.0,0.000000,1.0,lower,lower,FA,FA,FA
1,1,CME-20240106T081200,2024-01-06T08:12:00,2024-01-09T13:00:00,2.0 - 4.5,1.000000,2.0,lower,yes,M,H,FA
2,2,CME-20240106T173600,2024-01-06T17:36:00,2024-01-10T01:00:00,2.0 - 4.0,1.000000,2.0,lower,yes,H,H,FA
3,3,CME-20240109T154800,2024-01-09T15:48:00,2024-01-12T10:00:00,3.0 - 4.5,0.333333,0.0,lower,lower,FA,FA,FA
4,4,CME-20240120T092400,2024-01-20T09:24:00,2024-01-22T16:50:00,4.44444 - 6.44444,2.000000,2.0,lower,lower,M,FA,FA
...,...,...,...,...,...,...,...,...,...,...,...,...
181,181,CME-20241226T191200,2024-12-26T19:12:00,2024-12-30T13:00:00,2.0 - 3.0,1.000000,1.0,lower,lower,M,M,FA
182,182,CME-20241228T064800,2024-12-28T06:48:00,2025-01-01T10:00:00,2.0 - 4.0,,,,,,,FA
183,183,CME-20241229T012300,2024-12-29T01:23:00,2024-12-31T16:51:00,4.66667 - 6.33333,,,,,,,FA
184,184,CME-20241229T062400,2024-12-29T06:24:00,2024-12-31T16:11:00,5.11111 - 7.22222,,,,,,,FA


In [5]:
# Select every column whose name contains 'unnamed' (case-insensitive) ...
# (presumably index columns left behind by earlier CSV round-trips — TODO confirm)
unnamed_cols = eval_graph.columns[eval_graph.columns.str.contains('unnamed', case=False)]

# ... and remove them from eval_graph in place, then display the result.
eval_graph.drop(columns=unnamed_cols, inplace=True)
eval_graph

Unnamed: 0,index,start,arrival,KPrange,Kp,K,InRangeGlobal,InRangeLocal,timingKp,timingK,Onsetdst
0,CME-20240105T015300,2024-01-05T01:53:00,2024-01-08T09:39:00,2.66667 - 5.0,0.000000,1.0,lower,lower,FA,FA,FA
1,CME-20240106T081200,2024-01-06T08:12:00,2024-01-09T13:00:00,2.0 - 4.5,1.000000,2.0,lower,yes,M,H,FA
2,CME-20240106T173600,2024-01-06T17:36:00,2024-01-10T01:00:00,2.0 - 4.0,1.000000,2.0,lower,yes,H,H,FA
3,CME-20240109T154800,2024-01-09T15:48:00,2024-01-12T10:00:00,3.0 - 4.5,0.333333,0.0,lower,lower,FA,FA,FA
4,CME-20240120T092400,2024-01-20T09:24:00,2024-01-22T16:50:00,4.44444 - 6.44444,2.000000,2.0,lower,lower,M,FA,FA
...,...,...,...,...,...,...,...,...,...,...,...
181,CME-20241226T191200,2024-12-26T19:12:00,2024-12-30T13:00:00,2.0 - 3.0,1.000000,1.0,lower,lower,M,M,FA
182,CME-20241228T064800,2024-12-28T06:48:00,2025-01-01T10:00:00,2.0 - 4.0,,,,,,,FA
183,CME-20241229T012300,2024-12-29T01:23:00,2024-12-31T16:51:00,4.66667 - 6.33333,,,,,,,FA
184,CME-20241229T062400,2024-12-29T06:24:00,2024-12-31T16:11:00,5.11111 - 7.22222,,,,,,,FA


In [6]:
# index=False: do not write the row index as an extra, header-less column.
# Otherwise pd.read_csv loads it back as an 'Unnamed: 0' column, which is
# exactly the kind of column that had to be dropped from the input file.
eval_graph.to_csv('Final_evaluation_graph.csv', index=False)

Important considerations
-

The InRangeGlobal and InRangeLocal columns were filled in by inspecting the Kp, K and CME charts, using the following criteria:
- yes: the Kp or K is within the predicted KPrange.
- lower: the Kp or K is lower than the predicted KPrange.
- higher: the Kp or K is higher than the predicted KPrange.

The timingKp and timingK columns were filled in by inspecting the Kp, K and CME charts, using the following criteria:
- H (hit): when the prediction matches the K or Kp value at the exact same time it was predicted.
- M (miss): when the prediction matches the K or Kp value within 24 h before or after the predicted time (a window of up to 48 h), but not at the exact predicted time.
- FA (false alarm): when none of the conditions above were true.

The Onsetdst column was generated automatically with a Python loop that applies the following rules:
- H (hit): when the prediction fell between 1 hour before the onset of the storm and the end of the storm.
- M (miss): when the prediction was outside the H range but within 12 h before or after the actual event.
- FA (false alarm): when none of the conditions above were true.

Kp, K and Dst rates calculation
-

In [7]:
# Non-null values per column over the rows where timingK is 'FA' (false alarm).
k_false_alarm_rows = eval_graph['timingK'] == 'FA'
eval_graph[k_false_alarm_rows].count()

index            34
start            34
arrival          34
KPrange          34
Kp               34
K                34
InRangeGlobal    34
InRangeLocal     34
timingKp         34
timingK          34
Onsetdst         34
dtype: int64

In [17]:
# Non-null values per column over the rows where timingK is 'M' (miss).
k_miss_rows = eval_graph['timingK'] == 'M'
eval_graph[k_miss_rows].count()

index            79
start            79
arrival          79
KPrange          79
Kp               79
K                78
InRangeGlobal    79
InRangeLocal     78
timingKp         79
timingK          79
Onsetdst         79
dtype: int64

In [9]:
# Non-null values per column over the rows where timingK is 'H' (hit).
k_hit_rows = eval_graph['timingK'] == 'H'
eval_graph[k_hit_rows].count()

index            69
start            69
arrival          69
KPrange          69
Kp               69
K                69
InRangeGlobal    69
InRangeLocal     68
timingKp         69
timingK          69
Onsetdst         69
dtype: int64

In [15]:
# Hit rate for timingK: H / (H + M).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
timing_k_counts = eval_graph['timingK'].value_counts()
k_hits = int(timing_k_counts.get('H', 0))
k_misses = int(timing_k_counts.get('M', 0))
# Guard against an empty H+M set instead of raising ZeroDivisionError.
k_hits / (k_hits + k_misses) if (k_hits + k_misses) else float('nan')

0.46621621621621623

In [18]:
# Accuracy for timingK: H / (H + M + FA).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
timing_k_totals = eval_graph['timingK'].value_counts()
k_h = int(timing_k_totals.get('H', 0))
k_m = int(timing_k_totals.get('M', 0))
k_fa = int(timing_k_totals.get('FA', 0))
# Guard against an empty classification set instead of raising ZeroDivisionError.
k_h / (k_h + k_m + k_fa) if (k_h + k_m + k_fa) else float('nan')

0.3791208791208791

In [21]:
# Non-null values per column over the rows where timingKp is 'FA' (false alarm).
kp_false_alarm_rows = eval_graph['timingKp'] == 'FA'
eval_graph[kp_false_alarm_rows].count()

index            31
start            31
arrival          31
KPrange          31
Kp               31
K                30
InRangeGlobal    31
InRangeLocal     30
timingKp         31
timingK          31
Onsetdst         31
dtype: int64

In [19]:
# Non-null values per column over the rows where timingKp is 'M' (miss).
kp_miss_rows = eval_graph['timingKp'] == 'M'
eval_graph[kp_miss_rows].count()

index            86
start            86
arrival          86
KPrange          86
Kp               86
K                86
InRangeGlobal    86
InRangeLocal     86
timingKp         86
timingK          86
Onsetdst         86
dtype: int64

In [20]:
# Non-null values per column over the rows where timingKp is 'H' (hit).
kp_hit_rows = eval_graph['timingKp'] == 'H'
eval_graph[kp_hit_rows].count()

index            65
start            65
arrival          65
KPrange          65
Kp               65
K                65
InRangeGlobal    65
InRangeLocal     64
timingKp         65
timingK          65
Onsetdst         65
dtype: int64

In [22]:
# Hit rate for timingKp: H / (H + M).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
timing_kp_counts = eval_graph['timingKp'].value_counts()
kp_hits = int(timing_kp_counts.get('H', 0))
kp_misses = int(timing_kp_counts.get('M', 0))
# Guard against an empty H+M set instead of raising ZeroDivisionError.
kp_hits / (kp_hits + kp_misses) if (kp_hits + kp_misses) else float('nan')

0.4304635761589404

In [21]:
# Accuracy for timingKp: H / (H + M + FA).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
timing_kp_totals = eval_graph['timingKp'].value_counts()
kp_h = int(timing_kp_totals.get('H', 0))
kp_m = int(timing_kp_totals.get('M', 0))
kp_fa = int(timing_kp_totals.get('FA', 0))
# Guard against an empty classification set instead of raising ZeroDivisionError.
kp_h / (kp_h + kp_m + kp_fa) if (kp_h + kp_m + kp_fa) else float('nan')

0.35714285714285715

In [24]:
# Non-null values per column over the rows where Onsetdst is 'H' (hit).
dst_hit_rows = eval_graph['Onsetdst'] == 'H'
eval_graph[dst_hit_rows].count()

index            70
start            70
arrival          70
KPrange          70
Kp               70
K                70
InRangeGlobal    70
InRangeLocal     69
timingKp         70
timingK          70
Onsetdst         70
dtype: int64

In [26]:
# Non-null values per column over the rows where Onsetdst is 'M' (miss).
dst_miss_rows = eval_graph['Onsetdst'] == 'M'
eval_graph[dst_miss_rows].count()

index            23
start            23
arrival          23
KPrange          23
Kp               23
K                23
InRangeGlobal    23
InRangeLocal     23
timingKp         23
timingK          23
Onsetdst         23
dtype: int64

In [27]:
# Non-null values per column over the rows where Onsetdst is 'FA' (false alarm).
dst_false_alarm_rows = eval_graph['Onsetdst'] == 'FA'
eval_graph[dst_false_alarm_rows].count()

index            93
start            93
arrival          93
KPrange          93
Kp               89
K                88
InRangeGlobal    89
InRangeLocal     88
timingKp         89
timingK          89
Onsetdst         93
dtype: int64

In [10]:
# Hit rate for Onsetdst: H / (H + M).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
onset_dst_counts = eval_graph['Onsetdst'].value_counts()
dst_hits = int(onset_dst_counts.get('H', 0))
dst_misses = int(onset_dst_counts.get('M', 0))
# Guard against an empty H+M set instead of raising ZeroDivisionError.
dst_hits / (dst_hits + dst_misses) if (dst_hits + dst_misses) else float('nan')

0.7526881720430108

In [25]:
# Accuracy for Onsetdst: H / (H + M + FA).
# The counts are read from eval_graph instead of being typed in by hand, so the
# result cannot go stale when the CSV changes or cells are run out of order.
onset_dst_totals = eval_graph['Onsetdst'].value_counts()
dst_h = int(onset_dst_totals.get('H', 0))
dst_m = int(onset_dst_totals.get('M', 0))
dst_fa = int(onset_dst_totals.get('FA', 0))
# Guard against an empty classification set instead of raising ZeroDivisionError.
dst_h / (dst_h + dst_m + dst_fa) if (dst_h + dst_m + dst_fa) else float('nan')

0.3763440860215054