## Participants
- Felipe Saadi
- Gabriel Caetano
- Luiz Granville
- Marcelo Feitoza
- Pedro Munhoz

---

### Libraries installation

In [2]:
! pip install cdflib spacepy scalecast pmdarima seaborn numpy --quiet



## Importing libraries and data (.windMission)

In [3]:
import pandas as pd
import numpy as np
from spacepy import pycdf

from getDatasets import downloadLatestDataset

# if the dataset does not exist, download it
if not os.path.exists('./data/latestDataset.cdf'):
  downloadLatestDataset()

windMission = pycdf.CDF("./data/latestDataset.cdf") # The Wind Mission’s Ion Parameters'

Exception: Cannot load CDF C library; checked . Try 'os.environ["CDF_LIB"] = library_directory' before import.

In [None]:
data = pycdf.concatCDF([
  # spacepy.pycdf.CDF(f) for f in glob.glob('*.cdf')
  pycdf.CDF("./datasets/wi_h2_mfi_20220101_v04.cdf"),
  pycdf.CDF("./datasets/wi_h2_mfi_20220102_v04.cdf"),
  pycdf.CDF("./datasets/wi_h2_mfi_20220104_v04.cdf"),
  pycdf.CDF("./datasets/wi_h2_mfi_20220105_v04.cdf"),
  pycdf.CDF("./datasets/wi_h2_mfi_20220106_v04.cdf"),
  pycdf.CDF("./datasets/wi_h2_mfi_20220108_v04.cdf")
])

In [None]:
data.keys

<function SpaceData.keys>

In [None]:
data['Epoch'][-1]

dmarray([datetime.datetime(2022, 1, 8, 23, 59, 59, 964000)], dtype=object)

In [None]:
data.keys()

dict_keys(['Epoch', 'Time_PB5', 'BF1', 'BGSM', 'BGSE', 'RANGE', 'SPC_MODE', 'MAG_MODE', 'Epoch1', 'Time1_PB5', 'NUM1_PTS_O', 'ZERO1_O', 'SENS1_O', 'AMPL1_O', 'ORTH1_O', 'PAYLD1_O', 'FLAG1_O', 'NUM1_PTS_I', 'ZERO1_I', 'SENS1_I', 'AMPL1_I', 'ORTH1_I', 'PAYLD1_I', 'FLAG1_I', 'label_time', 'format_time', 'unit_time', 'label_bgsm', 'label_bgse', 'cartesian'])

## Data description

In [None]:
data['Epoch'].shape()

In [None]:
years, months, days, hours, minutes, seconds = [], [], [], [], [], []

for i in range((len(data['Epoch'])-15), len(data['Epoch'])):
  # separate the date and time into columns
  date = str(data['Epoch'][i][0]).split(" ")

  # separate the date into year, month, day
  year = date[0].split("-")[0]
  month = date[0].split("-")[1]
  day = date[0].split("-")[2]

  # separate the time into hour, minute, second
  hour = date[1].split(":")[0]
  minute = date[1].split(":")[1]
  second = date[1].split(":")[2]

  # append the values to the lists
  years.append(year)
  months.append(month)
  days.append(day)
  hours.append(hour)
  minutes.append(minute)
  seconds.append(second)


In [None]:
# Create a column for each of the values
data['Year'] = years
data['Month'] = months
data['Day'] = days
data['Hour'] = hours
data['Minute'] = minutes
data['Second'] = seconds

In [None]:
data['Date'] = date

In [None]:
data['Date'][35:50]

[1641006003298.0,
 1641006003390.0,
 1641006003482.0,
 1641006003574.0,
 1641006003666.0,
 1641006003758.0,
 1641006003850.0,
 1641006003942.0,
 1641006004034.0,
 1641006004126.0,
 1641006004218.0,
 1641006004310.0,
 1641006004402.0,
 1641006004494.0,
 1641006004586.0]

In [None]:
data['AMPL1_I'].attrs

{'FIELDNAM': 'Inner Sensor Amplitude Correction (1 min)',
 'MONOTON': 'FALSE',
 'SCALETYP': 'LINEAR',
 'CATDESC': 'Inner Sensor Amplitude Correction (1 min)',
 'FILLVAL': -1e+31,
 'DEPEND_0': 'Epoch1',
 'VAR_TYPE': 'metadata',
 'TIME_RES': '1 min'}

In [None]:
data['AMPL1_I'].attrs

In [None]:
data['RANGE'].attrs

In [None]:
data['FLAG1_I'].attrs

In [None]:
data['MAG_MODE'].attrs

windMission['Ma']

In [None]:
data['Epoch1'].attrs

In [None]:
print(data['Epoch'])

In [None]:
type(data)

In [None]:
# !pip install dtw-python

In [None]:
data['Epoch']

dmarray([[datetime.datetime(2022, 1, 1, 0, 0, 0, 78000)],
         [datetime.datetime(2022, 1, 1, 0, 0, 0, 170000)],
         [datetime.datetime(2022, 1, 1, 0, 0, 0, 262000)],
         ...,
         [datetime.datetime(2022, 1, 2, 23, 59, 59, 797000)],
         [datetime.datetime(2022, 1, 2, 23, 59, 59, 889000)],
         [datetime.datetime(2022, 1, 2, 23, 59, 59, 981000)]],
        dtype=object)

## Conversion to a common DataFrame

In [None]:
epoch = pd.DataFrame(data['Epoch']).rename(columns={0: 'Epoch'})
bgse = pd.DataFrame(data['BGSE']).rename(columns={0: 'Bx', 1: 'By', 2: 'Bz'})
bf1 = pd.DataFrame(data['BF1']).rename(columns={0: 'Magnetic field magnitude'})
stormRange = pd.DataFrame(data['RANGE']).rename(columns={0: 'Storm range'})
spc_mode = pd.DataFrame(data['SPC_MODE']).rename(columns={0: 'S/C operational mode'})
mag_mode = pd.DataFrame(data['MAG_MODE']).rename(columns={0: 'WIND/MFI operational mode'})

In [None]:
dataframe = [epoch, bgse, bf1, stormRange, spc_mode, mag_mode]
# dataframe = [bgse, bf1, stormRange, spc_mode, mag_mode, years, months, days, hours, minutes, seconds]
df = pd.concat(dataframe, axis=1)
df[15]
df[14951]
df[12418355]

Unnamed: 0,Epoch,Bx,By,Bz,Magnetic field magnitude,Storm range,S/C operational mode,WIND/MFI operational mode
0,2022-01-01 00:00:00.078,-5.987128,2.666523,-5.138955,8.32856,1,1,11
1,2022-01-01 00:00:00.170,-5.991735,2.682342,-5.102776,8.314696,1,1,11
2,2022-01-01 00:00:00.262,-6.024165,2.690811,-5.060466,8.315007,1,1,11
3,2022-01-01 00:00:00.354,-6.029044,2.691353,-5.057029,8.316628,1,1,11
4,2022-01-01 00:00:00.446,-6.059289,2.698425,-5.075548,8.352105,1,1,11
5,2022-01-01 00:00:00.538,-6.049378,2.70033,-5.079763,8.3481,1,1,11
6,2022-01-01 00:00:00.630,-6.095523,2.685486,-5.074874,8.373863,1,1,11
7,2022-01-01 00:00:00.722,-6.129284,2.644819,-5.112437,8.408341,1,1,11
8,2022-01-01 00:00:00.814,-6.154401,2.650718,-5.110364,8.427264,1,1,11
9,2022-01-01 00:00:00.906,-6.170061,2.632748,-5.112915,8.434626,1,1,11


In [53]:
df['Epoch'].index.freq = 'D'

In [61]:
df['Epoch'] = pd.DataFrame(df['Epoch'])

In [92]:
df.head(10)

Unnamed: 0,Epoch,Bx,By,Bz,Magnetic field magnitude,Storm range,S/C operational mode,WIND/MFI operational mode
0,2022-01-01 00:00:00.078,-5.987128,2.666523,-5.138955,8.32856,1,1,11
1,2022-01-01 00:00:00.170,-5.991735,2.682342,-5.102776,8.314696,1,1,11
2,2022-01-01 00:00:00.262,-6.024165,2.690811,-5.060466,8.315007,1,1,11
3,2022-01-01 00:00:00.354,-6.029044,2.691353,-5.057029,8.316628,1,1,11
4,2022-01-01 00:00:00.446,-6.059289,2.698425,-5.075548,8.352105,1,1,11
5,2022-01-01 00:00:00.538,-6.049378,2.70033,-5.079763,8.3481,1,1,11
6,2022-01-01 00:00:00.630,-6.095523,2.685486,-5.074874,8.373863,1,1,11
7,2022-01-01 00:00:00.722,-6.129284,2.644819,-5.112437,8.408341,1,1,11
8,2022-01-01 00:00:00.814,-6.154401,2.650718,-5.110364,8.427264,1,1,11
9,2022-01-01 00:00:00.906,-6.170061,2.632748,-5.112915,8.434626,1,1,11


# Regressão linear - Target: Magnetic field magnitude

In [110]:
from sklearn.model_selection import train_test_split

x_entrada = df[['Bx','By','Bz', 'Storm range', 'Epoch',
                'S/C operational mode', 'WIND/MFI operational mode']].values

y_saida = df['Magnetic field magnitude'].values
# 'Epoch'
X_train, X_test, Y_train, Y_test = train_test_split(x_entrada, y_saida, 
                                                    test_size = 0.3, 
                                                    random_state = 42)

KeyError: "None of [Index(['Bx', 'By', 'Bz', 'Storm range', 'Epoch', 'S/C operational mode',\n       'WIND/MFI operational mode'],\n      dtype='object')] are in the [columns]"

In [None]:
from sklearn.linear_model import LinearRegression

model = LinearRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)
Y_pred

In [None]:
!pip install nsepy

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

print('Acuracidade (treino): ', model.score(X_train, Y_train))
print('Acuracidade (teste): ', model.score(X_test, Y_test))
print('R2 score: ', r2_score(Y_test, Y_pred))
print('Mean squared error: ', mean_squared_error(Y_test, Y_pred))
print('Mean absolute error: ', mean_absolute_error(Y_test, Y_pred))

NameError: name 'model' is not defined

In [None]:
print(Y_pred)

In [None]:
Y_pred2 = model.predict(X_train)
Y_pred2

In [None]:
Y_pred_total = np.concatenate((Y_pred2, Y_pred), axis=0)
print(len(Y_pred_total))

In [None]:
df2['Prediction'] = Y_pred_total
df2.describe()

In [None]:
# Rescale Prediction from 0 to 10 
df2['Prediction'] = df2['Prediction'] / df2['Prediction'].max() * 10
df2['Prediction'] = df2['Prediction'].round(2)
df2.head()

KeyError: 'Prediction'

## Saving model

In [None]:
import pickle
import joblib as jbl  

saved_model = pickle.dumps(model)
jbl.dump(model, 'model.pkl')
  
# Load the pickled model
modelPickle = pickle.loads(saved_model)
  
# Use the loaded pickled model to make predictions
modelPickle.predict(X_test)

df2.to_csv('dataframe2.csv', index=False)

## Arima Model

In [None]:
import pandas as pd
import numpy as np
from scalecast.Forecaster import Forecaster
from pmdarima import auto_arima
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(rc={'figure.figsize':(14,7)})

In [None]:
fs = Forecaster(
  y = df['Magnetic field magnitude'],
  model = auto_arima,
  current_dates = df['Epoch'],
  freq = 'D',
)

In [None]:
fs.generate_future_dates(7) 
fs.set_test_length(.2)
fs.set_estimator('arima')
fs.manual_forecast(call_me='arima1')

In [None]:
f.generate_future_dates(7) 
f.set_test_length(.2)
f.set_estimator('arima')
f.manual_forecast(call_me='arima1')

In [None]:
f.plot_test_set(ci=True)
plt.title('ARIMA Test-Set Performance',size=14)
plt.show()