Google Stocks

In [98]:
import pandas as pd
import numpy as np
from neuralprophet import NeuralProphet, set_log_level
import math

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from ipywidgets import interact_manual

plotting_backend = "plotly-static"

In [3]:
df = pd.read_csv(
    "https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/multivariate/ER_Europe_subset_10nodes.csv"
)
df["ID"] = df["ID"].astype(str)
df["ds"] = pd.to_datetime(df["ds"])
# # use one year for faster training
df = df[df["ID"] =="1"]
df=df.drop(columns=["solar","solar_fcs","ID"])
df.head()

Unnamed: 0,ds,y
0,2012-01-01 00:00:00,75.6549
1,2012-01-01 01:00:00,70.9958
2,2012-01-01 02:00:00,66.6382
3,2012-01-01 03:00:00,62.992
4,2012-01-01 04:00:00,61.0699


In [4]:
df.shape

(26304, 2)

Distrubution plots

In [6]:
fig = px.line(df, x=df["ds"], y=df["y"])
fig.show()

## ECG Data

Dataset for ECG taken from [Physio Bank ATM](https://archive.physionet.org/cgi-bin/atm/ATM) from the dataset [The MIT-BIH Long Term Database](https://archive.physionet.org/physiobank/database/ltdb/)

In [31]:
import pandas as pd

In [99]:
df=pd.read_csv("ECG_preds.csv")
df = df.iloc[1:].reset_index(drop=True)
df.head()

Unnamed: 0,'Elapsed time','ECG1','ECG2'
0,'0:00.000',0.16,-0.29
1,'0:00.008',0.18,-0.3
2,'0:00.016',0.17,-0.32
3,'0:00.023',0.18,-0.29
4,'0:00.031',0.18,-0.31


In [100]:
df_cleaned = df.rename(columns=lambda x: x.strip("'"))  
df_cleaned.head()

Unnamed: 0,Elapsed time,ECG1,ECG2
0,'0:00.000',0.16,-0.29
1,'0:00.008',0.18,-0.3
2,'0:00.016',0.17,-0.32
3,'0:00.023',0.18,-0.29
4,'0:00.031',0.18,-0.31


In [101]:
df_cleaned.ECG1 = df_cleaned.ECG1.astype('float64')
df_cleaned.ECG2 = df_cleaned.ECG2.astype('float64')

In [102]:
from datetime import datetime, timedelta
df_cleaned=df_cleaned.applymap(lambda x: x.strip("'") if isinstance(x, str) else x)  # Remove quotes from data
df_cleaned['Elapsed time'] = "00:0" + df_cleaned["Elapsed time"].astype(str) # Making it into hh:mm:ss format 
df_cleaned['Elapsed time'] = df_cleaned['Elapsed time'].apply(lambda x:timedelta(minutes=int(x.split(":")[1]), seconds=float(x.split(":")[2])))
df_cleaned.head()

Unnamed: 0,Elapsed time,ECG1,ECG2
0,0 days 00:00:00,0.16,-0.29
1,0 days 00:00:00.008000,0.18,-0.3
2,0 days 00:00:00.016000,0.17,-0.32
3,0 days 00:00:00.023000,0.18,-0.29
4,0 days 00:00:00.031000,0.18,-0.31


In [103]:
df_cleaned.head()

Unnamed: 0,Elapsed time,ECG1,ECG2
0,0 days 00:00:00,0.16,-0.29
1,0 days 00:00:00.008000,0.18,-0.3
2,0 days 00:00:00.016000,0.17,-0.32
3,0 days 00:00:00.023000,0.18,-0.29
4,0 days 00:00:00.031000,0.18,-0.31


In [104]:
start_time = datetime(2024, 1, 1, 0, 0, 0)
df_cleaned["Timestamp"] = start_time + df_cleaned["Elapsed time"]
df_cleaned.drop(columns=['Elapsed time'], inplace = True)
df_cleaned = df_cleaned[["Timestamp","ECG1", "ECG2"]]

In [105]:
df_cleaned.head()

Unnamed: 0,Timestamp,ECG1,ECG2
0,2024-01-01 00:00:00.000,0.16,-0.29
1,2024-01-01 00:00:00.008,0.18,-0.3
2,2024-01-01 00:00:00.016,0.17,-0.32
3,2024-01-01 00:00:00.023,0.18,-0.29
4,2024-01-01 00:00:00.031,0.18,-0.31


In [108]:
import plotly.express as px

fig = px.line(df, x=df_cleaned["Timestamp"], y=df_cleaned["ECG1"])
fig.show()

In [110]:
df_cleaned.to_csv('cleaned_ECG.csv',index=False)

## Energy Load Vales

data taken from https://www.entsoe.eu/data/power-stats/ values of 2024

In [173]:
import pandas as pd

In [174]:
df = pd.read_csv('monthly_hourly_load_values_2024.csv')
df.shape

(234273, 11)

In [175]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234273 entries, 0 to 234272
Data columns (total 11 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   MeasureItem       234273 non-null  object 
 1   DateUTC           234273 non-null  object 
 2   DateShort         234273 non-null  object 
 3   TimeFrom          234273 non-null  object 
 4   TimeTo            234273 non-null  object 
 5   CountryCode       234273 non-null  object 
 6   Cov_ratio         234273 non-null  int64  
 7   Value             234273 non-null  float64
 8   Value_ScaleTo100  234273 non-null  float64
 9   CreateDate        234273 non-null  object 
 10  UpdateDate        234273 non-null  object 
dtypes: float64(2), int64(1), object(8)
memory usage: 19.7+ MB


In [176]:
df.head()

Unnamed: 0,MeasureItem,DateUTC,DateShort,TimeFrom,TimeTo,CountryCode,Cov_ratio,Value,Value_ScaleTo100,CreateDate,UpdateDate
0,Monthly Hourly Load Values,01-01-2024 00:00,01-01-2024,00:00,01:00,AL,100,731.0,731.0,06-12-2024 14:36:48,06-12-2024 14:36:48
1,Monthly Hourly Load Values,01-01-2024 01:00,01-01-2024,01:00,02:00,AL,100,620.0,620.0,06-12-2024 14:36:48,06-12-2024 14:36:48
2,Monthly Hourly Load Values,01-01-2024 02:00,01-01-2024,02:00,03:00,AL,100,554.0,554.0,06-12-2024 14:36:48,06-12-2024 14:36:48
3,Monthly Hourly Load Values,01-01-2024 03:00,01-01-2024,03:00,04:00,AL,100,523.0,523.0,06-12-2024 14:36:48,06-12-2024 14:36:48
4,Monthly Hourly Load Values,01-01-2024 04:00,01-01-2024,04:00,05:00,AL,100,528.0,528.0,06-12-2024 14:36:48,06-12-2024 14:36:48


In [177]:
df.MeasureItem.unique()

array(['Monthly Hourly Load Values'], dtype=object)

In [178]:
df.Cov_ratio.unique()

array([100], dtype=int64)

In [179]:
df.CreateDate.unique()

array(['06-12-2024 14:36:48'], dtype=object)

In [180]:
df.UpdateDate.unique()

array(['06-12-2024 14:36:48'], dtype=object)

In [181]:
correlation = df["Value"].corr(df["Value_ScaleTo100"])
correlation

1.0

In [182]:
df.drop(columns=['MeasureItem', 'Cov_ratio', 'CreateDate', 'UpdateDate', 'DateShort', "TimeFrom", "TimeTo", "Value_ScaleTo100"],inplace = True)

In [183]:
df.head()

Unnamed: 0,DateUTC,CountryCode,Value
0,01-01-2024 00:00,AL,731.0
1,01-01-2024 01:00,AL,620.0
2,01-01-2024 02:00,AL,554.0
3,01-01-2024 03:00,AL,523.0
4,01-01-2024 04:00,AL,528.0


In [199]:
print(len(df.CountryCode.unique()),' Different countries')
df.CountryCode.unique()

36  Different countries


array(['AL', 'AT', 'BA', 'BE', 'BG', 'CH', 'CZ', 'DE', 'DK', 'EE', 'ES',
       'FI', 'FR', 'GB', 'GE', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU',
       'LV', 'MD', 'ME', 'MK', 'NL', 'NO', 'PL', 'PT', 'RO', 'RS', 'SE',
       'SI', 'SK', 'XK'], dtype=object)

In [185]:
df_gb = df[df['CountryCode']=='GB']
df_gb.shape

(6396, 3)

In [186]:
df_al = df[df['CountryCode']=='AL']
df_al.shape
# different countries have different number of readings

(6120, 3)

In [187]:
df_gb["DateUTC"] = pd.to_datetime(df_gb["DateUTC"], format="%d-%m-%Y %H:%M")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [188]:
df_gb.head()

Unnamed: 0,DateUTC,CountryCode,Value
84918,2024-01-01 00:00:00,GB,638.5
84919,2024-01-01 01:00:00,GB,578.0
84920,2024-01-01 02:00:00,GB,565.5
84921,2024-01-01 03:00:00,GB,540.0
84922,2024-01-01 04:00:00,GB,500.5


In [189]:
import plotly.express as px

fig = px.line(df_gb, x=df_gb["DateUTC"], y=df_gb["Value"])
fig.show()

#inaccuracies in data

In [190]:
df_gb_c = df_gb.sort_values(by="DateUTC")
df_gb_c.drop(columns=['CountryCode'], inplace=True) #because only one country is there after filtering

In [191]:
import plotly.express as px

fig = px.line(df_gb_c, x=df_gb_c["DateUTC"], y=df_gb_c["Value"])
fig.show()

In [193]:
df_gb_c.head()

Unnamed: 0,DateUTC,Value
84918,2024-01-01 00:00:00,638.5
84919,2024-01-01 01:00:00,578.0
84920,2024-01-01 02:00:00,565.5
84921,2024-01-01 03:00:00,540.0
84922,2024-01-01 04:00:00,500.5


In [None]:
duplicates = df_gb_c[df_gb_c.duplicated(subset=["DateUTC"], keep='first')]
duplicates # There are duplicates in the data

Unnamed: 0,DateUTC,Value
87026,2024-03-31 03:00:00,566.5


In [205]:
df_gb_c = df_gb_c.drop_duplicates(subset=["DateUTC"], keep="first")

In [207]:
df_gb_c.shape

(6395, 2)

In [208]:
df_gb_c.to_csv('Load_in_Great_Britain.csv',index = False)