In [80]:
import pandas as pd
from prophet import Prophet
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [32]:
# Load the hourly sample (first CSV column becomes the row index) and
# convert the timestamp column to datetimes in the same expression.
df_hour = (
    pd.read_csv(
        '/content/drive/MyDrive/DGdataset/Sample Time Series/hourly/sample_10.csv',
        index_col=0,
    )
    .assign(point_timestamp=lambda frame: pd.to_datetime(frame['point_timestamp']))
)
df_hour.head()

Unnamed: 0,point_timestamp,point_value
0,2022-12-08 13:00:00,
1,2022-12-08 14:00:00,11.0
2,2022-12-08 15:00:00,17.0
3,2022-12-08 16:00:00,27.0
4,2022-12-08 17:00:00,17.0


In [33]:
# Summarise column dtypes and non-null counts for the hourly frame.
df_hour.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 228 entries, 0 to 227
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   point_timestamp  228 non-null    datetime64[ns]
 1   point_value      227 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.3 KB


In [31]:
# Load the daily sample (first CSV column becomes the row index) and
# convert the timestamp column to datetimes in the same expression.
df_daily = (
    pd.read_csv(
        '/content/drive/MyDrive/DGdataset/Sample Time Series/daily/sample_10.csv',
        index_col=0,
    )
    .assign(point_timestamp=lambda frame: pd.to_datetime(frame['point_timestamp']))
)
df_daily.head()

Unnamed: 0,point_timestamp,point_value
0,2021-10-15,
1,2021-10-16,442918.0
2,2021-10-17,111549.0
3,2021-10-18,500495.0
4,2021-10-19,544160.0


In [27]:
# Summarise column dtypes and non-null counts for the single-file daily frame.
# NOTE(review): the recorded output below (point_value int64, 745 non-null) does not
# match the head() shown above (NaN in row 0, float values), and this cell's execution
# count (27) is lower than the load cell's (31) — it appears to have been run against a
# different df_daily. Re-run after the load cell to refresh the output.
df_daily.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 745 entries, 0 to 744
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   point_timestamp  745 non-null    datetime64[ns]
 1   point_value      745 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 17.5 KB


In [24]:
# Load the weekly sample (first CSV column becomes the row index) and
# convert the timestamp column to datetimes in the same expression.
df_week = (
    pd.read_csv(
        '/content/drive/MyDrive/DGdataset/Sample Time Series/weekly/sample_1.csv',
        index_col=0,
    )
    .assign(point_timestamp=lambda frame: pd.to_datetime(frame['point_timestamp']))
)
df_week.head()

Unnamed: 0,point_timestamp,point_value
0,2021-01-11,53423552.0
1,2021-01-18,56541361.0
2,2021-01-25,55957540.0
3,2021-02-01,56278317.0
4,2021-02-08,50410674.0


In [25]:
# Load the monthly sample (first CSV column becomes the row index) and
# convert the timestamp column to datetimes in the same expression.
df_month = (
    pd.read_csv(
        '/content/drive/MyDrive/DGdataset/Sample Time Series/monthly/sample_1.csv',
        index_col=0,
    )
    .assign(point_timestamp=lambda frame: pd.to_datetime(frame['point_timestamp']))
)
df_month.head()

Unnamed: 0,point_timestamp,point_value
0,2018-01-01,0.000364
1,2018-02-01,0.000313
2,2018-03-01,7.9e-05
3,2018-04-01,4.4e-05
4,2018-05-01,0.0


In [138]:
# Stack several daily sample files into one frame.
# NOTE(review): range(1, MAX_iter) stops at sample_9, so sample_10 (loaded on its
# own in an earlier cell) is not part of the combined frame — confirm that is intended.
MAX_iter = 10

# Read straight into a list instead of a per-file `df_daily` loop variable, so the
# `df_daily` frame loaded earlier is not silently replaced by the last file read.
frames = [
    pd.read_csv(
        '/content/drive/MyDrive/DGdataset/Sample Time Series/daily/sample_{}.csv'.format(i),
        index_col=0,
    )
    for i in range(1, MAX_iter)
]

# Each file keeps its own 0..n row index, so index labels repeat in the result.
combined_df_daily = pd.concat(frames)
combined_df_daily['point_timestamp'] = pd.to_datetime(combined_df_daily['point_timestamp'])
combined_df_daily.head()

Unnamed: 0,point_timestamp,point_value
0,2019-07-14,6.0
1,2019-07-15,7.0
2,2019-07-16,6.0
3,2019-07-17,6.0
4,2019-07-18,7.0


In [139]:
# Summarise column dtypes and non-null counts for the combined daily frame.
combined_df_daily.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2967 entries, 0 to 95
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   point_timestamp  2967 non-null   datetime64[ns]
 1   point_value      2871 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 69.5 KB


Handle missing values

In [140]:
# Number of missing observations in the combined daily series.
combined_df_daily['point_value'].isna().sum()

96

In [141]:
# Same summary as the earlier combined_df_daily.info() call; the frame has not been
# modified in between, so the recorded output is identical.
combined_df_daily.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2967 entries, 0 to 95
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   point_timestamp  2967 non-null   datetime64[ns]
 1   point_value      2871 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 69.5 KB


In [142]:
# Index the frame by timestamp (keeping the column as well) and add a
# time-interpolated copy of point_value alongside the raw values.
# NOTE(review): at this point the frame is still the unsorted concatenation of
# several sample files, so a gap may be filled from rows that are close in time
# but come from a different file — confirm this is the intended behaviour.
combined_df_daily = (
    combined_df_daily
    .set_index('point_timestamp', drop=False)
    .assign(interpolated_value=lambda frame: frame['point_value'].interpolate(method='time'))
)

In [143]:
# Order the rows by the point_timestamp index set in the previous cell.
combined_df_daily = combined_df_daily.sort_index()

In [144]:
# Raw point_value over time, with a menu that zooms the x-axis to one calendar year.
years = sorted(combined_df_daily.index.year.unique())

# The former category_orders={'x': years} argument was dropped: its keys must be
# column names of the data frame ('x' is not one) and it only orders categorical
# values, so it had no effect on this date axis.
fig = px.line(combined_df_daily, x='point_timestamp', y='point_value')

# One 'relayout' button per year; each sets xaxis.range to Jan 1 – Dec 31 of that year.
year_buttons = [
    {
        'method': 'relayout',
        'label': str(year),
        'args': [{'xaxis.range': [str(year) + '-01-01', str(year) + '-12-31']}],
    }
    for year in years
]

fig.update_layout(updatemenus=[{'buttons': year_buttons,
                               'showactive': True,
                               'x': 0.01,
                               'xanchor': 'left',
                               'y': 1.15,
                               'yanchor': 'top'}])
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [145]:
# Line chart of the interpolated series with a range slider under the x-axis.
fig = (
    go.Figure(
        data=[
            go.Scatter(
                x=combined_df_daily['point_timestamp'],
                y=combined_df_daily['interpolated_value'],
                mode='lines',
                name='Time Series',
            )
        ]
    )
    # Title and axis captions
    .update_layout(title='Time Series Chart', xaxis_title='Date', yaxis_title='Value')
    .update_xaxes(rangeslider_visible=True)
)

# Render the figure
fig.show()

In [112]:
# Interpolated values over time, with a menu that zooms the x-axis to one calendar year.
years = sorted(combined_df_daily.index.year.unique())

# The former category_orders={'x': years} argument was dropped: its keys must be
# column names of the data frame ('x' is not one) and it only orders categorical
# values, so it had no effect on this date axis.
fig = px.line(combined_df_daily, x=combined_df_daily.index, y='interpolated_value')

# One 'relayout' button per year; each sets xaxis.range to Jan 1 – Dec 31 of that year.
year_buttons = [
    {
        'method': 'relayout',
        'label': str(year),
        'args': [{'xaxis.range': [str(year) + '-01-01', str(year) + '-12-31']}],
    }
    for year in years
]

fig.update_layout(updatemenus=[{'buttons': year_buttons,
                               'showactive': True,
                               'x': 0.01,
                               'xanchor': 'left',
                               'y': 1.15,
                               'yanchor': 'top'}])
fig.update_xaxes(rangeslider_visible=True)
fig.show()