# Visualizations and Plots in Time Series Analysis

In [1]:
from pathlib import Path

import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go

## Dataset


In [2]:
# Load the monthly beer production dataset (sourced from Kaggle) from a
# local CSV file; nothing is downloaded here.
df = pd.read_csv('data/2/monthly-beer-production-in-austr.csv')

In [3]:
# Preview the first rows to check that the file was parsed as expected.
df.head()

Unnamed: 0,Month,Monthly beer production
0,1956-01,93.2
1,1956-02,96.0
2,1956-03,95.2
3,1956-04,77.1
4,1956-05,70.9


In [4]:
# Inspect dtypes and non-null counts; 'Month' is read as plain strings
# (object dtype), not as datetimes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 476 entries, 0 to 475
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Month                    476 non-null    object 
 1   Monthly beer production  476 non-null    float64
dtypes: float64(1), object(1)
memory usage: 7.6+ KB


In [5]:
# Derive the year from the 'YYYY-MM' month strings. The vectorised .str
# accessor replaces a per-row Python lambda and yields the same 4-character
# string for every non-null row.
df['Year'] = df['Month'].str[:4]
df[['Month', 'Year']]

Unnamed: 0,Month,Year
0,1956-01,1956
1,1956-02,1956
2,1956-03,1956
3,1956-04,1956
4,1956-05,1956
...,...,...
471,1995-04,1995
472,1995-05,1995
473,1995-06,1995
474,1995-07,1995


## Waterfall Plot

In [6]:
# Keep only the 1965 rows for the waterfall example ('Year' holds strings,
# hence the quoted literal).
df1 = df.loc[df['Year'] == '1965'].copy()
df1


Unnamed: 0,Month,Monthly beer production,Year
108,1965-01,106.9,1965
109,1965-02,96.6,1965
110,1965-03,127.3,1965
111,1965-04,98.2,1965
112,1965-05,100.2,1965
113,1965-06,89.4,1965
114,1965-07,95.3,1965
115,1965-08,104.2,1965
116,1965-09,106.4,1965
117,1965-10,116.2,1965


In [7]:
class WaterfallChart:
    """Build a Plotly waterfall chart from two DataFrame columns and save it as HTML.

    Parameters
    ----------
    df : pd.DataFrame
        Source data; one bar is drawn per row, in row order.
    x : str
        Name of the column providing the bar labels (x axis).
    y : str
        Name of the numeric column providing the bar values.
    text : optional
        Forwarded unchanged to ``go.Waterfall(text=...)``; left out of the
        trace when ``None``.
    """

    def __init__(self, df: pd.DataFrame, x: str, y: str, text = None):
        self.df = df
        self.x = x
        self.y = y
        self.text = text

    def _delta(self):
        """Return the first value followed by each row's change from the previous row.

        An empty frame yields an empty list.
        """
        vals = self.df[self.y].to_list()
        return vals[:1] + [curr - prev for prev, curr in zip(vals, vals[1:])]

    def _trace_data(self, delta = False, total_label = "Total"):
        """Return the ``(x, y, measure)`` lists for the waterfall trace.

        The first row is an ``"absolute"`` bar and every following row is a
        ``"relative"`` bar. A separate ``"total"`` bar labelled ``total_label``
        is appended after the data, because Plotly ignores the ``y`` value of
        a total bar: tagging the last data row itself as ``"total"`` would
        silently drop that row's value.

        An empty frame yields three empty lists.
        """
        x = self.df[self.x].to_list()
        y = self._delta() if delta else self.df[self.y].to_list()
        if not x:
            return [], [], []
        measure = ["absolute"] + ["relative"] * (len(x) - 1)
        # The y entry of the total bar is only a placeholder (Plotly computes it).
        return x + [total_label], y + [0], measure + ["total"]

    def saveplot(self, title, filename, delta = False, total_label = "Total"):
        """Render the chart and write it to ``plots/<filename>.html``.

        Parameters
        ----------
        title : str
            Figure title.
        filename : str
            Output file name without the ``.html`` extension.
        delta : bool, default False
            If True, plot row-to-row changes (see ``_delta``) instead of the
            raw column values.
        total_label : str, default "Total"
            Label of the summary bar appended after the last data row.
        """
        x, y, measure = self._trace_data(delta, total_label)
        trace_kwargs = dict(x = x, y = y, measure = measure)
        if self.text is not None:
            trace_kwargs["text"] = self.text
        fig = go.Figure(go.Waterfall(**trace_kwargs))
        fig.update_layout(title = title, showlegend = False)
        # Force a categorical axis so the total label is kept even when the
        # other labels look like dates (e.g. '1965-01').
        fig.update_xaxes(type = "category")
        out_path = Path("plots") / (filename + ".html")
        # Create the output directory on first use instead of failing.
        out_path.parent.mkdir(parents = True, exist_ok = True)
        plotly.offline.plot(fig, filename = str(out_path))

    # TODO: add a summary() method

In [8]:
# Waterfall chart over the 1965 subset: months on x, production on y.
wp = WaterfallChart(df = df1, x = 'Month', y = 'Monthly beer production')

In [9]:
# Save the chart built from the raw monthly values to plots/waterfall1.html.
wp.saveplot(title = 'Monthly beer production in 1965', filename = 'waterfall1')

In [10]:
# Same chart, but with delta = True each bar after the first is the change
# from the previous month; written to plots/waterfall2.html.
wp.saveplot(title = 'Monthly beer production in 1965', filename = 'waterfall2', delta = True)


/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/shail/snap/code/common/.cache/gio-modules/libgiolibproxy.so


In [11]:
# TODO: add a summary / explanation of the two waterfall plots above

/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/shail/snap/code/common/.cache/gio-modules/libgiolibproxy.so


Gtk-Message: 14:59:27.972: Failed to load module "canberra-gtk-module"
Gtk-Message: 14:59:27.973: Failed to load module "canberra-gtk-module"
Gtk-Message: 14:59:27.974: Failed to load module "canberra-gtk-module"
Gtk-Message: 14:59:27.974: Failed to load module "canberra-gtk-module"


Opening in existing browser session.
Opening in existing browser session.


## Gantt chart

In [14]:
# Load the example daily schedule used for the Gantt chart.
# NOTE: this rebinds `df`, which held the beer-production data until now;
# earlier cells must be re-run from the top before using that data again.
df = pd.read_csv('data/schedule.csv')
df

Unnamed: 0,Task,Start,Finish,Category
0,Sleep,2018-07-01 0:00:00,2018-07-01 7:00:00,Rest
1,Breakfast,2018-07-01 7:30:00,2018-07-01 8:00:00,Eat
2,Music,2018-07-01 8:00:00,2018-07-01 12:00:00,Music
3,Math,2018-07-01 9:00:00,2018-07-01 12:00:00,Homework
4,Lunch,2018-07-01 12:00:00,2018-07-01 13:00:00,Eat
5,Physics,2018-07-01 13:00:00,2018-07-01 14:30:00,Homework
6,Music,2018-07-01 14:00:00,2018-07-01 15:30:00,Music
7,Nap,2018-07-01 16:00:00,2018-07-01 16:30:00,Rest
8,Tennis,2018-07-01 17:30:00,2018-07-01 19:00:00,Play
9,Dinner,2018-07-01 20:00:00,2018-07-01 20:30:00,Eat


In [17]:
# Quick inline Gantt chart: one bar per task from Start to Finish, coloured by Category.
fig = px.timeline(df, x_start = "Start", x_end = "Finish", y = "Task", color = "Category")
# Reverse the y axis; per the Plotly Gantt docs, tasks are otherwise listed from the bottom up.
fig.update_yaxes(autorange="reversed")
fig.show()

In [18]:
class GanttChart:
    """Build a Plotly Express timeline (Gantt) chart and save it as HTML.

    Parameters
    ----------
    df : pd.DataFrame
        Source data; one bar is drawn per row.
    task : str
        Name of the column shown on the y axis.
    x_start : str
        Name of the column holding each bar's start.
    x_end : str
        Name of the column holding each bar's end.
    hue : str, optional
        Name of the column used to colour the bars; no colouring when ``None``.
    """

    def __init__(self, df, task, x_start, x_end, hue = None):
        self.df = df
        self.task = task
        self.x_start = x_start
        self.x_end = x_end
        self.hue = hue

    def saveplot(self, title, filename):
        """Render the chart and write it to ``plots/<filename>.html``.

        Parameters
        ----------
        title : str
            Figure title.
        filename : str
            Output file name without the ``.html`` extension.
        """
        # color=None is px.timeline's default, so a single call covers both
        # the coloured and the uncoloured case.
        fig = px.timeline(
            self.df,
            x_start = self.x_start,
            x_end = self.x_end,
            y = self.task,
            color = self.hue,
        )
        fig.update_layout(title = title, showlegend = True)
        fig.update_yaxes(autorange="reversed")
        out_path = Path("plots") / (filename + ".html")
        # Create the output directory on first use instead of failing.
        out_path.parent.mkdir(parents = True, exist_ok = True)
        plotly.offline.plot(fig, filename = str(out_path))

    # TODO: add a summary() method

In [20]:
# Build the schedule Gantt chart, coloured by category, and save it to plots/gantt.html.
chart = GanttChart(df = df, task = 'Task', x_start = 'Start', x_end = 'Finish', hue = 'Category')
chart.saveplot(title = 'My Timetable', filename = 'gantt')

/snap/core20/current/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /lib/x86_64-linux-gnu/libproxy.so.1)
Failed to load module: /home/shail/snap/code/common/.cache/gio-modules/libgiolibproxy.so
Gtk-Message: 15:13:45.116: Failed to load module "canberra-gtk-module"
Gtk-Message: 15:13:45.118: Failed to load module "canberra-gtk-module"


Opening in existing browser session.
