Plotting practice and some NLP (hopefully)

IMPORTANT NOTE:

The subtitle provider differs from season to season, so there is no guarantee that SDH comments are consistent: every episode in every season contains sound descriptions, but sounds can be named differently from one provider to another.

In [10]:
import re
import pandas as pd
import numpy as np
from datetime import date, datetime, timedelta, time

import plotly.express as px

In [12]:
# Load the SDH comment table from the CSV next to the notebook,
# then list the column dtypes as the cell output.
data = pd.read_csv(filepath_or_buffer='sdh_comments.csv')
data.dtypes

sdh_comment          object
interval_start       object
interval_end         object
interval_minute       int64
episode               int64
season                int64
season00episode00    object
dtype: object

In [13]:
# Parse the first eight characters (HH:MM:SS) of each interval bound into a datetime.
# Full datetimes are kept on purpose: reducing them to plain times with `.dt.time`
# doesn't work with the plotly gantt chart further down.
for bound in ('interval_start', 'interval_end'):
    data[bound] = pd.to_datetime(data[bound].str[:8], format="%H:%M:%S")

In [14]:
# Re-inspect the dtypes to confirm that interval_start / interval_end were converted.
data.dtypes

sdh_comment                  object
interval_start       datetime64[ns]
interval_end         datetime64[ns]
interval_minute               int64
episode                       int64
season                        int64
season00episode00            object
dtype: object

Practicing on Season 1 data:

In [15]:
# Season 1 rows whose comment mentions "musik" (case-insensitive match).
# Note: despite its name, data_s01e01 is filtered by season only, so it holds
# every episode of season 1.
# Later idea: cluster the comments to show different sound types
# (clicks, beeps, thunder, music, radio and so on).
# Candidate extra patterns: |klavierspiel|klänge
data_s01e01 = data.loc[
    (data['season'] == 1)
    & data['sdh_comment'].str.lower().str.contains('musik')
]
data_s01e01

Unnamed: 0,sdh_comment,interval_start,interval_end,interval_minute,episode,season,season00episode00
103,ruhige Klaviermusik,1900-01-01 00:00:06,1900-01-01 00:00:08,0,7,1,S01E07
105,ruhige spieluhrartige Musik,1900-01-01 00:00:17,1900-01-01 00:00:20,0,7,1,S01E07
106,Musik wird zunehmend dissonant,1900-01-01 00:00:27,1900-01-01 00:00:29,0,7,1,S01E07
110,ruhige beklemmende Musik,1900-01-01 00:02:39,1900-01-01 00:02:42,2,7,1,S01E07
111,Ruhige beklemmende Musik läuft weiter,1900-01-01 00:02:51,1900-01-01 00:02:53,2,7,1,S01E07
...,...,...,...,...,...,...,...
2433,düstere Musik,1900-01-01 00:29:39,1900-01-01 00:29:41,29,9,1,S01E09
2435,Musik verstummt,1900-01-01 00:29:59,1900-01-01 00:30:01,29,9,1,S01E09
2467,melancholische Musik,1900-01-01 00:40:46,1900-01-01 00:40:48,40,9,1,S01E09
2473,melancholische Musik,1900-01-01 00:45:51,1900-01-01 00:45:53,45,9,1,S01E09


In [145]:
# Gantt-style timeline of the season 1 music comments, one row per episode.
fig = px.timeline(
    data_s01e01,
    x_start='interval_start',
    x_end='interval_end',
    y='season00episode00',
    text='sdh_comment',
    template='plotly_dark',
    color_discrete_sequence=["rgb(120,130,90)"],
    hover_name="sdh_comment",
    hover_data={'sdh_comment': False, 'interval_end': False},
    height=600,
)

# Time-of-episode axis with a dotted grid and a range slider.
# Reference: https://plotly.com/python/reference/layout/xaxis/#layout-xaxis-rangeslider
fig.update_xaxes(
    tickformat="%H:%M:%S",
    showgrid=True,
    griddash='dot',
    gridcolor='grey',
    # thickness is a fraction of the whole figure height
    rangeslider=dict(visible=True, thickness=0.1),
)

fig.update_yaxes(autorange="reversed", categoryorder='category ascending')

# Faint background band behind every episode row for readability.
# y0 == y1, so the visible band presumably comes from the 30-wide outline.
for episode_label in data_s01e01['season00episode00'].drop_duplicates():
    fig.add_hrect(
        y0=episode_label,
        y1=episode_label,
        line_width=30,
        fillcolor="black",
        opacity=0.03,
        layer="below",
    )

fig.update_traces(textfont_size=8, textangle=0)
fig.update_layout(
    title="Music of Dark",
    xaxis_title="Time",
    yaxis_title="Episode",
    bargap=0,
    font_family="Courier New",
    font_color="white",
)
fig.show()

In [155]:
# Idea: colour the bars by season and plot on the episode number only.
#
# Keep the rows whose comment mentions "musik" (case-insensitive). na=False makes
# missing comments count as non-matches instead of producing NaN in the mask.
# Candidate extra patterns: |klavierspiel|klänge
music_mask = data['sdh_comment'].str.lower().str.contains('musik', na=False)

# .assign builds a new frame from the filtered rows, so the casts never write into
# a slice of the original (the attribute-style assignment on a filtered frame
# triggers SettingWithCopyWarning). episode/season become strings so they match the
# string keys used by color_discrete_map / category_orders in the plotting cell.
#
# NOTE: this overwrites `data`. The season-1 cell compares `season == 1` as an
# integer, so reload the CSV before re-running that cell.
data = data.loc[music_mask].assign(
    episode=lambda frame: frame['episode'].astype('str'),
    season=lambda frame: frame['season'].astype('str'),
)
data

Unnamed: 0,sdh_comment,interval_start,interval_end,interval_minute,episode,season,season00episode00
0,düster-melancholische Musik,1900-01-01 00:00:13,1900-01-01 00:00:16,0,2,3,S03E02
6,düstere Musik,1900-01-01 00:01:41,1900-01-01 00:01:46,1,2,3,S03E02
18,bedrohliche Streichermusik,1900-01-01 00:06:39,1900-01-01 00:06:43,6,2,3,S03E02
46,düstere Musik,1900-01-01 00:20:23,1900-01-01 00:20:27,20,2,3,S03E02
94,Musik verstummt,1900-01-01 00:48:14,1900-01-01 00:48:15,48,2,3,S03E02
...,...,...,...,...,...,...,...
2433,düstere Musik,1900-01-01 00:29:39,1900-01-01 00:29:41,29,9,1,S01E09
2435,Musik verstummt,1900-01-01 00:29:59,1900-01-01 00:30:01,29,9,1,S01E09
2467,melancholische Musik,1900-01-01 00:40:46,1900-01-01 00:40:48,40,9,1,S01E09
2473,melancholische Musik,1900-01-01 00:45:51,1900-01-01 00:45:53,45,9,1,S01E09


In [191]:
# Timeline of music comments for all seasons: one row per episode number,
# bars coloured by season. (The stray `from matplotlib.pyplot import legend`
# was unused and has been removed; this cell needs only plotly express.)
fig = px.timeline(
    data,
    x_start='interval_start',
    x_end='interval_end',
    y='episode',
    color='season',
    text='sdh_comment',
    template='plotly_dark',
    # season and episode are strings at this point, hence the string keys
    color_discrete_map={"3": "rgb(179,205,227)", "2": "rgb(36,121,108)", "1": "rgb(153,153,51)"},
    category_orders={
        "season": ["1", "2", "3"],
        "episode": ["10", "9", "8", "7", "6", "5", "4", "3", "2", "1"],
    },
    hover_name="sdh_comment",
    hover_data={'sdh_comment': False, 'interval_end': False},
    height=700,
)

# Time-of-episode axis with a dotted grid and a range slider.
# Reference: https://plotly.com/python/reference/layout/xaxis/#layout-xaxis-rangeslider
fig.update_xaxes(
    tickformat="%H:%M:%S",
    showgrid=True,
    griddash='dot',
    gridcolor='grey',
    rangeslider=dict(visible=True),
    rangeslider_thickness=0.1,  # fraction of the whole figure height
)

fig.update_yaxes(autorange="reversed")

# Faint background band behind each row for readability. Integer positions 0-9
# presumably address the ten episode categories by index - verify if the
# number of episodes changes.
for episode in range(10):
    fig.add_hrect(y0=episode, y1=episode, line_width=30, fillcolor="black", opacity=0.03, layer="below")

# textposition="none" hides the bar labels; the comment text stays available on hover.
fig.update_traces(textfont_size=8, textangle=0, textposition="none")
fig.update_layout(
    title="Music of Dark",
    xaxis_title="Time",
    yaxis_title="Episode",
    legend_title="Season",
    bargap=0,
    font_family="Courier New",
    font_color="white",
)
fig.show()

In [189]:
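# seems as though there's no music in season 3
# NOTE(review): the filtered table above does list season 3 rows (e.g. S03E02),
# so this may be a plotting artefact rather than missing data - verify.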