# Super Chat QuickLook

- Source: All Vtubers' live streams (including prechat)
- Temporal coverage: from 2021-03-16 onward

# Preparation

In [None]:
import pandas as pd
import numpy as np
from glob import iglob
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load every Super Chat CSV into one frame.
# Only the empty string is parsed as missing (keep_default_na=False), so
# literal text such as "NA" or "null" is kept as a string.
# iglob yields paths in arbitrary order; sorting makes the load reproducible.
df = pd.concat(
    [
        pd.read_csv(path, na_values='', keep_default_na=False)
        for path in sorted(iglob('../input/vtuber-livechat/superchats_*.csv'))
    ],
    ignore_index=True,
)

# Body length in characters; a missing body counts as 0.
df['bodylength'] = df['body'].str.len().fillna(0).astype('int')

# Weight each significance level on a 1-2-5 scale.
# Levels that are not in the table map to NaN.
df['impact'] = df['significance'].map({
    1: 1,
    2: 2,
    3: 5,
    4: 10,
    5: 20,
    6: 50,
    7: 100
})

# Attach channel metadata by matching `originChannelId` against the
# `channelId` column of channels.csv, then drop the id columns
# (both frames carry a `channelId`, hence the `_x` / `_y` suffixes).
channels = pd.read_csv('../input/vtuber-livechat/channels.csv')
df = pd.merge(
    df,
    channels,
    how='left',
    left_on='originChannelId',
    right_on='channelId',
).drop(columns=['originChannelId', 'channelId_x', 'channelId_y'])

# Index by time. utc=True guarantees a tz-aware UTC index even when the
# strings carry no offset; tz_convert below raises on a tz-naive index.
df.index = pd.to_datetime(df['timestamp'], utc=True)
df.sort_index(inplace=True)

# Convert UTC to JST
df.index = df.index.tz_convert('Asia/Tokyo')
df.info()

In [None]:
# Plot colour (CSS rgb string) for each value of the `color` column.
# The insertion order here is also the display order used by the charts.
superchatColorMap = dict(
    blue='rgb(21, 101, 192)',
    lightblue='rgb(0, 229, 255)',
    green='rgb(29, 233, 182)',
    yellow='rgb(255, 202, 40)',
    orange='rgb(245, 124, 0)',
    magenta='rgb(233, 30, 99)',
    red='rgb(230, 33, 23)',
)

# Category order passed to Plotly Express so legends and stacks follow the
# colour sequence above (dicts preserve insertion order).
superchatCatOrders = {'color': list(superchatColorMap)}

# Intensity

In [None]:
# Count Super Chats per week and per colour, then chart the counts as bars
# coloured by the `color` column.
weekly = (
    df.groupby([pd.Grouper(level='timestamp', freq='W'), 'color'])
    .agg(size=('color', 'size'))
    .reset_index()
)
px.bar(
    weekly,
    x='timestamp',
    y='size',
    color='color',
    title='Weekly Stats',
    labels={'size': '# of superchats'},
    color_discrete_map=superchatColorMap,
    category_orders=superchatCatOrders,
)

In [None]:
# Average number of Super Chats per day, broken down by day of the week.
# Step 1: total per calendar day. Step 2: mean of those totals per weekday.
daily_total = df.groupby(pd.Grouper(level='timestamp', freq='D')).size()
sc = daily_total.groupby(daily_total.index.weekday).mean().rename('mean').reset_index()

# pandas numbers weekdays from 0 (Monday) to 6 (Sunday).
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
sc['timestamp'] = sc['timestamp'].map(dict(enumerate(weekday_names)))

px.bar(
    sc,
    x='timestamp',
    y='mean',
    color='mean',
    title='Average Number of Super Chat (JST)',
)

In [None]:
# Same weekday average as for all Super Chats, restricted to rows whose
# `color` is 'red'.
red_daily = (
    df.loc[df['color'] == 'red']
    .groupby(pd.Grouper(level='timestamp', freq='D'))
    .size()
)
red = red_daily.groupby(red_daily.index.weekday).mean().rename('mean').reset_index()

# pandas numbers weekdays from 0 (Monday) to 6 (Sunday).
red_weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
red['timestamp'] = red['timestamp'].map(dict(enumerate(red_weekday_names)))

px.bar(
    red,
    x='timestamp',
    y='mean',
    color='mean',
    title='Average Number of Red Super Chat (JST)',
)

# Currency

In [None]:
# Number of Super Chats per currency, with the most used currency first.
nbcur = df.groupby(['currency']).size().rename('count').reset_index()
px.bar(
    nbcur,
    title='Number of superchat by currency',
    x='currency',
    y='count',
    # `count` is numeric, so the bars are shaded on a continuous colour scale.
    # The discrete colour map and the `color` category order used by the
    # per-colour charts have no effect here and are not passed.
    color='count',
    labels={
        'count': 'Number of <b>superchats</b>'
    },
).update_xaxes(categoryorder='total descending')

In [None]:
# Total impact per currency.
# Selecting `impact` and resetting the index gives a plain two-column frame
# (`currency`, `impact`), so `currency` is not both the index name and a
# column label.
nbcur = df.groupby('currency')['impact'].sum().reset_index()

px.bar(
    nbcur,
    title='Most impactful currencies',
    x='currency',
    y='impact',
    color='impact',
    labels={
        'impact': 'Monetary impact'
    },
).update_xaxes(categoryorder='total descending')

In [None]:
# Mean impact of a single Super Chat, per currency.
# Selecting `impact` and resetting the index gives a plain two-column frame
# (`currency`, `impact`), so `currency` is not both the index name and a
# column label.
nbcur = df.groupby('currency')['impact'].mean().reset_index()

px.bar(
    nbcur,
    title='Richest currencies',
    x='currency',
    y='impact',
    color='impact',
    labels={
        'impact': 'Average impact of <b>superchats</b>'
    },
).update_xaxes(categoryorder='total descending')

In [None]:
# For each currency, the share of its Super Chats that falls in each colour.
# normalize=True gives fractions that sum to 1 within a currency.
color_share = df.groupby('currency')['color'].value_counts(normalize=True)
curDist = color_share.rename('percentage').reset_index()

px.histogram(
    curDist,
    x='currency',
    y='percentage',
    color='color',
    title='Color distribution by currency',
    color_discrete_map=superchatColorMap,
    category_orders=superchatCatOrders,
)

In [None]:
# Distribution of message length per currency, restricted to rows with
# significance 7 (the rows the chart title calls red).
top_significance = df[df['significance'] == 7]
px.box(
    top_significance,
    x='bodylength',
    y='currency',
    title='Body length of red superchat by currency',
).update_yaxes(categoryorder='total descending')

# Affiliation

In [None]:
# Share of each affiliation, measured two ways: by number of Super Chats
# and by summed impact.
marketByCounts = df.value_counts('affiliation').rename('counts').reset_index()
marketByImpact = df[['affiliation', 'impact']].groupby(['affiliation']).sum().reset_index()

# One donut per measure, side by side ('domain' is the subplot type for pies).
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[dict(type='domain'), dict(type='domain')]],
)
donut_sources = [(marketByCounts, 'counts'), (marketByImpact, 'impact')]
for donut_col, (share_frame, value_column) in enumerate(donut_sources, start=1):
    donut = go.Pie(
        labels=share_frame['affiliation'],
        values=share_frame[value_column],
        hole=.4,
    )
    fig.add_trace(donut, 1, donut_col)

# Caption placed in the hole of each donut.
center_captions = [
    dict(text='SHARE<br>by <b>counts</b>', x=0.23, y=0.5, xanchor='center', yanchor='middle', font_size=10, showarrow=False),
    dict(text='SHARE<br>by <b>impact</b>', x=0.78, y=0.5, xanchor='center', yanchor='middle', font_size=10, showarrow=False),
]

fig.update_traces(
    textposition='inside',
    textinfo='label+value+percent',
    showlegend=False,
).update_layout(annotations=center_captions)