In [1]:
from pyhive import hive
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
from sqlalchemy import create_engine

In [2]:
conn = hive.Connection(host='localhost', port=10000, username='rodrigo', database='youtube')
engine = create_engine('hive://', creator=lambda: conn)

In [60]:
tipos = {

    'total_tags': 'int32',
    'total_visualizacoes': 'int32',
    'total_comentarios' :  'int32',
    'total_likes' : 'float',

}

In [61]:
df_result = pd.read_sql_query(
    ' SELECT total_caractere_video, '
    ' total_tags, '
    ' total_visualizacoes,  '
    ' total_comentarios,	 '
    ' total_likes '
    ' FROM TOTAL_VISUALIZACOES_POR_SEMANA '
    ' where TOTAL_VISUALIZACOES_POR_SEMANA.assunto = "assunto_cities_skylines" '
    ' AND TOTAL_VISUALIZACOES_POR_SEMANA.data_extracao = "2023-10-27"', 
    con=engine,
    dtype=tipos
)

In [29]:
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   nm_canal                 245 non-null    string        
 1   titulo_video             245 non-null    string        
 2   total_caractere_video    245 non-null    int32         
 3   tags                     245 non-null    object        
 4   duracao_video_minutos    245 non-null    float64       
 5   total_tags               245 non-null    int32         
 6   total_visualizacoes      245 non-null    int64         
 7   total_comentarios        245 non-null    int32         
 8   total_likes              242 non-null    float64       
 9   total_visualizacoes_dia  245 non-null    int32         
 10  total_comentarios_dia    245 non-null    int32         
 11  total_likes_dia          242 non-null    float64       
 12  assunto                  245 non-nul

In [62]:
df_result.head()

Unnamed: 0,total_caractere_video,total_tags,total_visualizacoes,total_comentarios,total_likes
0,92,24,4,0,2.0
1,70,0,481,19,69.0
2,88,0,342,11,52.0
3,89,0,481,19,86.0
4,77,0,400,20,58.0


In [63]:
df_result.memory_usage(deep=True).sum()

6988

In [64]:
df_result.memory_usage().sum() / 1024 ** 2 

0.006664276123046875

# Relação Comentários X Visualização (proporção) e Relação Visualizações X Likes (proporção)

In [65]:
fig = px.scatter(df_result, x='total_caractere_video', y='total_visualizacoes')

In [66]:
fig.show()

- Comparação Likes X Visualizações e Cometários X Visualizações

In [67]:
fig = px.scatter(df_result, x='total_likes', y='total_visualizacoes', trendline='ols')
fig.update_xaxes(title_text='Likes')
fig.update_yaxes(title_text='Visualizações')
fig.show()


In [68]:
results = px.get_trendline_results(fig)
print(results.px_fit_results.iloc[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.975
Model:                            OLS   Adj. R-squared:                  0.974
Method:                 Least Squares   F-statistic:                     9209.
Date:                Mon, 04 Dec 2023   Prob (F-statistic):          1.98e-193
Time:                        21:08:42   Log-Likelihood:                -2196.2
No. Observations:                 242   AIC:                             4396.
Df Residuals:                     240   BIC:                             4403.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        450.8012    144.040      3.130      0.0

In [69]:
results

Unnamed: 0,px_fit_results
0,<statsmodels.regression.linear_model.Regressio...


In [70]:

fig = px.scatter(df_result, x='total_comentarios', y='total_visualizacoes', trendline='ols')
fig.update_xaxes(title_text='Comentários')
fig.update_yaxes(title_text='Visualizações')
fig.update_layout(title_text='Desenpenho Canal', showlegend=True)
fig.show()

In [71]:
results = px.get_trendline_results(fig)
print(results.px_fit_results.iloc[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.726
Model:                            OLS   Adj. R-squared:                  0.725
Method:                 Least Squares   F-statistic:                     642.9
Date:                Mon, 04 Dec 2023   Prob (F-statistic):           3.30e-70
Time:                        21:08:49   Log-Likelihood:                -2513.5
No. Observations:                 245   AIC:                             5031.
Df Residuals:                     243   BIC:                             5038.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        398.2330    476.410      0.836      0.4

# Dia em que os vídeos são Públicados com alta frequência

In [93]:
tipos = {
    'semana_traduzida': 'string',
    'nm_canal': 'string',
    'total_videos' : 'int32',
    'data_publicacao' : 'datetime64[ns]',
    'assunto' : 'string',
    'id_canal' : 'string'
}

In [94]:
df_publicacao_video = pd.read_sql_query(
    'SELECT semana_traduzida, '
    ' nm_canal,  '
    ' total_videos, '
    ' data_publicacao,  '
    ' assunto, '
    ' id_canal '
    ' FROM youtube.total_video_publicado_semana'
    ' WHERE assunto = "assunto_cities_skylines" ',

    dtype=tipos,
    con=engine
)

In [95]:
def indice_semana(dia: str) -> int: 
    dias_semana = {
        'Domingo': 1,
        'Segunda-feira' : 2,
        'Terça-feira' : 3,
        'Quarta-feira' : 4,
        'Quinta-feira': 5,
        'Sexta-feira': 6,
        'Sábado' : 7
    }

    return dias_semana.get(dia)


In [96]:
df_publicacao_video.head()

Unnamed: 0,semana_traduzida,nm_canal,total_videos,data_publicacao,assunto,id_canal
0,Domingo,Irmãos Cities,1,2023-10-15,assunto_cities_skylines,UC1mk6EtfMjxR4eEZ7C43zTQ
1,Domingo,CANAL HAND - GAME WITH MUSIC,1,2023-10-15,assunto_cities_skylines,UC7v51W2NoLFawizgcNuVBOA
2,Domingo,GAC TV,1,2023-10-15,assunto_cities_skylines,UCEc5jA0NvvR-yJC0SISV6nw
3,Domingo,Lucas gameplays brasil,1,2023-10-15,assunto_cities_skylines,UCOYJm7BsLV8VCkW2DC1MI5w
4,Domingo,O Veio Games,1,2023-10-15,assunto_cities_skylines,UCOe_x5ararWwWajBKbFKNkA


In [97]:

# df_publicacao_video['indice_semana'] = df_publicacao_video['semana_traduzida'].apply(indice_semana)
# df_publicacao_video.sort_values(by='indice_semana', inplace=True)
df_publicacao_video = df_publicacao_video.groupby('semana_traduzida').sum('total_videos').reset_index()
df_publicacao_video['indice_semana'] = df_publicacao_video['semana_traduzida'].apply(indice_semana)
df_publicacao_video.sort_values(by='indice_semana', inplace=True)
df_publicacao_video.drop(['indice_semana'], axis=1, inplace=True)
df_publicacao_video.head(7)

Unnamed: 0,semana_traduzida,total_videos
0,Domingo,25
3,Segunda-feira,40
6,Terça-feira,39
1,Quarta-feira,37
2,Quinta-feira,39
4,Sexta-feira,34
5,Sábado,21


- Cor dia de publicação

In [98]:
fig = px.bar(df_publicacao_video, x='semana_traduzida',y='total_videos', text_auto=True)
fig.update_layout(title_text='Envio de Vídeo por Semana', showlegend=True, title=dict(x=0.5) )

fig.update_layout(
    margin=dict(l=20, r=20, t=40, b=20, pad=4),
    paper_bgcolor='LightSteelBlue'
    
)

fig.show()

- Ajuste automático de Layout

In [99]:
fig = px.bar(df_publicacao_video, x='semana_traduzida',y='total_videos', text_auto=True)


fig.update_layout(
    title_text='Envio de Vídeo por Semana',
    showlegend=True,
    title=dict(x=0.5, font=dict(color='white')),
    plot_bgcolor='#1F2326',
    yaxis=dict(visible=False),  
    margin=dict(l=20, r=20, t=40, b=20, pad=4),
    paper_bgcolor='#1F2326',
    xaxis=dict(title='', tickfont=dict(color='white')),
    legend=dict(font=dict(color='white')),
)
fig.update_traces(
    textfont_color='white',
    marker_color='#FFA500',
    textfont_size=16  
)

fig.show()

# Total de Tags X Vísualizações 

In [100]:
tipos = {
    'id_canal': 'string',
    'total_tags': 'int32',
    'total_visualizacoes': 'int32'
}

In [102]:
df_tags_visualizacoes = pd.read_sql_query(
        ' SELECT  '
        '   id_canal,    ' 
        '	total_tags , '
        '	total_visualizacoes ' 
        ' FROM total_visualizacoes_por_semana tvps  '
        ' WHERE tvps.assunto  = "assunto_cities_skylines" '
        ' AND data_extracao = "2023-10-27" ',
    con=engine,
    dtype=tipos
)

In [103]:
df_tags_visualizacoes.head()

Unnamed: 0,id_canal,total_tags,total_visualizacoes
0,UC-N8y8IuT8B44h-IQQAmq4w,24,4
1,UC1mk6EtfMjxR4eEZ7C43zTQ,0,481
2,UC1mk6EtfMjxR4eEZ7C43zTQ,0,342
3,UC1mk6EtfMjxR4eEZ7C43zTQ,0,481
4,UC1mk6EtfMjxR4eEZ7C43zTQ,0,400


In [104]:
df_tags_visualizacoes.drop_duplicates(inplace=True)

In [105]:
df_tags_visualizacoes.head()

Unnamed: 0,id_canal,total_tags,total_visualizacoes
0,UC-N8y8IuT8B44h-IQQAmq4w,24,4
1,UC1mk6EtfMjxR4eEZ7C43zTQ,0,481
2,UC1mk6EtfMjxR4eEZ7C43zTQ,0,342
4,UC1mk6EtfMjxR4eEZ7C43zTQ,0,400
5,UC1mk6EtfMjxR4eEZ7C43zTQ,0,537


In [106]:
fig = px.scatter(df_tags_visualizacoes, x='total_tags', y='total_visualizacoes')
fig.show()

# Total Víews DIA por Vídeo

In [113]:
tipo = {
    'titulo_video': 'string',
    'id_video': 'string',
    'total_visualizacoes' : 'int32',
    'total_visualizacoes_dia': 'int32'

}

In [115]:
df_views_video = pd.read_sql_query(
    ' SELECT titulo_video, '
    ' id_video, '
    ' total_visualizacoes, '
    ' total_visualizacoes_dia '
    ' FROM total_visualizacoes_por_semana tvps '
    ' where id_canal  = "UCrOH1V-FyMunBIMrKL0y0xQ" '
    ' AND data_extracao = "2023-10-21" '
    ' AND total_visualizacoes  > 0 ',
    con=engine,
    dtype=tipo
)
df_views_video.head() 

Unnamed: 0,titulo_video,id_video,total_visualizacoes,total_visualizacoes_dia
0,MODDERS decidem fazer O MELHOR JOGO DE GERENCI...,XOUzWJ0bHuw,9263,151
1,Planejamento inicial PODE SALVAR SUA CIDADE! 🚗...,dRzwiPwGH_M,17245,440
2,PEQUENA FORTUNA EM SERVIÇOS PÚBLICOS BÁSICOS 🏙...,totHMIv6_hI,44930,32092
3,"DO CRIADOR DA MELHOR CIDADE JÁ FEITA, A NOVA M...",up4KrHrRwLY,24837,2238
4,PRIMEIRA PRAÇA DA CIDADE SAI DO PAPEL 🏙️ - Cit...,vy-upTpchig,24631,24631


In [111]:
df_views_video.shape

(14, 3)

In [116]:
df_views_video['titulo_video'] = df_views_video['titulo_video'].apply(lambda x: x.capitalize())
df_views_video.drop_duplicates(inplace=True)

In [117]:

df_views_video.sort_values(by='total_visualizacoes_dia', ascending=True, inplace=True)
fig = px.bar(df_views_video, x='total_visualizacoes_dia', y='id_video', orientation='h', text_auto=True)


fig.update_layout(
    title_text='Envio de Vídeo por Semana',
    showlegend=True,
    title=dict(x=0.5, font=dict(color='white')),
    plot_bgcolor='#1F2326',
    margin=dict(l=10, r=20, t=40, b=20, pad=2),
    paper_bgcolor='#1F2326',
    yaxis=dict(title='', tickfont=dict(color='white', size=16), tickmode='array', ticklen=1),
    xaxis=dict(title='', tickfont=dict(color='white'), visible=False),
    legend=dict(font=dict(color='white'), orientation='h', y=4)
)
    
fig.update_traces(
    textfont_color='white',
    marker_color='#04BE5F',
    textfont_size=18 ,
    textposition='outside',  
)


fig.show()


- Selecionando Vídeo - (visualização, vídeo, comentário, likes)

In [118]:
tipos = {
    'data_extracao' : 'datetime64[ns]',
    'total_visualizacoes_dia' : 'int'
}

In [119]:
df_desempenho_video = pd.read_sql_query(
    ' SELECT data_extracao, '
    ' total_visualizacoes_dia '
    ' FROM total_visualizacoes_por_semana tvps '
    ' where tvps.assunto  = "assunto_cities_skylines" '
    ' AND tvps.id_video = "wCLSZxLfUAk" ' ,
    con=engine,
    dtype=tipos
)
df_desempenho_video.head()

Unnamed: 0,data_extracao,total_visualizacoes_dia
0,2023-10-19,17961
1,2023-10-20,34195
2,2023-10-21,23882
3,2023-10-22,12800
4,2023-10-23,8201


In [120]:
df_desempenho_video.dtypes

data_extracao              datetime64[ns]
total_visualizacoes_dia             int64
dtype: object

In [121]:
fig = px.bar(data_frame=df_desempenho_video, x='data_extracao', y='total_visualizacoes_dia', text_auto=True)
fig.update_layout(
    title_text='Desempenho Vídeo',
    showlegend=True,
    title=dict(x=0.5, font=dict(color='white')),
    plot_bgcolor='#1F2326',
    yaxis=dict(visible=False),  
    margin=dict(l=20, r=20, t=40, b=20, pad=4),
    paper_bgcolor='#1F2326',
    xaxis=dict(title='', tickfont=dict(color='white')),
    legend=dict(font=dict(color='white')),
    xaxis_tickformat = '%d/%m/%Y'
)
fig.update_traces(
    textfont_color='white',
    marker_color='#246DFB',
    textfont_size=16 ,
    textposition='outside', 
    
)


# Total Víews/likes/comentarios DIA por Canal

In [136]:
tipos = {
    'id_canal' : 'string',
    'total_visualizacoes_dia': 'int32'
}

In [143]:
df_views_canal = pd.read_sql_query(
    ' SELECT '
    '    id_canal, '
    '    total_visualizacoes_dia '
    '    FROM total_visualizacoes_por_semana  '
    '    WHERE data_extracao="2023-10-27"  '
    '    AND ASSUNTO = "assunto_cities_skylines" '
    '    AND total_visualizacoes_dia > 0  ',
    con=engine,
    dtype=tipos
)

df_views_canal['id_canal'] = df_views_canal['id_canal'].apply(lambda x: x.capitalize())
df_views_canal.head()

Unnamed: 0,id_canal,total_visualizacoes_dia
0,Uc-n8y8iut8b44h-iqqamq4w,4
1,Uc1mk6etfmjxr4eez7c43ztq,1
2,Uc1mk6etfmjxr4eez7c43ztq,342
3,Uc1mk6etfmjxr4eez7c43ztq,481
4,Uc1mk6etfmjxr4eez7c43ztq,1


In [146]:
df_views_canal = df_views_canal.groupby('id_canal').sum('total_visualizacoes_dia').sort_values(by='total_visualizacoes_dia', ascending=False).reset_index()

In [147]:
df_views_canal.head()

Unnamed: 0,id_canal,total_visualizacoes_dia
0,Ucroh1v-fymunbimrkl0y0xq,49697
1,Ucayih2y5jbeosubqoqhufda,15020
2,Ucotipyf8_rgzj1lptmvadaa,13960
3,Uce9jri0yq5sm6h5qrz9fzla,10397
4,Ucrucdh_bcfx77xpxhyq2fjw,6136


In [148]:
df_views_canal.dtypes

id_canal                   object
total_visualizacoes_dia     int32
dtype: object

In [149]:
df_views_canal['id_canal'] = df_views_canal['id_canal'].astype('string')

In [150]:
df_views_canal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   id_canal                 102 non-null    string
 1   total_visualizacoes_dia  102 non-null    int32 
dtypes: int32(1), string(1)
memory usage: 1.3 KB


In [151]:
df_views_canal.head()

Unnamed: 0,id_canal,total_visualizacoes_dia
0,Ucroh1v-fymunbimrkl0y0xq,49697
1,Ucayih2y5jbeosubqoqhufda,15020
2,Ucotipyf8_rgzj1lptmvadaa,13960
3,Uce9jri0yq5sm6h5qrz9fzla,10397
4,Ucrucdh_bcfx77xpxhyq2fjw,6136


In [152]:
df_views_canal = df_views_canal.head(10)

In [153]:
df_views_canal.head()

Unnamed: 0,id_canal,total_visualizacoes_dia
0,Ucroh1v-fymunbimrkl0y0xq,49697
1,Ucayih2y5jbeosubqoqhufda,15020
2,Ucotipyf8_rgzj1lptmvadaa,13960
3,Uce9jri0yq5sm6h5qrz9fzla,10397
4,Ucrucdh_bcfx77xpxhyq2fjw,6136


In [154]:
fig = px.bar(
    df_views_canal, 
    x='total_visualizacoes_dia',
    y='id_canal', 
    orientation='h', 
    text_auto=True, 
    category_orders={'id_canal': df_views_canal['id_canal']}
)
fig.update_layout(
    title_text='TOP 10 VIEWS dia',
    showlegend=True,
    title=dict(x=0.5, font=dict(color='white')),
    plot_bgcolor='#1F2326',
    margin=dict(l=10, r=20, t=40, b=20, pad=2),
    paper_bgcolor='#1F2326',
    yaxis=dict(title='', tickfont=dict(color='white', size=16), tickmode='array', ticklen=1),
    xaxis=dict(title='', tickfont=dict(color='white'), visible=False),
    legend=dict(font=dict(color='white'), orientation='h', y=4)
)
    
fig.update_traces(
    textfont_color='white',
    marker_color='#F11A8E',
    textfont_size=18,
    textposition='outside',  
)
fig.show() 

# A duração do vídeo é importante para visualizações e interação (curtidas/comentários)?

In [167]:
tipos = {
    'duracao_video_minutos': 'float',
    'total_comentarios' : 'int32',
    'total_likes': 'int32',
    'total_visualizacoes': 'int32'
}

In [183]:
df_duracao_video = pd.read_sql_query(
    ' SELECT duracao_video_minutos, '
    ' total_comentarios,'
    ' total_likes, '
    ' total_visualizacoes'
    ' FROM total_visualizacoes_por_semana '
    ' WHERE ID_CANAL IN ("UCrOH1V-FyMunBIMrKL0y0xQ") '
    '    AND data_extracao="2023-10-27"  '
    ,
    con=engine,
    dtype=tipos
    )
df_duracao_video.head()

Unnamed: 0,duracao_video_minutos,total_comentarios,total_likes,total_visualizacoes
0,0.0,0,50,0
1,26.88,220,5600,42650
2,25.67,213,6855,54375
3,28.43,124,6448,50789
4,32.28,180,5046,39265


In [184]:
fig = px.scatter(df_duracao_video, x='duracao_video_minutos', y='total_comentarios', trendline='ols')
fig.update_xaxes(title_text='DURACAO_VIDEO_MINUTOS', row=1, col=1)
fig.update_yaxes(title_text='TOTAL_LIKES', row=1, col=1)
fig.show()

In [185]:
results = px.get_trendline_results(fig)

print(results.px_fit_results.iloc[0].summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.171
Model:                            OLS   Adj. R-squared:                  0.146
Method:                 Least Squares   F-statistic:                     6.997
Date:                Mon, 04 Dec 2023   Prob (F-statistic):             0.0123
Time:                        21:38:15   Log-Likelihood:                -214.50
No. Observations:                  36   AIC:                             433.0
Df Residuals:                      34   BIC:                             436.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         41.0773     34.999      1.174      0.2

In [169]:
fig = make_subplots(rows=1, cols=3, subplot_titles=['DURACAO_VIDEO_MINUTOS X TOTAL_LIKES', 'DURACAO_VIDEO_MINUTOS X TOTAL_COMENTARIOS', 'DURACAO_VIDEO_MINUTOS X TOTAL_VISUALIZACOES'])
sc1 = px.scatter(df_duracao_video, x='duracao_video_minutos', y='total_likes',)
fig.add_trace(sc1['data'][0], row=1, col=1)
fig.update_xaxes(title_text='DURACAO_VIDEO_MINUTOS', row=1, col=1)
fig.update_yaxes(title_text='TOTAL_LIKES', row=1, col=1)

sc2 = px.scatter(df_duracao_video, x='duracao_video_minutos', y='total_comentarios')
fig.add_trace(sc2['data'][0], row=1, col=2)
fig.update_xaxes(title_text='DURACAO_VIDEO_MINUTOS', row=1, col=2)
fig.update_yaxes(title_text='TOTAL_COMENTARIOS', row=1, col=2)
fig.update_layout(title_text='Desenpenho Canal', showlegend=True)

sc3 = px.scatter(df_duracao_video, x='duracao_video_minutos', y='total_visualizacoes')
fig.add_trace(sc3['data'][0], row=1, col=3)
fig.update_xaxes(title_text='DURACAO_VIDEO_MINUTOS', row=1, col=3)
fig.update_yaxes(title_text='TOTAL_VISUALIZACOES', row=1, col=3)
fig.update_layout(title_text='Desenpenho Canal', showlegend=True)


# Desepenho dos Canais (like, comentários, vísualizações) Faça Comparação


In [187]:
tipos = {
    'data_extracao': 'datetime64[ns]',
    'id_canal': 'string',
    'total_visualizacoes_dia' : 'int32',
    'total_likes_dia' : 'int32',
    'total_comentarios_dia' : 'int32'
}

In [189]:
df_duracao_video = pd.read_sql_query(
    ' SELECT data_extracao, ' 
    ' id_canal, ' 
    ' total_visualizacoes_dia, ' 
    ' total_likes_dia  , ' 
    ' total_comentarios_dia '
    ' FROM total_visualizacoes_por_semana '
    ' WHERE  ID_CANAL IN ("UCrOH1V-FyMunBIMrKL0y0xQ" , "UCe9jrI0YQ5SM6h5QRZ9FZlA")  ',
    dtype=tipos,
    con=engine
    )

df_duracao_video.head()

Unnamed: 0,data_extracao,id_canal,total_visualizacoes_dia,total_likes_dia,total_comentarios_dia
0,2023-10-15,UCrOH1V-FyMunBIMrKL0y0xQ,4562,687,23
1,2023-10-16,UCrOH1V-FyMunBIMrKL0y0xQ,2837,236,6
2,2023-10-16,UCrOH1V-FyMunBIMrKL0y0xQ,7914,1517,55
3,2023-10-19,UCe9jrI0YQ5SM6h5QRZ9FZlA,5011,795,48
4,2023-10-19,UCe9jrI0YQ5SM6h5QRZ9FZlA,7819,2011,120


In [190]:
df_duracao_video = df_duracao_video.groupby(['data_extracao', 'id_canal']) \
    .agg(
        total__visualizacoes=('total_visualizacoes_dia', 'sum'),
        total_likes=('total_likes_dia', 'sum'),
        total_comentarios=('total_comentarios_dia', 'sum')
    ).reset_index()

In [192]:
df_duracao_video.head()

Unnamed: 0,data_extracao,id_canal,total__visualizacoes,total_likes,total_comentarios
0,2023-10-15,UCrOH1V-FyMunBIMrKL0y0xQ,9124,1374,46
1,2023-10-16,UCrOH1V-FyMunBIMrKL0y0xQ,21502,3506,122
2,2023-10-19,UCe9jrI0YQ5SM6h5QRZ9FZlA,20649,4817,288
3,2023-10-19,UCrOH1V-FyMunBIMrKL0y0xQ,133688,21350,686
4,2023-10-20,UCe9jrI0YQ5SM6h5QRZ9FZlA,28906,3486,221


In [209]:
fig = px.line(df_duracao_video, x='data_extracao', y='total__visualizacoes', color='id_canal')
fig.update_layout(
    title_text='Comparação Visualizacao',
    showlegend=True,
    title=dict(x=0.5, font=dict(color='white')),
    plot_bgcolor='#1F2326',
    yaxis=dict(visible=False),  
    margin=dict(l=20, r=20, t=40, b=20, pad=6),
    paper_bgcolor='#1F2326',
    xaxis=dict(title='', tickfont=dict(color='white'), showgrid=False),
    legend=dict(font=dict(color='white')),
    xaxis_tickformat = '%d/%m/%Y',
    bargap=0.09

)
fig.update_traces(
    textfont_color='white',
    textfont_size=16 ,
     
)
fig.show()

# Wordcloud TAGS

In [60]:
df_wordcloud_tags = pd.read_sql_query(
    ' SELECT tags '
    ' FROM total_visualizacoes_por_semana '
    ' WHERE assunto = "assunto_cities_skylines" ',
    con=engine
)
df_wordcloud_tags.head()

Unnamed: 0,tags
0,[]
1,"[""cities skylines"",""cities skylines gameplay"",..."
2,"[""xbae8vvXZp1LBYAKst0k"",""Machinimas"",""GAC"",""Ga..."
3,"[""euro truck 2"",""lucas gameplays brasil"",""mapa..."
4,"[""review"",""gameplay"",""jogo pc"",""jogos pc"",""pc ..."


In [152]:
df_wordcloud_tags_copy = df_wordcloud_tags.copy()

In [153]:
df_wordcloud_tags_copy.drop_duplicates(inplace=True)

In [154]:
df_wordcloud_tags_copy.head()

Unnamed: 0,tags
0,[]
1,"[""cities skylines"",""cities skylines gameplay"",..."
2,"[""xbae8vvXZp1LBYAKst0k"",""Machinimas"",""GAC"",""Ga..."
3,"[""euro truck 2"",""lucas gameplays brasil"",""mapa..."
4,"[""review"",""gameplay"",""jogo pc"",""jogos pc"",""pc ..."


In [155]:
teste = ["cities skylines","cities skylines gameplay"]

def partir_lista(teste):
     return ','.join(str(item) for item in teste)

partir_lista(teste=teste)

'cities skylines,cities skylines gameplay'

In [156]:
def converter_para_lista(texto):
    texto = texto.replace('[', '').replace(']', '').replace('"', '')
    lista = texto.split(', ')
    return lista


In [157]:
df_wordcloud_tags_copy.info()

<class 'pandas.core.frame.DataFrame'>
Index: 108 entries, 0 to 1446
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tags    108 non-null    object
dtypes: object(1)
memory usage: 1.7+ KB


In [158]:
df_wordcloud_tags_copy['tags'] = df_wordcloud_tags_copy['tags'].apply(converter_para_lista)

In [159]:
df_wordcloud_tags_copy.head()

Unnamed: 0,tags
0,[]
1,"[cities skylines,cities skylines gameplay,citi..."
2,"[xbae8vvXZp1LBYAKst0k,Machinimas,GAC,Gameplay,..."
3,"[euro truck 2,lucas gameplays brasil,mapa eaa,..."
4,"[review,gameplay,jogo pc,jogos pc,pc game,joga..."


In [160]:
df_wordcloud_tags_copy['tags_str'] = df_wordcloud_tags_copy['tags'].apply(partir_lista)
df_wordcloud_tags_copy.head()

Unnamed: 0,tags,tags_str
0,[],
1,"[cities skylines,cities skylines gameplay,citi...","cities skylines,cities skylines gameplay,citie..."
2,"[xbae8vvXZp1LBYAKst0k,Machinimas,GAC,Gameplay,...","xbae8vvXZp1LBYAKst0k,Machinimas,GAC,Gameplay,P..."
3,"[euro truck 2,lucas gameplays brasil,mapa eaa,...","euro truck 2,lucas gameplays brasil,mapa eaa,m..."
4,"[review,gameplay,jogo pc,jogos pc,pc game,joga...","review,gameplay,jogo pc,jogos pc,pc game,jogan..."


- Média likes, comentarios like / visualizacoes ou likes / visualizacoes

# Métricas especificas TRENDS

# Categrias Populares por data de extração

In [225]:
tipos = {
    'data_extracao': 'datetime64[ns]',
    'nm_canal': 'string',
    'titulo_video': 'string',
    'id_categoria': 'string',
    'total_visualizacoes_dia': 'int32',
    'total_likes_dia' : 'float32',
    'total_comentarios_dia': 'float32'
}

In [226]:
df_categoria_populares = pd.read_sql_query(
    ' SELECT data_extracao, ' 
    ' nm_canal, '
    ' titulo_video, '
    ' id_categoria, ' 
    ' total_visualizacoes_dia, '  
    ' total_likes_dia, '
    ' total_comentarios_dia '
    '  FROM trends_youtube '
    ' where data_extracao = "2023-10-15" ',
    con=engine,
    dtype=tipos
    )
df_categoria_populares.head()

Unnamed: 0,data_extracao,nm_canal,titulo_video,id_categoria,total_visualizacoes_dia,total_likes_dia,total_comentarios_dia
0,2023-10-15,東映アニメーション公式YouTubeチャンネル,“Dragon Ball DAIMA” Teaser Trailer / Fall 2024,1,4141018,171952.0,17758.0
1,2023-10-15,Paulinho e Toquinho Family,INCRÍVEL PISCINA de BOLINHAS DENTRO do CARRO,1,269891,3684.0,114.0
2,2023-10-15,A24,The Iron Claw | Official Trailer HD | A24,1,4279413,78174.0,5415.0
3,2023-10-15,Record TV Americas,Roberto Cabrini mostra a realidade das vítimas...,1,61574,,140.0
4,2023-10-15,Vivziepop,Happy Day in Hell | Prime Video,1,1895056,233452.0,13365.0


In [227]:
df_categoria_populares.memory_usage(deep=True)

Index                        128
data_extracao               1848
nm_canal                   16511
titulo_video               32335
id_categoria               13624
total_visualizacoes_dia      924
total_likes_dia              924
total_comentarios_dia        924
dtype: int64

In [228]:
df_categoria_populares.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231 entries, 0 to 230
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   data_extracao            231 non-null    datetime64[ns]
 1   nm_canal                 231 non-null    string        
 2   titulo_video             231 non-null    string        
 3   id_categoria             231 non-null    string        
 4   total_visualizacoes_dia  231 non-null    int32         
 5   total_likes_dia          226 non-null    float32       
 6   total_comentarios_dia    222 non-null    float32       
dtypes: datetime64[ns](1), float32(2), int32(1), string(3)
memory usage: 10.1 KB


In [229]:
df_categoria_populares.groupby(['data_extracao', 'id_categoria']) \
    .agg(
        total_visualizacoes=('total_visualizacoes_dia', 'sum'),
        total_likes = ('total_likes_dia', 'sum'),
        total_comentarios_dia=('total_comentarios_dia', 'sum')
    ).reset_index()

Unnamed: 0,data_extracao,id_categoria,total_visualizacoes,total_likes,total_comentarios_dia
0,2023-10-15,1,10646952,487262.0,36792.0
1,2023-10-15,10,188470120,8054410.0,505272.0
2,2023-10-15,17,25971018,942816.0,50166.0
3,2023-10-15,20,29314189,1747021.0,116548.0
4,2023-10-15,22,6772581,620345.0,49841.0
5,2023-10-15,23,1361139,68949.0,1486.0
6,2023-10-15,24,285267745,11743071.0,572240.0
7,2023-10-15,25,11520764,248596.0,13235.0
8,2023-10-15,26,49953,9076.0,690.0
9,2023-10-15,27,5215689,263761.0,12350.0


- Vídeo populares (likes, comentarios e visualizações)

In [230]:
df_categoria_populares.head()

Unnamed: 0,data_extracao,nm_canal,titulo_video,id_categoria,total_visualizacoes_dia,total_likes_dia,total_comentarios_dia
0,2023-10-15,東映アニメーション公式YouTubeチャンネル,“Dragon Ball DAIMA” Teaser Trailer / Fall 2024,1,4141018,171952.0,17758.0
1,2023-10-15,Paulinho e Toquinho Family,INCRÍVEL PISCINA de BOLINHAS DENTRO do CARRO,1,269891,3684.0,114.0
2,2023-10-15,A24,The Iron Claw | Official Trailer HD | A24,1,4279413,78174.0,5415.0
3,2023-10-15,Record TV Americas,Roberto Cabrini mostra a realidade das vítimas...,1,61574,,140.0
4,2023-10-15,Vivziepop,Happy Day in Hell | Prime Video,1,1895056,233452.0,13365.0


- Canais populares (likes, comentarios e visualizações)

In [231]:
df_categoria_populares.groupby(['data_extracao', 'nm_canal']) \
    .agg(
        total_visualizacoes=('total_visualizacoes_dia', 'sum'),
        total_likes = ('total_likes_dia', 'sum'),
        total_comentarios_dia=('total_comentarios_dia', 'sum')
    ).reset_index()

Unnamed: 0,data_extracao,nm_canal,total_visualizacoes,total_likes,total_comentarios_dia
0,2023-10-15,(G)I-DLE (여자)아이들 (Official YouTube Channel),8627209,476803.0,19048.0
1,2023-10-15,30PRAUM,932037,116511.0,9296.0
2,2023-10-15,5incominutos,165650,27148.0,1994.0
3,2023-10-15,A Fazenda,1190662,25723.0,0.0
4,2023-10-15,A24,4279413,78174.0,5415.0
...,...,...,...,...,...
183,2023-10-15,ge,3921474,109948.0,9405.0
184,2023-10-15,k a m a i t a c h i,113624,28687.0,2104.0
185,2023-10-15,starshipTV,29838674,780081.0,45617.0
186,2023-10-15,viniccius13,1607585,259444.0,10361.0


- Desempenho da categoria por dia

In [250]:
df_desempenho_categoria_dia = pd.read_sql_query(
    ' SELECT data_extracao, ' 
    ' nm_canal, '
    ' titulo_video, '
    ' id_categoria, ' 
    ' total_visualizacoes_dia, '  
    ' total_likes_dia, '
    ' total_comentarios_dia '
    '  FROM trends_youtube '
    ' where id_categoria = 20 ',
    con=engine,
    dtype=tipos
    )
df_desempenho_categoria_dia.head()

Unnamed: 0,data_extracao,nm_canal,titulo_video,id_categoria,total_visualizacoes_dia,total_likes_dia,total_comentarios_dia
0,2023-10-15,PlayStation,PlayStation 5 - Same Immersive Power. New Slim...,20,1481250,38623.0,3777.0
1,2023-10-15,Flakes Power,URGENTE!!! FORTNITE,20,195887,10604.0,491.0
2,2023-10-15,Ana Toys games,"COMO ACHAR TODOS OS DOCES EXTREME, HARD E INSA...",20,90064,2200.0,82.0
3,2023-10-15,CBLOL,PSG Talon x LOUD (Jogo 1) - Worlds 2023: Fase ...,20,108681,3230.0,234.0
4,2023-10-15,CBLOL,LOUD x GAM Esports (Jogo 1) - Worlds 2023: Fas...,20,176296,8923.0,261.0


In [251]:
df_desempenho_categoria_dia.groupby('data_extracao').agg(
    total_visualizacoes=('total_visualizacoes_dia', 'sum'),
    total_likes=('total_likes_dia', 'sum'),
    total_comentarios=('total_comentarios_dia', 'sum')
).reset_index()

Unnamed: 0,data_extracao,total_visualizacoes,total_likes,total_comentarios
0,2023-10-15,29314189,1747021.0,116548.0
1,2023-10-16,8515606,487937.0,28578.0
2,2023-10-19,8804495,438514.0,16182.0
3,2023-10-20,2081774,89122.0,10456.0
4,2023-10-21,6227678,257911.0,8012.0
5,2023-10-22,8052190,401900.0,37357.0
6,2023-10-23,8572491,706958.0,43658.0
7,2023-10-24,9262972,419687.0,30551.0
8,2023-10-25,4755872,161188.0,26452.0
9,2023-10-26,6796190,261186.0,33919.0


- Top 10 vídeo em alta por categoria

In [259]:
tipos = {
    'nm_canal': 'string',
    'titulo_video': 'string',
    'id_categoria': 'string',
    'total_visualizacoes_dia': 'int32',
    'total_likes_dia' : 'float32',
    'total_comentarios_dia': 'float32'
}

In [265]:
df_top_10 = pd.read_sql_query(
    ' SELECT  ' 
    ' nm_canal, '
    ' titulo_video, '
    ' id_categoria, ' 
    ' total_visualizacoes_dia, '  
    ' total_likes_dia, '
    ' total_comentarios_dia '
    '  FROM trends_youtube '
    ' where id_categoria = 20 '
    ' AND data_extracao = "2023-10-26" ',
    con=engine,
    dtype=tipos
    )
df_top_10.head(10)

Unnamed: 0,nm_canal,titulo_video,id_categoria,total_visualizacoes_dia,total_likes_dia,total_comentarios_dia
0,Apex Legends,Apex Legends: Ignite Launch Trailer,20,147543,3248.0,220.0
1,Apex Legends,Apex Legends: Ignite Gameplay Trailer,20,840241,41044.0,3831.0
2,League of Legends,"HEARTSTEEL - PARANOIA ft. BAEKHYUN, tobi lou, ...",20,1935990,34047.0,1767.0
3,League of Legends,HEARTSTEEL - PARANOIA Music Video Teaser | Lea...,20,16016,597.0,3.0
4,Bern,Eu EXPULSEI o GELEIA da CREATIVE SQUAD,20,3567,113.0,1.0
5,Rockstar Games,Halloween in GTA Online,20,345895,17159.0,8217.0
6,JeffBlox 2,LOCALIZAÇÃO E COMO PEGAR O ESTILO DE LUTA ARTE...,20,453,5.0,214.0
7,Cortes do Mylon,MYLON REAGE: AS 20 PIORES PLAYS DA HISTÓRIA DO...,20,58108,3342.0,33.0
8,ARK: Survival Ascended,ARK: Survival Ascended Launch Trailer,20,1322140,66120.0,7019.0
9,Tex HS,A EXPERIÊNCIA DO AVIÃO ft CRIS MINEGIRL (Roblo...,20,53286,829.0,45.0
