In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the weekly search-trend export into `df`; the path is relative to the
# notebook's working directory. The bare `df` on the last line renders the
# frame as the cell output.
df = pd.read_csv(filepath_or_buffer='datasets/trends-new-year.csv')
df

Unnamed: 0,Semana,gimnasio,dieta,ahorrar
0,2021-01-03,27,59,9
1,2021-01-10,24,67,8
2,2021-01-17,25,60,8
3,2021-01-24,23,56,9
4,2021-01-31,19,60,10
...,...,...,...,...
256,2025-11-30,23,27,10
257,2025-12-07,22,26,7
258,2025-12-14,22,24,6
259,2025-12-21,27,19,4


In [3]:
# Reshape from wide (one column per search term) to long format: every column
# other than 'Semana' becomes a (keyword, interest) pair, one row per
# week/keyword combination.
# NOTE(review): this rebinds `df`, so re-running the cell without re-running
# the load cell first melts the already-long frame again.
df = pd.melt(
    df,
    id_vars='Semana',
    var_name='keyword',
    value_name='interest',
)

df

Unnamed: 0,Semana,keyword,interest
0,2021-01-03,gimnasio,27
1,2021-01-10,gimnasio,24
2,2021-01-17,gimnasio,25
3,2021-01-24,gimnasio,23
4,2021-01-31,gimnasio,19
...,...,...,...
778,2025-11-30,ahorrar,10
779,2025-12-07,ahorrar,7
780,2025-12-14,ahorrar,6
781,2025-12-21,ahorrar,4


In [4]:
# Derive the ISO calendar year and week number of every 'Semana' date.
iso = pd.to_datetime(df['Semana']).dt.isocalendar()

df['year'] = iso.year
df['week'] = iso.week

# Keep only rows whose ISO year is 2021 or later. A date early in January can
# belong to the last ISO week of the previous year, and such rows are dropped
# here. The index is rebuilt so it is contiguous again.
df = df[df['year'] >= 2021].reset_index(drop=True)

# Yearly baseline: mean interest of each keyword within each ISO year,
# broadcast back onto every row of that (keyword, year) group.
df['baseline_year'] = (
    df.groupby(['keyword', 'year'])['interest']
      .transform('mean')
)

df['above_baseline'] = df['interest'] > df['baseline_year']

# A week counts as "hype" when it is above the baseline AND at least one of its
# neighbouring rows within the same (keyword, year) group is above it too.
# Rows are compared in their current order, so this relies on the frame being
# in chronological order within each group.
# `transform` keeps the result aligned to df's index on every pandas version
# (no group-key index levels to strip), and `fill_value=False` treats the
# missing neighbour at a group edge as "not above baseline" while keeping the
# series boolean instead of upcasting it to object with NaN.
df['is_hype'] = (
    df.groupby(['keyword', 'year'])['above_baseline']
      .transform(
          lambda x: x & (
              x.shift(1, fill_value=False) | x.shift(-1, fill_value=False)
          )
      )
)

df

Unnamed: 0,Semana,keyword,interest,year,week,baseline_year,above_baseline,is_hype
0,2021-01-10,gimnasio,24,2021,1,19.326923,True,True
1,2021-01-17,gimnasio,25,2021,2,19.326923,True,True
2,2021-01-24,gimnasio,23,2021,3,19.326923,True,True
3,2021-01-31,gimnasio,19,2021,4,19.326923,False,False
4,2021-02-07,gimnasio,19,2021,5,19.326923,False,False
...,...,...,...,...,...,...,...,...
775,2025-11-30,ahorrar,10,2025,48,6.788462,True,True
776,2025-12-07,ahorrar,7,2025,49,6.788462,True,True
777,2025-12-14,ahorrar,6,2025,50,6.788462,False,False
778,2025-12-21,ahorrar,4,2025,51,6.788462,False,False


In [5]:
def get_hype_intervals(df):
    """Collect the runs of consecutive hype rows, year by year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'year', 'week' and 'is_hype'. Rows are grouped
        by 'year' and ordered by 'week' inside each group before scanning.

    Returns
    -------
    list of tuple
        One ``(year, start_week, end_week)`` entry per run of rows whose
        'is_hype' value is truthy. ``start_week`` is the 'week' of the first
        row of the run and ``end_week`` the 'week' of its last row. A run that
        is still open when a year's rows are exhausted is closed at that
        year's last row.
    """
    intervals = []

    for year, yearly in df.groupby('year'):
        open_run = False
        first_week = None
        last_seen = None

        for _, record in yearly.sort_values('week').iterrows():
            hyped = record['is_hype']

            if open_run and not hyped:
                # The run ended on the previous row; record it.
                intervals.append((year, first_week, last_seen))
                open_run = False
            elif hyped and not open_run:
                # First hype row after a non-hype stretch opens a new run.
                first_week = record['week']
                open_run = True

            last_seen = record['week']

        # Runs never span two years: flush whatever is still open.
        if open_run:
            intervals.append((year, first_week, last_seen))

    return intervals


In [6]:
# One figure per keyword: weekly interest with one line per year, so the years
# can be compared on a shared week-of-year axis.
for keyword in df['keyword'].unique():
    df_keyword = df.loc[df['keyword'].eq(keyword)]

    fig = px.line(
        df_keyword,
        x='week',
        y='interest',
        color='year',
        line_group='year',
        markers=False,
        template='plotly_dark',
        title=f'Interés semanal por {keyword.upper()} en Perú - Comparación interanual',
        labels=dict(
            week='Semana',
            interest='Índice de interés',
            year='Año',
        ),
    )

    # Shade every hype interval. Each band is padded by half a week on both
    # sides so it covers its first and last week fully. The fill is nearly
    # transparent, so bands from different years that overlap on the
    # week-of-year axis stack up visually.
    hype_intervals = get_hype_intervals(df_keyword)

    for hype_year, first_week, last_week in hype_intervals:
        fig.add_vrect(
            x0=first_week - 0.5,
            x1=last_week + 0.5,
            fillcolor='rgba(255, 215, 0, 0.05)',  # soft gold
            layer='below',
            line_width=0,
        )

    # A tick every 4 weeks and a single hover box listing all years at once.
    fig.update_layout(xaxis_dtick=4, hovermode='x unified')

    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(rangemode='tozero', showgrid=False)

    fig.show()


In [7]:
# Number of hype weeks per keyword and year.
# Summing the boolean 'is_hype' flag per group (instead of filtering the hype
# rows first and counting what is left) keeps keyword/year combinations that
# have no hype week at all: they appear with a count of 0 rather than silently
# vanishing from the table and from the charts built on it.
hype_duration = (
    df.groupby(['keyword', 'year'])['is_hype']
    .sum()
    .astype(int)
    .reset_index(name='weeks_above_baseline')
)
hype_duration

Unnamed: 0,keyword,year,weeks_above_baseline
0,ahorrar,2021,13
1,ahorrar,2022,18
2,ahorrar,2023,29
3,ahorrar,2024,23
4,ahorrar,2025,25
5,dieta,2021,16
6,dieta,2022,27
7,dieta,2023,22
8,dieta,2024,20
9,dieta,2025,27


In [9]:
# Line chart of the yearly hype-week count, one line per keyword.
fig = px.line(
    hype_duration,
    x='year',
    y='weeks_above_baseline',
    color='keyword',
    markers=True,
    title='Evolución del hype anual',
    template='plotly_dark',
)

# Axis captions plus a single hover box that lists every keyword for a year.
fig.update_layout({
    'xaxis_title': 'Año',
    'yaxis_title': 'Semanas sobre el baseline',
    'hovermode': 'x unified',
})

# Hide the grid lines on both axes.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()


In [8]:
# Grouped bar chart of the yearly hype-week count: one cluster of bars per
# year, one bar per keyword.
fig = px.bar(
    hype_duration,
    x='year',
    y='weeks_above_baseline',
    color='keyword',
    barmode='group',
    title='Duración del hype anual (semanas sobre baseline)',
    template='plotly_dark',
)

# Axis captions and legend heading.
fig.update_layout({
    'xaxis_title': 'Año',
    'yaxis_title': 'Semanas sobre el baseline',
    'legend_title': 'Keyword',
})

# Hide the grid lines on both axes.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()
