In [15]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from warnings import filterwarnings
filterwarnings('ignore')
import sys
sys.path.append(r"C:\Programming\Brand Perception Dashboard\src")
from utils.functions import *

In [2]:
import os

# Later cells read CSVs through relative paths (e.g. "../tab2/..."), which are
# resolved against this directory.
cwd = os.getcwd()

# Show it so a wrong starting folder is obvious before any file is loaded.
print("Current working directory:", cwd)

Current working directory: c:\Programming\Brand Perception Dashboard\src\data\notebooks


In [16]:
# Aggregated stock values at daily and weekly granularity.
df_stocks_days = pd.read_csv("../tab2/aggregated_stocks_values_days.csv")
df_stocks_weeks = pd.read_csv("../tab2/aggregated_stocks_values_weeks.csv")

# Only the weekly frame is relabelled "HM" -> "H&M".
# NOTE(review): brands_list below comes from the daily frame and therefore keeps
# "HM"; confirm that weekly look-ups driven by brands_list still match.
df_stocks_weeks["Brand Name"] = df_stocks_weeks["Brand Name"].str.replace("HM", "H&M")

# Sorted unique brand names of the daily frame, without Airbnb and Pepsi.
brands_list = np.sort(df_stocks_days["Brand Name"].unique())
brands_list = brands_list[(brands_list != "Airbnb") & (brands_list != "Pepsi")]

# Every remaining brand mapped to None.
brands_dict = dict.fromkeys(brands_list)
# brands_dict_weekly = {brand: None for brand in brands_list}

In [16]:
def p_value_map(x):
    """Translate a p-value into a significance marker.

    Returns "***" below 0.001, "**" below 0.01, "*" below 0.05, "'" below 0.1
    and an empty string otherwise (including when no comparison holds, e.g. NaN).
    """
    if x < 0.001:
        return "***"
    if x < 0.01:
        return "**"
    if x < 0.05:
        return "*"
    if x < 0.1:
        return "'"
    return ""

In [18]:
# Twitter inputs at daily and weekly granularity. Later cells read the
# "polarity" column from the sentiment frames and "tweets_count" from the count
# frames, selecting rows through a "brand" column.
df_tweets_sentiment_daily = pd.read_csv("../tab2/twitter_sentiment_daily_percent.csv")
df_tweets_sentiment_weekly = pd.read_csv("../tab2/twitter_sentiment_weekly_percent.csv")
df_tweets_count_daily = pd.read_csv("../tab2/twitter_count_daily.csv")
df_tweets_count_weekly = pd.read_csv("../tab2/twitter_count_weekly.csv")

In [4]:
# StockTwits inputs at daily and weekly granularity. Later cells read the
# "polarity" column from the first pair and "stocktwits_count" from the count
# pair, selecting rows through a "brand" column.
df_stocktwits_daily = pd.read_csv("../tab2/stocktwits_daily.csv")
df_stocktwits_weekly = pd.read_csv("../tab2/stocktwits_weekly.csv")
df_stocktwits_count_daily = pd.read_csv("../tab2/stocktwits_daily_count.csv")
df_stocktwits_count_weekly = pd.read_csv("../tab2/stocktwits_weekly_count.csv")

In [3]:
# YouGov metrics, daily and weekly. The "Brand" column is renamed to "brand" so
# these frames can be filtered like the other score frames.
# NOTE(review): these two paths start at "data/tab2/" while every other cell
# reads from "../tab2/"; confirm which working directory this cell expects.
df_yougov_daily = pd.read_csv("data/tab2/yougov_daily.csv").rename(columns={"Brand": "brand"})
df_yougov_weekly = pd.read_csv("data/tab2/yougov_weekly.csv").rename(columns={"Brand": "brand"})
# Only the weekly frame is relabelled "HM" -> "H&M".
df_yougov_weekly["brand"] = df_yougov_weekly["brand"].str.replace("HM", "H&M")

# Metric groups. "Current Customer " carries a trailing space
# (presumably matching the CSV header; verify).
yougov_brand_presence = ["Awareness", "Attention", "WOM Exposure", "Ad Awareness", "Buzz"]
yougov_brand_image = [
    "Impression", "Quality", "Value", "Recommend",
    "Satisfaction", "Reputation", "BrandIndex",
]
yougov_brand_relationship = ["Consideration", "Purchase Intent", "Current Customer ", "Former Customer"]

In [5]:
# To pd.DataFrame
def create_df(df_stocks,df_score, score, stock_column="Close"):
    """Build a per-brand table of stock-vs-score correlations with significance marks.

    For every brand in the module-level ``brands_list`` the rows of that brand
    are selected from both frames and handed to ``pearson_correlation``
    (star-imported helper; assumed to return a ``(coefficient, p-value)`` pair,
    which is how its result is consumed here).

    Parameters
    ----------
    df_stocks : pd.DataFrame
        Stock frame with a "Brand Name" column and the ``stock_column`` column.
    df_score : pd.DataFrame
        Score frame with a "brand" column and the ``score`` column.
    score : str
        Name of the score column; also the name of the single output column.
    stock_column : str, default "Close"
        Stock column to correlate against.

    Returns
    -------
    pd.DataFrame
        Indexed by brand, one column named ``score`` holding strings such as
        "0.35***": the coefficient rounded to two decimals followed by the
        marker produced by ``p_value_map``.
    """
    # Collect results in a local dict: writing into the shared module-level
    # brands_dict let entries from earlier calls leak into later results.
    correlations = {
        brand: pearson_correlation(
            df_stocks[df_stocks["Brand Name"] == brand],
            df_score[df_score["brand"] == brand],
            stock_column,
            score,
        )
        for brand in brands_list
    }
    df_brands_score = pd.DataFrame.from_dict(correlations, orient="index", columns=[score, "p-value"])
    significance = df_brands_score["p-value"].apply(p_value_map)
    df_brands_score[score] = df_brands_score[score].round(2).astype(str) + significance
    return df_brands_score.drop(columns=["p-value"])



In [None]:
# Per-brand correlation of the daily stock frame with Twitter polarity and with
# tweet counts.
df_daily_polarity = create_df(df_stocks_days,df_tweets_sentiment_daily, "polarity")
df_daily_count = create_df(df_stocks_days,df_tweets_count_daily, "tweets_count")
# NOTE: a cell renders only its last bare expression, so df_daily_polarity on
# the next line is evaluated but not displayed; only df_daily_count is shown.
df_daily_polarity
df_daily_count

In [27]:
# Put the daily polarity and tweet-count correlations side by side,
# aligned on the brand index.
df_daily_twitter = df_daily_polarity.merge(df_daily_count, left_index=True, right_index=True)
df_daily_twitter

Unnamed: 0,polarity,tweets_count
Apple,-0.15**,0.28***
Coca-Cola,-0.02,0.15**
HM,-0.29***,-0.04
Nike,-0.01,-0.09
Starbucks,0.35***,0.1'


In [28]:
# Weekly Twitter table: polarity and tweet-count correlations side by side,
# aligned on the brand index.
df_weekly_polarity = create_df(df_stocks_weeks, df_tweets_sentiment_weekly, "polarity")
df_weekly_count = create_df(df_stocks_weeks, df_tweets_count_weekly, "tweets_count")
df_weekly_twitter = df_weekly_polarity.merge(df_weekly_count, left_index=True, right_index=True)
df_weekly_twitter

Unnamed: 0,polarity,tweets_count
Apple,-0.29*,0.41***
Coca-Cola,0.07,0.2'
HM,-0.34**,-0.1
Nike,-0.05,-0.14
Starbucks,0.46***,0.23'


In [9]:
# Daily StockTwits table: polarity and message-count correlations side by side,
# aligned on the brand index.
df_daily_stocktwits_polarity = create_df(df_stocks_days, df_stocktwits_daily, "polarity")
df_daily_stocktwits_count = create_df(df_stocks_days, df_stocktwits_count_daily, "stocktwits_count")
df_daily_stocktwits = df_daily_stocktwits_polarity.merge(
    df_daily_stocktwits_count, left_index=True, right_index=True
)
df_daily_stocktwits

Unnamed: 0,polarity,stocktwits_count
Apple,0.79,0.23
Coca-Cola,-0.29***,-0.12
HM,-0.51,0.05
Nike,0.31***,0.05
Starbucks,0.42***,-0.08


In [10]:
# Weekly StockTwits table: polarity and message-count correlations side by side,
# aligned on the brand index.
df_weekly_stocktwits_polarity = create_df(df_stocks_weeks, df_stocktwits_weekly, "polarity")
df_weekly_stocktwits_count = create_df(df_stocks_weeks, df_stocktwits_count_weekly, "stocktwits_count")
df_weekly_stocktwits = df_weekly_stocktwits_polarity.merge(
    df_weekly_stocktwits_count, left_index=True, right_index=True
)
df_weekly_stocktwits

Unnamed: 0,polarity,stocktwits_count
Apple,0.0,0.0
Coca-Cola,-0.3',-0.2
HM,-0.63,0.02
Nike,0.58***,0.07
Starbucks,0.6***,-0.18


In [23]:
# One correlation column per brand-presence metric (daily data). The columns
# are built first and concatenated once, instead of growing a frame with
# repeated pd.concat calls seeded from an empty DataFrame.
df_daily_yougov_pressence = pd.concat(
    [create_df(df_stocks_days, df_yougov_daily, pressence) for pressence in yougov_brand_presence],
    axis=1,
)
df_daily_yougov_pressence

Unnamed: 0,Awareness,Attention,WOM Exposure,Ad Awareness,Buzz
Apple,0.13**,0.13**,0.12*,0.01,0.01
Coca-Cola,-0.1*,0.21***,0.03,0.39***,0.04
HM,0.17***,0.26***,0.21***,0.2***,0.11*
Nike,0.14**,-0.09',-0.05,0.01,0.22***
Starbucks,0.02,0.08',0.18***,0.29***,0.11*


In [17]:
# One correlation column per brand-presence metric (weekly data). The columns
# are built first and concatenated once, instead of growing a frame with
# repeated pd.concat calls seeded from an empty DataFrame.
df_weekly_yougov_pressence = pd.concat(
    [create_df(df_stocks_weeks, df_yougov_weekly, pressence) for pressence in yougov_brand_presence],
    axis=1,
)

In [18]:
df_weekly_yougov_pressence

Unnamed: 0,Awareness,Attention,WOM Exposure,Ad Awareness,Buzz
Apple,0.34**,0.2',0.24*,0.04,0.03
Coca-Cola,-0.15,0.45***,0.07,0.62***,0.16
HM,0.3**,0.53***,0.38***,0.45***,0.25*
Nike,0.36***,-0.09,-0.05,-0.01,0.41***
Starbucks,0.14,0.19',0.35**,0.41***,0.27*


In [8]:
# One correlation column per brand-image metric (weekly data). The columns are
# built first and concatenated once, instead of growing a frame with repeated
# pd.concat calls seeded from an empty DataFrame.
df_weekly_yougov_image = pd.concat(
    [create_df(df_stocks_weeks, df_yougov_weekly, image) for image in yougov_brand_image],
    axis=1,
)
df_weekly_yougov_image

Unnamed: 0,Impression,Quality,Value,Recommend,Satisfaction,Reputation,BrandIndex
Apple,-0.06,0.23*,0.07,0.05,0.1,-0.1,0.05
Coca-Cola,-0.19',-0.2',-0.09,-0.2',-0.11,-0.28**,-0.23*
HM,0.02,0.15,0.14,0.13,0.13,0.16,0.15
Nike,0.43***,0.39***,0.31**,0.42***,0.32**,0.42***,0.42***
Starbucks,0.21',0.22*,-0.1,0.25*,0.19',0.17,0.19'


In [9]:
# One correlation column per brand-relationship metric (weekly data). The
# columns are built first and concatenated once, instead of growing a frame
# with repeated pd.concat calls seeded from an empty DataFrame.
df_weekly_yougov_relationship = pd.concat(
    [
        create_df(df_stocks_weeks, df_yougov_weekly, relationship)
        for relationship in yougov_brand_relationship
    ],
    axis=1,
)
df_weekly_yougov_relationship

Unnamed: 0,Consideration,Purchase Intent,Current Customer,Former Customer
Apple,0.14,0.13,0.14,0.28**
Coca-Cola,0.0,-0.16,0.18',-0.27*
HM,0.21',0.03,0.34**,-0.07
Nike,0.26*,0.14,0.1,0.2'
Starbucks,0.39***,0.43***,0.43***,-0.21*


In [22]:
# To pd.DataFrame
def create_df_volume(df_stocks,df_score, score):
    """Build a per-brand table of "Volume"-vs-score correlations with significance marks.

    For every brand in the module-level ``brands_list`` the rows of that brand
    are selected from both frames and handed to ``pearson_correlation``
    (star-imported helper; assumed to return a ``(coefficient, p-value)`` pair,
    which is how its result is consumed here).

    Parameters
    ----------
    df_stocks : pd.DataFrame
        Stock frame with "Brand Name" and "Volume" columns.
    df_score : pd.DataFrame
        Score frame with a "brand" column and the ``score`` column.
    score : str
        Name of the score column; also the name of the single output column.

    Returns
    -------
    pd.DataFrame
        Indexed by brand, one column named ``score`` holding the coefficient
        rounded to two decimals followed by the marker from ``p_value_map``.
    """
    # Collect results in a local dict: writing into the shared module-level
    # brands_dict let entries from earlier calls leak into later results.
    correlations = {
        brand: pearson_correlation(
            df_stocks[df_stocks["Brand Name"] == brand],
            df_score[df_score["brand"] == brand],
            "Volume",
            score,
        )
        for brand in brands_list
    }
    df_brands_score = pd.DataFrame.from_dict(correlations, orient="index", columns=[score, "p-value"])
    significance = df_brands_score["p-value"].apply(p_value_map)
    df_brands_score[score] = df_brands_score[score].round(2).astype(str) + significance
    return df_brands_score.drop(columns=["p-value"])

In [26]:
# Correlation of stock "Volume" with StockTwits message counts, daily vs weekly.
df_daily_stock_volume = create_df_volume(df_stocks_days, df_stocktwits_count_daily, "stocktwits_count")
df_weekly_stock_volume = create_df_volume(df_stocks_weeks, df_stocktwits_count_weekly, "stocktwits_count")
# Both inputs have the same column name, so give the merged columns explicit
# suffixes instead of pandas' default "_x" / "_y".
df_volume_stocktwits = pd.merge(
    df_daily_stock_volume,
    df_weekly_stock_volume,
    left_index=True,
    right_index=True,
    suffixes=("_daily", "_weekly"),
)
df_volume_stocktwits

Unnamed: 0,stocktwits_count_x,stocktwits_count_y
Apple,0.08,0.0
Coca-Cola,0.34***,0.34*
HM,0.09,0.0
Nike,0.78***,0.84***
Starbucks,0.65***,0.73***


Cross-correlation for Starbucks and Nike for StockTwits

In [32]:

# Default lag window and step for the cross-correlation plots.
lag_periods, lag_step = 105, 7

# Brand-presence metrics: one marker symbol and one line colour per metric.
yougov_brand_presence = ["Awareness","Attention","WOM Exposure","Ad Awareness","Buzz"]
symbols_dict_pressence = {
    metric: {'symbol': symbol}
    for metric, symbol in zip(yougov_brand_presence, ["circle", "square", "diamond", "x", "triangle-up"])
}
colors_dict = dict(zip(yougov_brand_presence, ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]))

# Brand-relationship metrics: same scheme, added to the shared colour lookup.
yougov_brand_relationship = ["Consideration", "Purchase Intent", "Current Customer ", "Former Customer"]
symbols_dict_relationship = {
    metric: {'symbol': symbol}
    for metric, symbol in zip(yougov_brand_relationship, ["circle", "square", "diamond", "x"])
}
colors_dict.update(zip(yougov_brand_relationship, ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"]))

In [26]:
def cross_plot(df_stocks,df_metrics, brand, metrics, symbols_dict,lag_periods,legend_x=0.82,legend_y=1.0):
    """Show the lagged cross-correlation of one brand's "Close" price with several metrics.

    One lines+markers trace is added per entry of ``metrics``. The figure is
    displayed with ``Figure.show``; nothing is returned.

    Relies on module-level names: ``cross_correlation`` (star-imported helper
    whose result is read through its "Lag" and "Correlation" columns),
    ``colors_dict`` (metric name -> line colour) and ``lag_step``.

    Parameters
    ----------
    df_stocks : pd.DataFrame
        Stock frame with "Brand Name" and "Close" columns.
    df_metrics : pd.DataFrame
        Metric frame with a "brand" column and one column per metric.
    brand : str
        Brand to select in both frames; also used as the figure title.
    metrics : sequence of str
        Metric columns to plot; each must be a key of ``symbols_dict`` and of
        the module-level ``colors_dict``.
    symbols_dict : dict
        Metric name -> marker properties, e.g. ``{"symbol": "circle"}``.
    lag_periods : int
        Passed to ``cross_correlation`` together with the global ``lag_step``.
    legend_x, legend_y : float
        Legend position (top-left anchor). They default to a top-right
        placement so callers may omit them.
    """
    df_filtered_stocks = df_stocks[df_stocks["Brand Name"] == brand]
    df_filtered_metrics = df_metrics[df_metrics["brand"] == brand]

    # Always create the figure: it used to be bound only when more than one
    # metric was given, so a single metric failed with UnboundLocalError.
    cross_correlation_fig = go.Figure()
    for metric in metrics:
        df_cross_correlation = cross_correlation(
            df_filtered_stocks, df_filtered_metrics, "Close", metric, lag_periods, lag_step
        )
        cross_correlation_fig.add_trace(
            go.Scatter(
                x=df_cross_correlation["Lag"],
                y=df_cross_correlation["Correlation"],
                mode='lines+markers',
                name=metric,
                showlegend=True,
                marker=dict(symbols_dict[metric]),
                line=dict(color=colors_dict[metric]),
            )
        )

    cross_correlation_fig.update_layout(
        hovermode='x',
        hoverlabel=dict(bgcolor="white"),
        template="simple_white",
        height=550, width=1000, margin={'l': 30, 'b': 40, 't': 30, 'r': 40},
        title={
            'text': brand,
            'y': 1,
            'x': 0.515,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        yaxis_title_standoff=0,
        font=dict(family='Times New Roman', size=18),
        legend=dict(
            yanchor="top",
            y=legend_y,
            xanchor="left",
            x=legend_x,
            bgcolor='rgba(0, 0, 0, 0)',  # transparent legend background
        ),
    )

    cross_correlation_fig.update_traces(hovertemplate="%{y:.2f}", marker=dict(size=11))
    cross_correlation_fig.update_xaxes(title=" Stock Price Lag (weeks)", showline=True, linecolor='black', mirror=True)
    cross_correlation_fig.update_yaxes(title_text="Correlation", showline=True, linecolor='black', mirror=True)
    # Dashed vertical marker at zero lag.
    cross_correlation_fig.add_vline(x=0, line_width=2, line_dash="dash", line_color="blue")

    # Export settings for the mode-bar "download image" button.
    config = {
        'toImageButtonOptions': {
            'format': 'png',  # one of png, svg, jpeg, webp
            'filename': 'custom_image',
            'scale': 4,  # Multiply title/legend/axis/canvas sizes by this factor
        }
    }

    cross_correlation_fig.show(config=config)

In [183]:
# Apple, three brand-presence metrics. The legend position is passed explicitly
# (x=0.8, y=0.3, anchored top-left); without these two arguments the call does
# not match cross_plot's signature.
cross_plot(df_stocks_weeks,df_yougov_weekly, "Apple", ["Awareness", "Attention","WOM Exposure"],symbols_dict_pressence,14*7,legend_x=0.8,legend_y=0.3)

In [203]:
# Coca-Cola, three brand-presence metrics. The legend position is passed
# explicitly (x=0.8, y=0.95, anchored top-left); without these two arguments
# the call does not match cross_plot's signature.
cross_plot(df_stocks_weeks,df_yougov_weekly, "Coca-Cola", ["Attention", "Ad Awareness","Buzz"],symbols_dict_pressence,8*7,legend_x=0.8,legend_y=0.95)

In [36]:
# "H&M" is the label the weekly frames carry after the "HM" -> "H&M" replacement.
# Plots every metric in yougov_brand_presence with lag_periods=70.
cross_plot(df_stocks_weeks,df_yougov_weekly, "H&M", yougov_brand_presence,symbols_dict_pressence,10*7,0.82,1.02)

In [49]:
# Nike: four brand-presence metrics, lag_periods=105, legend placed low (y=0.27).
cross_plot(df_stocks_weeks,df_yougov_weekly, "Nike", ["Awareness", "Attention","Ad Awareness", "Buzz"],symbols_dict_pressence,15*7,0.82,0.27)

In [63]:
# Starbucks: four brand-presence metrics, lag_periods=105, legend at the left edge (x=0.02).
cross_plot(df_stocks_weeks,df_yougov_weekly, "Starbucks", ["Awareness","WOM Exposure","Ad Awareness", "Buzz"],symbols_dict_pressence,15*7,0.02,1)

In [34]:
# H&M: relationship metrics from index 1 on, i.e. "Purchase Intent",
# "Current Customer " and "Former Customer" ("Consideration" is skipped).
cross_plot(df_stocks_weeks,df_yougov_weekly, "H&M", yougov_brand_relationship[1:],symbols_dict_relationship,15*7,0.02,1)

In [40]:
# Coca-Cola: relationship metrics from index 2 on, i.e. "Current Customer "
# and "Former Customer".
cross_plot(df_stocks_weeks,df_yougov_weekly, "Coca-Cola", yougov_brand_relationship[2:],symbols_dict_relationship,15*7,0.02,1)

In [10]:
yougov_brand_relationship

['Consideration', 'Purchase Intent', 'Current Customer ', 'Former Customer']

In [12]:
# Weekly "Close" vs. YouGov "BrandIndex" cross-correlation, with Nike and
# Starbucks drawn on the same figure.
df_filtered_stocks_nike = df_stocks_weeks[df_stocks_weeks["Brand Name"] == "Nike"]
df_filtered_stocks_starbucks = df_stocks_weeks[df_stocks_weeks["Brand Name"] == "Starbucks"]
df_filtered_metrics_nike = df_yougov_weekly[df_yougov_weekly["brand"] == "Nike"]
df_filtered_metrics_starbucks = df_yougov_weekly[df_yougov_weekly["brand"] == "Starbucks"]

cross_correlation_fig = go.Figure()

# One trace per brand, added in the order Nike, Starbucks.
for brand_name, brand_stocks, brand_metrics in (
    ("Nike", df_filtered_stocks_nike, df_filtered_metrics_nike),
    ("Starbucks", df_filtered_stocks_starbucks, df_filtered_metrics_starbucks),
):
    df_cross_correlation = cross_correlation(
        brand_stocks, brand_metrics, "Close", "BrandIndex", lag_periods, lag_step
    )
    cross_correlation_fig.add_trace(
        go.Scatter(
            x=df_cross_correlation["Lag"],
            y=df_cross_correlation["Correlation"],
            mode='lines+markers',
            name=brand_name,
            showlegend=True,
        )
    )

# Figure-wide styling; no title is set for this figure.
cross_correlation_fig.update_layout(
    template="simple_white",
    hovermode='x',
    hoverlabel={"bgcolor": "white"},
    width=1000,
    height=550,
    margin=dict(l=30, b=40, t=30, r=40),
    yaxis_title_standoff=0,
    font={"family": 'Times New Roman', "size": 18},
    legend={
        "yanchor": "top",
        "y": 1,
        "xanchor": "left",
        "x": 0.86,
        "bgcolor": 'rgba(0, 0, 0, 0)',
    },
)

cross_correlation_fig.update_traces(hovertemplate="%{y:.2f}", marker={"size": 11})
cross_correlation_fig.update_xaxes(title=" Stock Price Lag (weeks)", showline=True, linecolor='black', mirror=True)
cross_correlation_fig.update_yaxes(title_text="Correlation", showline=True, linecolor='black', mirror=True)
# Dashed vertical marker at zero lag.
cross_correlation_fig.add_vline(x=0, line_width=2, line_dash="dash", line_color="blue")

# Export settings for the mode-bar "download image" button.
config = {
    'toImageButtonOptions': {
        'format': 'png',  # one of png, svg, jpeg, webp
        'filename': 'custom_image',
        'scale': 5,  # Multiply title/legend/axis/canvas sizes by this factor
    }
}

cross_correlation_fig.show(config=config)

In [28]:

period_d_w = ["Daily", "Weekly"]
# Brands drawn in the volume cross-correlation figure.
brands_plot = ["Coca-Cola", "Nike", "Starbucks"]
# Add the brand colours to the existing colour lookup instead of rebinding it.
# Rebinding dropped the metric colours that cross_plot reads, so re-running any
# cross_plot cell after this one raised KeyError.
colors_dict.update({"Apple":"#1f77b4", "Coca-Cola":"#ff7f0e", "HM":"#2ca02c", "Nike":"#d62728", "Starbucks":"#9467bd"})

In [40]:
# df_filtered_stocks_nike = df_stocks_weeks[df_stocks_weeks["Brand Name"] == "Nike"]
# df_filtered_stocks_starbucks = df_stocks_weeks[df_stocks_weeks["Brand Name"] == "Starbucks"]
# df_filtered_metrics_nike = df_stocktwits_count_daily[df_stocktwits_count_daily["brand"] == "Nike"]
# df_filtered_metrics_starbucks = df_yougov_weekly[df_yougov_weekly["brand"] == "Starbucks"]


# Daily stock "Volume" vs. StockTwits message count cross-correlation, one
# trace per brand in brands_plot, coloured through colors_dict.
cross_correlation_fig = go.Figure()

for brand in brands_plot:
    brand_stocks = df_stocks_days[df_stocks_days["Brand Name"] == brand]
    brand_counts = df_stocktwits_count_daily[df_stocktwits_count_daily["brand"] == brand]
    df_cross_correlation = cross_correlation(brand_stocks, brand_counts, "Volume", "stocktwits_count", 8, 1)
    color = colors_dict[brand]
    cross_correlation_fig.add_trace(
        go.Scatter(
            x=df_cross_correlation["Lag"],
            y=df_cross_correlation["Correlation"],
            mode='lines+markers',
            name=brand,
            showlegend=True,
            line={"color": color},
        )
    )

# Figure-wide styling; no title is set for this figure.
cross_correlation_fig.update_layout(
    template="simple_white",
    hovermode='x',
    hoverlabel={"bgcolor": "white"},
    width=1000,
    height=550,
    margin=dict(l=30, b=40, t=30, r=40),
    yaxis_title_standoff=0,
    font={"family": 'Times New Roman', "size": 18},
    legend={
        "yanchor": "top",
        "y": 1,
        "xanchor": "left",
        "x": 0.86,
        "bgcolor": 'rgba(0, 0, 0, 0)',
    },
)

cross_correlation_fig.update_traces(hovertemplate="%{y:.2f}", marker={"size": 11})
cross_correlation_fig.update_xaxes(title=" Stock Volume Lag (days)", showline=True, linecolor='black', mirror=True)
cross_correlation_fig.update_yaxes(title_text="Correlation", showline=True, linecolor='black', mirror=True)
# Dashed vertical marker at zero lag.
cross_correlation_fig.add_vline(x=0, line_width=2, line_dash="dash", line_color="blue")

# Export settings for the mode-bar "download image" button.
config = {
    'toImageButtonOptions': {
        'format': 'png',  # one of png, svg, jpeg, webp
        'filename': 'custom_image',
        'scale': 5,  # Multiply title/legend/axis/canvas sizes by this factor
    }
}

cross_correlation_fig.show(config=config)