In [1]:
%load_ext lab_black

In [2]:
# Imports: plotting (matplotlib, including its PDF backend), tabular data
# (pandas) and date-string parsing (dateutil).
from matplotlib import pyplot as plt
import matplotlib.backends.backend_pdf
import pandas as pd
from dateutil import parser

# Default figure size for the plots in this notebook - handy for larger output.
plt.rcParams["figure.figsize"] = [10, 6]
# Render inline figures in 'retina' format for higher-resolution screens
# (useful on Mac).
%config InlineBackend.figure_format = 'retina'

In [3]:
# The three axes of the dataset: page category, social network and country.
TYPE_PAGE_LIST = ["news", "politicians"]
TYPE_SOCIAL_LIST = ["facebook", "instagram"]
COUNTRY_PAGE_LIST = ["austria", "italy", "uk"]

# Month abbreviation -> calendar month number, kept separately per year.
MONTHS_2019 = dict(dec=12)
MONTHS_2020 = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ("jan", "feb", "mar", "apr", "may", "jun", "jul"), start=1
    )
}

# Folder (with trailing slash) that the CSV file names are appended to.
DATA_PATH = "Data/"

# Placeholders for per-network column dtypes; both start out empty.
COLUMNS_TYPES_FB = dict()
COLUMNS_TYPES_IG = dict()

## Read Data

In [4]:
# Every loaded DataFrame, keyed by dataset name.
df_map = dict()
# One independent (initially empty) name -> DataFrame mapping per month.
df_months_map = {
    month: {} for month in ("dec", "jan", "feb", "mar", "apr", "may", "jun", "jul")
}

In [5]:
# Load one CSV per (page type, social network, country) combination into
# `df_map` and replace the textual "Created" column with parsed datetimes.
for type_page in TYPE_PAGE_LIST:
    for type_social in TYPE_SOCIAL_LIST:
        for country_page in COUNTRY_PAGE_LIST:
            # The same name is used as dictionary key and as CSV base name.
            dataset_name = "_".join((type_page, type_social, country_page))
            loaded = pd.read_csv(DATA_PATH + dataset_name + ".csv")
            df_map[dataset_name] = loaded
            # Parse every timestamp string individually with dateutil.
            loaded["Created"] = loaded["Created"].apply(parser.parse)



In [None]:
# Preview the first rows of the UK news Facebook frame.
df_map["news_facebook_uk"].head()

### Create DFs Map per Month

In [None]:
# December 2019 is the only month listed in MONTHS_2019.
december = next(iter(MONTHS_2019))
for name_df, df in df_map.items():
    created = df["Created"]
    in_december = (created >= "2019-12-01") & (created < "2020-01-01")
    df_months_map[december][name_df] = df[in_december]

# 2020: every month is the half-open window
# [first day of the month, first day of the following month).
for name_df, df in df_map.items():
    created = df["Created"]
    for month, month_number in MONTHS_2020.items():
        window_start = "2020-{}-01".format(month_number)
        window_end = "2020-{}-01".format(month_number + 1)
        df_months_map[month][name_df] = df[
            (created >= window_start) & (created < window_end)
        ]

### Create Lists for plotting bar charts

In [None]:
# One independent (initially empty) mapping per month; filled later with the
# per-dataset lists used for the bar charts.
lists_months_map = {
    month: {} for month in ("dec", "jan", "feb", "mar", "apr", "may", "jun", "jul")
}

In [None]:
def compute_metrics(df, social):
    """Summarise a frame of posts into engagement and video figures.

    Parameters
    ----------
    df : pandas.DataFrame
        Posts to summarise. With ``social == "instagram"`` the columns
        "Followers at Posting", "Likes", "Comments", "Type" and "Views" are
        read; otherwise "Likes at Posting", "Likes", "Comments", "Shares",
        "Love", "Wow", "Haha", "Sad", "Angry", "Care", "Type" and
        "Post Views" are read.
    social : str
        "instagram" selects the Instagram columns; any other value selects
        the Facebook columns.

    Returns
    -------
    tuple
        ``(engagement, n_video, video_total_views)``:
        total interactions divided by the mean audience size, times 100;
        the number of rows whose "Type" is one of the video types;
        the summed view count divided by 100000.
    """
    if social == "instagram":
        audience_column = "Followers at Posting"
        interaction_columns = ("Likes", "Comments")
        video_types = ["Video", "IGTV"]
        views_column = "Views"
    else:
        audience_column = "Likes at Posting"
        interaction_columns = (
            "Likes",
            "Comments",
            "Shares",
            "Love",
            "Wow",
            "Haha",
            "Sad",
            "Angry",
            "Care",
        )
        video_types = ["Video", "Native Video"]
        views_column = "Post Views"

    mean_followers = df[audience_column].mean()

    # Sum each column on its own, then add the totals left to right.
    column_totals = [df[column].sum() for column in interaction_columns]
    interactions = column_totals[0]
    for column_total in column_totals[1:]:
        interactions = interactions + column_total

    n_video = int(df["Type"].isin(video_types).sum())
    video_total_views = df[views_column].sum() / 100000

    engagement = (interactions / mean_followers) * 100
    return engagement, n_video, video_total_views

In [None]:
# For every month, take each DataFrame of that month and compute, per page:
# - the total of likes, comments and shares
# - the mean followers/likes of the page during the month
#   (these first two feed the engagement computation)
# - the number of videos shared
# - the total number of video views
# The results are stored as parallel lists:
# [page names], [engagement], [number of videos shared], [video views]

# NOTE(review): `i` is never read in this cell; confirm nothing else relies
# on it before removing.
i = 0
for month, df_map_month in df_months_map.items():
    month_lists = lists_months_map[month]
    for df_name, df_data in df_map_month.items():
        page_lists = {
            "page_names": [],
            "engagements": [],
            "share_videos": [],
            "video_views": [],
        }
        month_lists[df_name] = page_lists

        # Dataset names look like "<page type>_<social>_<country>".
        social_type = df_name.split("_")[1]
        if social_type == "instagram":
            unused_columns = [
                "Photo",
                "Title",
                "Description",
                "Image Text",
                "Sponsor Id",
                "Sponsor Name",
            ]
            page_column = "Account"
        else:
            unused_columns = [
                "Video Length",
                "Message",
                "Link",
                "Final Link",
                "Image Text",
                "Link Text",
                "Description",
                "Sponsor Id",
                "Sponsor Name",
            ]
            page_column = "Page Name"

        # Drop the columns the metrics do not read, then split by page.
        df_data = df_data.drop(columns=unused_columns)
        grouped = df_data.groupby([page_column])
        for name, group in grouped:
            engagement, n_video, video_total_views = compute_metrics(group, social_type)
            page_lists["page_names"].append(name)
            page_lists["engagements"].append(engagement)
            page_lists["share_videos"].append(n_video)
            page_lists["video_views"].append(video_total_views)

In [None]:
# Per month and social network, the list of plot Axes to export later.
# Every month gets its own pair of fresh lists.
figures = {
    month: {"instagram": [], "facebook": []}
    for month in ("dec", "jan", "feb", "mar", "apr", "may", "jun", "jul")
}

### 2019 Plots

In [None]:
# Draw one bar chart per dataset for December 2019 and remember the resulting
# Axes, per social network, in `figures["dec"]`.
month = "December 2019"
for name_df, lists_map in lists_months_map["dec"].items():
    # Dataset names look like "<page type>_<social>_<country>".
    country = name_df.split("_")[2]
    social_ = name_df.split("_")[1]
    pages_names = lists_map["page_names"]
    engagements = lists_map["engagements"]
    share_videos = lists_map["share_videos"]
    video_views = lists_map["video_views"]
    df = pd.DataFrame(
        {
            "engagement (% rate)": engagements,
            "n. video shared": share_videos,
            "video views (100k)": video_views,
        },
        index=pages_names,
    )
    # A dataset without any December post yields an empty frame, which cannot
    # be drawn as a bar chart; skip it, as the 2020 plotting loop does.
    if len(df) > 0:
        ax = df.sort_values("engagement (% rate)", ascending=False).plot(
            kind="bar",
            title="{} - {}, {}".format(
                month, social_.capitalize(), country.capitalize()
            ),
            rot=90,
        )
        figures["dec"][social_].append(ax)

### 2020 Plots

In [None]:
# Plot title of each 2020 month -> key used in `lists_months_map` / `figures`.
months_2020_complete = {
    "January 2020": "jan",
    "February 2020": "feb",
    "March 2020": "mar",
    "April 2020": "apr",
    "May 2020": "may",
    "June 2020": "jun",
    "July 2020": "jul",
}
for month_title, month_cut in months_2020_complete.items():
    for name_df, lists_map in lists_months_map[month_cut].items():
        # Dataset names look like "<page type>_<social>_<country>".
        name_parts = name_df.split("_")
        social_, country = name_parts[1], name_parts[2]
        df = pd.DataFrame(
            {
                "engagement (% rate)": lists_map["engagements"],
                "n. video shared": lists_map["share_videos"],
                "video views (100k)": lists_map["video_views"],
            },
            index=lists_map["page_names"],
        )
        # Nothing to draw for a dataset without posts in this month.
        if df.empty:
            continue
        ranked = df.sort_values("engagement (% rate)", ascending=False)
        chart_title = "{} - {}, {}".format(
            month_title, social_.capitalize(), country.capitalize()
        )
        ax = ranked.plot.bar(title=chart_title, rot=90)
        figures[month_cut][social_].append(ax)

### Generate Final PDFs

In [None]:
# Write one PDF per (month, social network) containing every chart collected
# in `figures`; "dec" belongs to 2019, every other month to 2020.
# NOTE(review): the previous version of this cell carried the remark
# "will open an empty extra figure" - cause not verified.
for month, socials in figures.items():
    year = "2019" if month == "dec" else "2020"
    for social, plots in socials.items():
        name_file = "Output/{}_{}_{}.pdf".format(year, month, social)
        # The context manager closes the PDF even when saving a page raises,
        # so no half-written file handle is left open.
        with matplotlib.backends.backend_pdf.PdfPages(name_file) as pdf:
            for ax in plots:
                # `figures` stores Axes; savefig needs the owning Figure.
                pdf.savefig(ax.get_figure(), bbox_inches="tight")

## Top-10 Performing Posts per Page

In [23]:
def get_best_posts(df, social_type, top_n=10):
    """Return the ``top_n`` posts of ``df`` ranked by total interactions.

    The input frame is not modified; the returned frame is a new object that
    carries an additional "interactions" column.

    Parameters
    ----------
    df : pandas.DataFrame
        Posts to rank. Must contain "Overperforming Score" plus the
        interaction columns of the selected network.
    social_type : str
        "instagram" sums "Likes" and "Comments"; any other value sums the
        Facebook columns "Likes", "Comments", "Shares", "Love", "Haha",
        "Wow", "Sad", "Care" and "Angry".
    top_n : int, optional
        Number of posts to keep (default 10).

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows, sorted by "interactions" and then by
        "Overperforming Score", both descending.
    """
    if social_type == "instagram":
        interaction_columns = ["Likes", "Comments"]
    else:
        interaction_columns = [
            "Likes",
            "Comments",
            "Shares",
            "Love",
            "Haha",
            "Wow",
            "Sad",
            "Care",
            "Angry",
        ]

    # Add the columns one by one with `+` (rather than a row-wise sum) so a
    # missing value in any column still propagates to the total.
    interactions = df[interaction_columns[0]]
    for column in interaction_columns[1:]:
        interactions = interactions + df[column]

    # `assign` builds a new frame, leaving the caller's data untouched.
    ranked = df.assign(interactions=interactions).sort_values(
        by=["interactions", "Overperforming Score"], ascending=False
    )
    return ranked.head(top_n)

In [27]:
# Collect the best posts of every page, kept apart per social network.
df_ig_list = []
df_fb_list = []
for df_name, df_data in df_map.items():
    # Dataset names look like "<page type>_<social>_<country>".
    social_type = df_name.split("_")[1]
    is_instagram = social_type == "instagram"
    page_column = "Account" if is_instagram else "Page Name"
    target_list = df_ig_list if is_instagram else df_fb_list
    for name, group in df_data.groupby([page_column]):
        # Hand get_best_posts its own copy of the group.
        df = get_best_posts(group.copy(), social_type)
        target_list.append(df)

In [28]:
# Stack the per-page selections into one table per social network.
# reset_index() without arguments keeps each row's previous label as an
# "index" column.
ig_stacked = pd.concat(df_ig_list)
result_ig = ig_stacked.reset_index()
fb_stacked = pd.concat(df_fb_list)
result_fb = fb_stacked.reset_index()

# Write both tables next to the notebook, Instagram first.
for result, csv_name in (
    (result_ig, "top_10_posts_ig.csv"),
    (result_fb, "top_10_posts_fb.csv"),
):
    result.to_csv(csv_name)