# Setup

In [1]:
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt

In [2]:
import matplotlib

# Use a 10pt base font for every figure produced by this notebook.
matplotlib.rcParams.update({'font.size': 10})

In [3]:
import os

import matplotlib

# Render figures through the PGF backend so they can be saved as .pgf files
# and typeset by LaTeX (pdflatex) together with the surrounding document.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

# Make the MiKTeX binaries visible to the kernel process.
# NOTE(review): this directory is specific to one Windows machine; adjust it
# (or drop it if pdflatex is already on PATH) when running elsewhere.
miktex_bin_dir = 'C:\\Users\\papse\\AppData\\Local\\Programs\\MiKTeX\\miktex\\bin\\x64'
current_path = os.environ.get("PATH", "")
# Append only once: re-running this cell must not keep growing PATH.
if miktex_bin_dir not in current_path.split(os.pathsep):
    # os.pathsep is ';' on Windows, so the resulting value matches the
    # previous hard-coded separator there while staying valid on other OSes.
    os.environ["PATH"] = current_path + os.pathsep + miktex_bin_dir

In [4]:
# Load every input table from the local ./data directory.
# The GSS table is stored as a Stata file; the remaining tables are parquet.
gss_table = pd.read_stata(Path('./data/gss_data.dta'))
video_table, traffic_table, category_table, population_table = [
    pd.read_parquet(Path(parquet_file))
    for parquet_file in (
        './data/video_data.parquet',
        './data/traffic_data.parquet',
        './data/category_data.parquet',
        './data/population_data.parquet',
    )
]

# Traffic

In [5]:
# Line chart of the world-wide and USA traffic series.
# Only rows from positional index 57 onward are plotted.
# NOTE(review): the reason for skipping the first 57 rows is not visible here - confirm.
traffic_window = traffic_table.iloc[57:, :]

# Final figure size in inches (width, width / 4).
page_width = 6.30045
fig, axes = plt.subplots(figsize=(page_width, page_width / 4))

for column, legend_label in (('traffic_world', 'Összesen'), ('traffic_usa', 'USA')):
    axes.plot(traffic_window.index, traffic_window[column], label=legend_label)

axes.set_ylabel('Forgalom')

# Figure-level legend placed below the axes, both entries on one row.
fig.legend(
    bbox_to_anchor=(0, -0.25, 1, 0.2),
    loc="center",
    ncol=2,
    framealpha=0,
    borderaxespad=0,
)

<matplotlib.legend.Legend at 0x2883aa15d60>

In [6]:
# Export the traffic figure as a PGF file.
traffic_figure_path = Path('./figures/traffic_series.pgf')
# savefig does not create missing directories; make sure the target folder
# exists so the cell also works on a fresh checkout.
traffic_figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(traffic_figure_path, bbox_inches='tight')

# Video

In [7]:
# Monthly totals over all videos: number of videos and summed views.
# The existing index is moved into a column first so that "video_id" can be
# counted, then the frame is re-indexed by "date" for the time-based grouping.
videos_by_date = video_table.reset_index().set_index("date")

data_to_viz = videos_by_date.groupby(pd.Grouper(freq="m")).agg(
    video_num=pd.NamedAgg(column="video_id", aggfunc="count"),
    view=pd.NamedAgg(column="view", aggfunc="sum"),
)

In [8]:
# Two-axis line chart: monthly video count (left axis) and monthly views (right axis).
page_width = 6.30045
fig, axes = plt.subplots(figsize=(page_width, page_width / 4))

months = data_to_viz.index

# Left axis: number of videos per month.
axes.plot(months, data_to_viz["video_num"], label='Videók száma')

# Right axis shares the x axis with the left one: views per month.
axes_2 = axes.twinx()
axes_2.plot(months, data_to_viz["view"], label="Nézettség", color="darkorange")

# Axis labels; only the left axis is switched to scientific tick notation.
axes.set_ylabel('Videók száma')
axes_2.set_ylabel('Nézettség \n2020-ban')
axes.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))

# Figure-level legend below the axes, collecting the lines of both axes.
fig.legend(
    bbox_to_anchor=(0, -0.25, 1, 0.2),
    loc="center",
    ncol=2,
    framealpha=0,
    borderaxespad=0,
)

<matplotlib.legend.Legend at 0x2885fd7d550>

In [9]:
# Export the video count / views figure as a PGF file.
video_all_figure_path = Path('./figures/video_all_series.pgf')
# savefig does not create missing directories; make sure the target folder
# exists so the cell also works on a fresh checkout.
video_all_figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(video_all_figure_path, bbox_inches='tight')

In [10]:
# Raw category label -> merged category used in the charts.
# Built once at definition time instead of on every call.
_CATEGORY_GROUPS = {
    "Amateur": "Amateur",
    "Verified Amateurs": "Amateur",
    "Lesbian": "Gay",
    "Hardcore": "Hardcore",
    "Gay": "Gay",
}


def rename_cat(x: str) -> str:
    """Return the merged category name for a raw category label.

    Parameters
    ----------
    x : str
        One of the raw labels listed in ``_CATEGORY_GROUPS``.

    Returns
    -------
    str
        The merged category ("Amateur", "Gay" or "Hardcore").

    Raises
    ------
    KeyError
        If ``x`` is not one of the known raw labels.
    """
    return _CATEGORY_GROUPS[x]

In [11]:
# Monthly number of videos per merged category.
# Only these raw categories are kept; rename_cat folds them into merged labels.
selected_categories = ["Hardcore", "Amateur", "Verified Amateurs", "Lesbian", "Gay"]

data_to_viz = (
    category_table.loc[lambda df: df["category"].isin(selected_categories)]
    .assign(category=lambda df: df["category"].apply(rename_cat))
    # presumably reset_index exposes video_id as a column, so rows that became
    # identical after merging categories are dropped here - TODO confirm
    .reset_index()
    .drop_duplicates()
    .set_index("video_id")
    # attach the per-video "view" and "date" columns (index-aligned join)
    .join(video_table[["view", "date"]])
    .reset_index()
    .set_index("date")
    # count video ids per month bin and merged category
    .groupby([pd.Grouper(freq="m"), "category"])["video_id"]
    .count()
)

In [12]:
# Line chart of the monthly video count for each merged category.
page_width = 6.30045
fig, axes = plt.subplots(figsize=(page_width, page_width / 4))

# (category key in data_to_viz, legend label) in drawing order.
category_labels = (
    ('Amateur', 'Amatőr'),
    ('Gay', 'Homoszexuális'),
    ('Hardcore', 'Hardcore'),
)
for category_key, legend_label in category_labels:
    # Select all dates for this category from the (date, category) index.
    category_counts = data_to_viz[slice(None), category_key]
    axes.plot(category_counts.index, category_counts, label=legend_label)

axes.set_ylabel('Videók száma')
axes.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))

# Figure-level legend below the axes, all three entries on one row.
fig.legend(
    bbox_to_anchor=(0, -0.25, 1, 0.2),
    loc="center",
    ncol=3,
    framealpha=0,
    borderaxespad=0,
)

<matplotlib.legend.Legend at 0x2885fcf7550>

In [13]:
# Export the per-category figure as a PGF file.
video_cat_figure_path = Path('./figures/video_cat_series.pgf')
# savefig does not create missing directories; make sure the target folder
# exists so the cell also works on a fresh checkout.
video_cat_figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(video_cat_figure_path, bbox_inches='tight')