# Compare the videos suggested to two users after watching a specific video

### To use this notebook, you first need to run youtube.py (in the src folder) to download the data for a specific videoId.

Then we:
1. import all the needed libraries,
2. specify the path to the csv file (the output of dashboard/src/youtube.py),
3. check that the title of the video corresponds to the one we want to analyze.

First of all, specify a path to a valid .csv file in the box below.

In [None]:
# Location of the CSV file to analyse (the output of dashboard/src/youtube.py).
path = "/home/ubuntu/dashboard/outputs/video4.csv"

In [None]:
import pandas as pd
%matplotlib inline
from matplotlib_venn import venn2
import matplotlib.pyplot as plt
import squarify

# Load the CSV export named by `path`.
df = pd.read_csv(path)

# Grab the title of the source video from the first row now: the
# 'sourceTitle' column is not part of the projection applied just below.
title = df.at[0, 'sourceTitle']

# Keep only the columns the rest of the analysis works with.
kept_columns = [
    'watcher',
    'id',
    'related_source',
    'related_videoId',
    'related_title',
    'related_index',
]
df = df[kept_columns]

print(title)

### Now we need to know how many users have seen that video, and choose two of them in order to compare the videos suggested to each of them.

In [None]:
# Distinct watcher names present in the data.
df['watcher'].unique()

### We can now pick the two users, then we create two separate datasets.

In [None]:
# The two watchers whose suggestions will be compared.
user1 = 'icecream-pie-tea'
user2 = 'milk-quince-alfalfa'

# One frame per watcher, holding only that watcher's rows.
df1 = df.loc[df['watcher'] == user1]
df2 = df.loc[df['watcher'] == user2]


### For each of the two users, we need to choose only one id (one specific session of recommended videos).
Then we reduce the datasets to that session only, so we can make a comparison.

First, choose among the unique values for df1, then for df2. Those will be id1 and id2.

In [None]:
# Distinct ids available for the first user; pick id1 from these.
df1['id'].unique()

In [None]:
# Distinct ids available for the second user; pick id2 from these.
df2['id'].unique()

In [None]:
# The id chosen for each user (taken from the unique values listed above).
id1 = '47c16bdc2358e51110003a626d822a61e6bc908f'
id2 = '120bce0960e72e333ebc5f95e48611c6b8d64b20'

# Columns kept for the comparison.
session_columns = ['related_source', 'related_title', 'related_videoId', 'related_index']

# Select from the full frame on BOTH watcher and id. Filtering on the id alone
# would ignore the user selection, so a mistyped id could silently pull in
# another watcher's rows. Starting from `df` (not df1/df2) keeps this cell
# safe to re-run with different ids. `.copy()` yields independent frames, so
# adding columns to them later does not raise SettingWithCopyWarning.
df1 = df.loc[(df['watcher'] == user1) & (df['id'] == id1), session_columns].copy()
df2 = df.loc[(df['watcher'] == user2) & (df['id'] == id2), session_columns].copy()

# Fail early with a clear message instead of producing empty tables and plots.
for _watcher, _session, _frame in ((user1, id1, df1), (user2, id2, df2)):
    if _frame.empty:
        raise ValueError(
            'No rows found for watcher {!r} with id {!r}'.format(_watcher, _session)
        )

# Index each frame by the position of the suggestion so the two frames can be
# aligned row by row; the 'related_index' column itself is kept as well.
df1.index = df1['related_index']
df2.index = df2['related_index']

### Now we can see which videos were suggested to each user after watching the same video.

In [None]:
# One single-column frame per user: the suggested titles, with the column
# named after the user.
merge1 = df1['related_title'].rename(user1).to_frame()
merge2 = df2['related_title'].rename(user2).to_frame()

# Put the two columns side by side, aligned on the frames' index.
compare = pd.concat([merge1, merge2], axis='columns')
compare

### Furthermore, we can use Venn diagrams to visualize how many videos those users had in common (it is also possible to add a third user).

In [None]:
# Sets of suggested videoIds, one per user.
video_ids_1 = set(df1['related_videoId'])
video_ids_2 = set(df2['related_videoId'])

# Two-set Venn diagram of the videoIds suggested to each user.
fig = plt.figure(figsize=(20, 10))
plt.title('Video suggestions after: ' + title)
v = venn2([video_ids_1, video_ids_2], set_labels=(user1, user2))

In [None]:
# Key each suggestion by its position in the list followed by its videoId, so
# two suggestions only match when the same video appears at the same position.
# `assign` returns new frames instead of writing a column into frames that
# were obtained by filtering, which avoids pandas' SettingWithCopyWarning.
df1 = df1.assign(uniqueId=df1["related_index"].map(str) + df1["related_videoId"])
df2 = df2.assign(uniqueId=df2["related_index"].map(str) + df2["related_videoId"])

# Two-set Venn diagram over the combined position + videoId keys.
fig = plt.figure(figsize=(20,10))
plt.title('Video suggestions after: '+title+'\n With both videoId and position in the suggested list in common.')
v2 = venn2([set(df1.uniqueId), set(df2.uniqueId)], (user1, user2))

### Or see the most common suggested video sources (YouTube channels) for each user.

In [None]:
# Number of suggestions per source for each user, as a one-column frame
# ('Count') whose index is named 'name'.
channels1 = df1.related_source.value_counts().rename_axis('name').to_frame('Count')
channels2 = df2.related_source.value_counts().rename_axis('name').to_frame('Count')

# Side-by-side horizontal bar charts, one per user.
fig, (ax, ax2) = plt.subplots(ncols=2, figsize=(18, 8))

channels1.plot(kind='barh', ax=ax, title=user1)
channels2.plot(kind='barh', ax=ax2, title=user2)

# Put the right-hand chart's y-axis label on its right side. The setter
# returns None, so its result must not be assigned back to `ax2` (doing so
# would replace the Axes object with None).
ax2.yaxis.set_label_position("right")

plt.tight_layout()

In [None]:
# Treemap of the first user's suggested sources: tile area follows the
# 'Count' column, tile labels come from the frame's index.
fig = plt.figure(figsize=(15, 10))
plt.title('Suggested channels after watching: ' + title + '\n User: ' + user1)
squarify.plot(
    sizes=channels1['Count'],
    label=channels1.index,
    alpha=0.8,
)
plt.axis('off')
plt.show()

In [None]:
# Treemap of the second user's suggested sources: tile area follows the
# 'Count' column, tile labels come from the frame's index.
fig = plt.figure(figsize=(15, 10))
plt.title('Suggested channels after watching: ' + title + '\n User: ' + user2)
squarify.plot(
    sizes=channels2['Count'],
    label=channels2.index,
    alpha=0.8,
)
plt.axis('off')
plt.show()