In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
from bisect import bisect
from itertools import chain

In [3]:
# Connect database sqlite where the information regarding the Dribbble dataset are stored.
conn = sqlite3.connect("dribbble.db", detect_types = sqlite3.PARSE_DECLTYPES)
c = conn.cursor()

In [4]:
# Read 'likes' table.
likes = pd.read_sql("SELECT shot_id, like_id, created_at_unix AS created_at_like FROM likes", conn)

In [5]:
# Read 'shots' table.
shots = pd.read_sql("SELECT shot_id, created_at AS created_at_shot, id_author_shot, likes_count AS n_likes_at_end, id_team_username FROM shots", conn)

# Likes information at different temporal horizons from shot publication

We now compute the number of likes occured at different temporal horizons from the corresponding shot publication.

In [6]:
# Create a dataframe where for each 'shot_id' corresponds a list of all the likes creation (ordered in time).
l = likes.groupby("shot_id", as_index = False).agg(created_at_likes = ("created_at_like", lambda x: x.tolist()))
l

Unnamed: 0,shot_id,created_at_likes
0,1,"[1511374513, 1508036519, 1495089306, 149402130..."
1,2,"[1489177505, 1488304519, 1488215890, 148818972..."
2,26,"[1466700712, 1407257380, 1394066151, 132748508..."
3,27,"[1407257382, 1350659578, 1345118828, 1331742911]"
4,28,"[1430814323, 1407257384, 1392995395, 137115394..."
...,...,...
2451444,4058825,"[1514910097, 1514905934, 1514905733, 151490099..."
2451445,4058829,"[1514920969, 1514916394, 1514910096, 151490593..."
2451446,4058866,"[1514897657, 1514897074, 1514896535, 151489567..."
2451447,4058902,"[1514896958, 1514896328, 1514895642, 151489549..."


In [7]:
shots_likes = pd.merge(shots[["shot_id", "created_at_shot", "id_author_shot"]], l, on = "shot_id", how = "left")

In [8]:
shots_likes.isna().sum()
# There are some shots with no likes.

shot_id                 0
created_at_shot         0
id_author_shot          0
created_at_likes    23862
dtype: int64

In [9]:
def likes_after_x_from_shot_publication(df1, df2, x):
    label = f"n_likes_at_{x.replace(" ", "_")}"
    # Convert to seconds.
    seconds = int(pd.to_timedelta(x).total_seconds())
    df1["created_at_shot"] = df1["created_at_shot"] + seconds
    
    mask = df1["created_at_likes"].notnull()
    df1.loc[mask, label] = df1[mask].apply(lambda x: bisect(x["created_at_likes"], x["created_at_shot"]), axis = 1)
    df1[label] = df1[label].fillna(0).astype(int)

    df2 = pd.merge(df2, df1[["shot_id", label]], on = "shot_id", how = "left")
    return df2

In [10]:
# Example.
times = ["1 minutes", "10 minutes", "30 minutes", "1 hours", "8 hours", "16 hours", "1 days", "2 days", "3 days", "5 days", "7 days", "12 days", "20 days", "30 days", "90 days"]

In [19]:
# Convert to unix time.
shots_likes["created_at_shot"] = ((shots_likes["created_at_shot"] - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s"))

In [16]:
df = shots[["shot_id"]]

In [20]:
for time in times:
    print(time)
    df = likes_after_x_from_shot_publication(shots_likes.copy(), df, time)

1 minutes
10 minutes
30 minutes
1 hours
8 hours
16 hours
1 days
2 days
3 days
5 days
7 days
12 days
20 days
30 days
90 days


In [21]:
df

Unnamed: 0,shot_id,n_likes_at_1_minutes,n_likes_at_10_minutes,n_likes_at_30_minutes,n_likes_at_1_hours,n_likes_at_8_hours,n_likes_at_16_hours,n_likes_at_1_days,n_likes_at_2_days,n_likes_at_3_days,n_likes_at_5_days,n_likes_at_7_days,n_likes_at_12_days,n_likes_at_20_days,n_likes_at_30_days,n_likes_at_90_days
0,3549658,0,0,51,51,51,51,51,51,51,51,51,51,51,51,51
1,3254544,0,0,0,0,101,101,101,101,101,101,101,101,101,101,101
2,3153930,0,0,0,0,0,0,0,192,192,192,192,192,192,192,192
3,3732805,0,0,0,0,44,44,44,44,44,44,44,44,44,44,44
4,2686725,0,0,0,0,0,27,27,27,27,27,27,27,27,27,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2475306,3204628,0,0,0,0,25,25,25,25,25,25,25,25,25,25,25
2475307,2886712,0,0,0,0,20,20,20,20,20,20,20,20,20,20,20
2475308,3402864,0,0,0,0,10,10,10,10,10,10,10,10,10,10,10
2475309,3231855,0,0,0,0,0,30,30,30,30,30,30,30,30,30,30


In [22]:
df.to_csv("shots_likes.csv")