In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import sqlite3

In [3]:
# Open the SQLite database file that stores the Dribbble dataset.
# PARSE_DECLTYPES asks sqlite3 to run its registered converters based on
# each column's declared type.
conn = sqlite3.connect(
    "dribbble_temporary.db",
    detect_types=sqlite3.PARSE_DECLTYPES,
)
# Cursor shared by the schema-changing cells of this notebook.
c = conn.cursor()

# Extra information

Let's add some extra columns to our tables that hold the *id* of the users involved.

For example, the *followers* table only stores the usernames of the users (source and destination); we also want to add the corresponding *id*s. In addition, we add an extra column to the *likes* and *comments* tables that identifies the author of each shot.

## Table *shots*

In [4]:
# shots.id_author_shot: the users.id whose username equals shots.author_shot.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE shots ADD COLUMN id_author_shot INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps shots whose author has no users row (their id stays NULL).
fill_column_sql = """REPLACE INTO shots (rowid, author_shot, team_username, shot_id, title, description, width, height, views_count, likes_count, comments_count, attachments_count, rebounds_count, buckets_count, created_at, updated_at, animated, team_id, team_name, team_bio, team_location, team_buckets_count, team_comments_received_count, team_followers_count, team_followings_count, team_likes_count, team_likes_received_count, team_projects_count, team_rebounds_received_count, team_shots_count, team_can_upload_shot, team_type, team_pro, team_created_at, team_updated_at, team_members_count, id_author_shot)
                SELECT s.rowid, s.author_shot, s.team_username, s.shot_id, s.title, s.description, s.width, s.height, s.views_count, s.likes_count, s.comments_count, s.attachments_count, s.rebounds_count, s.buckets_count, s.created_at, s.updated_at, s.animated, s.team_id, s.team_name, s.team_bio, s.team_location, s.team_buckets_count, s.team_comments_received_count, s.team_followers_count, s.team_followings_count, s.team_likes_count, s.team_likes_received_count, s.team_projects_count, s.team_rebounds_received_count, s.team_shots_count, s.team_can_upload_shot, s.team_type, s.team_pro, s.team_created_at, s.team_updated_at, s.team_members_count, u.id
                    FROM shots s LEFT JOIN users u ON s.author_shot = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [5]:
# shots.id_team_username: the users.id whose username equals shots.team_username.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE shots ADD COLUMN id_team_username INT"""
# Step 2 rewrites every row under its own rowid, carrying id_author_shot over
# and appending the looked-up team id (NULL when no users row matches).
fill_column_sql = """REPLACE INTO shots (rowid, author_shot, team_username, shot_id, title, description, width, height, views_count, likes_count, comments_count, attachments_count, rebounds_count, buckets_count, created_at, updated_at, animated, team_id, team_name, team_bio, team_location, team_buckets_count, team_comments_received_count, team_followers_count, team_followings_count, team_likes_count, team_likes_received_count, team_projects_count, team_rebounds_received_count, team_shots_count, team_can_upload_shot, team_type, team_pro, team_created_at, team_updated_at, team_members_count, id_author_shot, id_team_username)
                SELECT s.rowid, s.author_shot, s.team_username, s.shot_id, s.title, s.description, s.width, s.height, s.views_count, s.likes_count, s.comments_count, s.attachments_count, s.rebounds_count, s.buckets_count, s.created_at, s.updated_at, s.animated, s.team_id, s.team_name, s.team_bio, s.team_location, s.team_buckets_count, s.team_comments_received_count, s.team_followers_count, s.team_followings_count, s.team_likes_count, s.team_likes_received_count, s.team_projects_count, s.team_rebounds_received_count, s.team_shots_count, s.team_can_upload_shot, s.team_type, s.team_pro, s.team_created_at, s.team_updated_at, s.team_members_count, s.id_author_shot, u.id
                    FROM shots s LEFT JOIN users u ON s.team_username = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

## Table *comments*

In [6]:
# comments.id_author_comment: the users.id whose username equals comments.author_comment.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE comments ADD COLUMN id_author_comment INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps comments whose author has no users row (id stays NULL).
fill_column_sql = """REPLACE INTO comments (rowid, shot_id, comment_id, created_at, updated_at, author_comment, comment, likes_count, id_author_comment)
                SELECT c.rowid, c.shot_id, c.comment_id, c.created_at, c.updated_at, c.author_comment, c.comment, c.likes_count, u.id
                    FROM comments c LEFT JOIN users u ON c.author_comment = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [7]:
# comments.author_shot: the author_shot of the shot the comment belongs to.
# Step 1 adds the (initially empty) TEXT column.
add_column_sql = """ALTER TABLE comments ADD COLUMN author_shot TEXT"""
# Step 2 rewrites every row under its own rowid, copying author_shot from the
# shots row with the same shot_id (NULL when no such shot exists).
fill_column_sql = """REPLACE INTO comments (rowid, shot_id, comment_id, created_at, updated_at, author_comment, comment, likes_count, id_author_comment, author_shot)
                SELECT c.rowid, c.shot_id, c.comment_id, c.created_at, c.updated_at, c.author_comment, c.comment, c.likes_count, c.id_author_comment, s.author_shot
                    FROM comments c LEFT JOIN shots s ON c.shot_id = s.shot_id"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [8]:
# comments.id_author_shot: the users.id whose username equals comments.author_shot.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE comments ADD COLUMN id_author_shot INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# It reads the comments.author_shot column, so that column must already exist.
fill_column_sql = """REPLACE INTO comments (rowid, shot_id, comment_id, created_at, updated_at, author_comment, comment, likes_count, id_author_comment, author_shot, id_author_shot)
                SELECT c.rowid, c.shot_id, c.comment_id, c.created_at, c.updated_at, c.author_comment, c.comment, c.likes_count, c.id_author_comment, c.author_shot, u.id
                    FROM comments c LEFT JOIN users u ON c.author_shot = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

## Table *followers*

In [9]:
# followers.id_destination: the users.id whose username equals followers.destination.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE followers ADD COLUMN id_destination INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps rows whose destination has no users row (id stays NULL).
fill_column_sql = """REPLACE INTO followers (rowid, destination, created_at, source, id_destination)
                SELECT f.rowid, f.destination, f.created_at, f.source, u.id
                    FROM followers f LEFT JOIN users u ON f.destination = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [10]:
# followers.id_source: the users.id whose username equals followers.source.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE followers ADD COLUMN id_source INT"""
# Step 2 rewrites every row under its own rowid, carrying id_destination over
# and appending the looked-up source id (NULL when no users row matches).
fill_column_sql = """REPLACE INTO followers (rowid, destination, created_at, source, id_destination, id_source)
                SELECT f.rowid, f.destination, f.created_at, f.source, f.id_destination, u.id
                    FROM followers f LEFT JOIN users u ON f.source = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

## Table *skills*

In [11]:
# skills.id_username: the users.id whose username equals skills.username.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE skills ADD COLUMN id_username INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps skills whose username has no users row (id stays NULL).
fill_column_sql = """REPLACE INTO skills (rowid, username, skill, id_username)
                SELECT s.rowid, s.username, s.skill, u.id
                    FROM skills s LEFT JOIN users u ON s.username = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

## Table *tags*

In [12]:
# tags.id_author_shot: the users.id whose username equals tags.author_shot.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE tags ADD COLUMN id_author_shot INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps tags whose shot author has no users row (id stays NULL).
fill_column_sql = """REPLACE INTO tags (rowid, shot_id, author_shot, tag, id_author_shot)
                SELECT t.rowid, t.shot_id, t.author_shot, t.tag, u.id
                    FROM tags t LEFT JOIN users u ON t.author_shot = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

## Table *likes*

In [13]:
# likes.id_author_like: the users.id whose username equals likes.author_like.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE likes ADD COLUMN id_author_like INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# The LEFT JOIN keeps likes whose author has no users row (id stays NULL).
fill_column_sql = """REPLACE INTO likes (rowid, shot_id, like_id, created_at_unix, author_like, created_at, id_author_like)
                SELECT l.rowid, l.shot_id, l.like_id, l.created_at_unix, l.author_like, l.created_at, u.id
                    FROM likes l LEFT JOIN users u ON l.author_like = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [14]:
# likes.author_shot: the author_shot of the shot that was liked.
# Step 1 adds the (initially empty) TEXT column.
add_column_sql = """ALTER TABLE likes ADD COLUMN author_shot TEXT"""
# Step 2 rewrites every row under its own rowid, copying author_shot from the
# shots row with the same shot_id (NULL when no such shot exists).
fill_column_sql = """REPLACE INTO likes (rowid, shot_id, like_id, created_at_unix, author_like, created_at, id_author_like, author_shot)
                SELECT l.rowid, l.shot_id, l.like_id, l.created_at_unix, l.author_like, l.created_at, l.id_author_like, s.author_shot
                    FROM likes l LEFT JOIN shots s ON l.shot_id = s.shot_id"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

In [15]:
# likes.id_author_shot: the users.id whose username equals likes.author_shot.
# Step 1 adds the (initially empty) INT column.
add_column_sql = """ALTER TABLE likes ADD COLUMN id_author_shot INT"""
# Step 2 rewrites every row under its own rowid with the looked-up id appended.
# It reads the likes.author_shot column, so that column must already exist.
fill_column_sql = """REPLACE INTO likes (rowid, shot_id, like_id, created_at_unix, author_like, created_at, id_author_like, author_shot, id_author_shot)
                SELECT l.rowid, l.shot_id, l.like_id, l.created_at_unix, l.author_like, l.created_at, l.id_author_like, l.author_shot, u.id
                    FROM likes l LEFT JOIN users u ON l.author_shot = u.username"""
c.execute(add_column_sql)
c.execute(fill_column_sql)
conn.commit()

# Index

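An index is an additional data structure that helps improve the performance of a query. Before creating each unique index, we use pandas to check which column (or combination of columns) uniquely identifies a row of the table.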

### Table *users*

In [17]:
# Load the candidate key columns of users into a DataFrame.
users = pd.read_sql(
    sql="SELECT id, username, created_at FROM users",
    con=conn,
)

In [18]:
# Does id alone identify a users row?
users["id"].is_unique

True

In [19]:
# Does username alone identify a users row?
users["username"].is_unique

True

In [20]:
# users.id was checked to be unique, so back it with a unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_users already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_users ON users (id)""")
conn.commit()

### Table *followers*

In [21]:
# Load the candidate key columns of followers into a DataFrame.
followers = pd.read_sql(
    sql="SELECT id_destination, id_source, created_at FROM followers",
    con=conn,
)

In [22]:
# Does id_destination alone identify a followers row?
followers["id_destination"].is_unique

False

In [23]:
# Does id_source alone identify a followers row?
followers["id_source"].is_unique

False

In [24]:
# Is the (id_destination, id_source) pair free of repeated combinations?
not followers.duplicated(subset=["id_destination", "id_source"]).any()

True

In [25]:
# The (id_destination, id_source) pair was checked to be unique, so back it
# with a composite unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_followers already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_followers ON followers (id_destination, id_source)""")
conn.commit()

### Table *comments*

In [26]:
# Load the candidate key columns of comments into a DataFrame.
comments = pd.read_sql(
    sql="SELECT shot_id, comment_id, created_at FROM comments",
    con=conn,
)

In [27]:
# Does comment_id alone identify a comments row?
comments["comment_id"].is_unique

True

In [28]:
# comments.comment_id was checked to be unique, so back it with a unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_comments already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_comments ON comments (comment_id)""")
conn.commit()

### Table *shots*

In [29]:
# Load the candidate key columns of shots into a DataFrame.
shots = pd.read_sql(
    sql="SELECT shot_id, created_at, updated_at, id_author_shot, id_team_username FROM shots",
    con=conn,
)

In [30]:
# Does id_author_shot alone identify a shots row?
shots["id_author_shot"].is_unique

False

In [31]:
# Does shot_id alone identify a shots row?
shots["shot_id"].is_unique

True

In [32]:
# shots.shot_id was checked to be unique, so back it with a unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_shots already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_shots ON shots (shot_id)""")
conn.commit()

### Table *skills*

In [33]:
# Load the candidate key columns of skills into a DataFrame.
skills = pd.read_sql(
    sql="SELECT id_username, skill FROM skills",
    con=conn,
)

In [34]:
# Does id_username alone identify a skills row?
skills["id_username"].is_unique

False

In [35]:
# Does skill alone identify a skills row?
skills["skill"].is_unique

False

In [36]:
# Is the (id_username, skill) pair free of repeated combinations?
not skills.duplicated(subset=["id_username", "skill"]).any()

True

In [37]:
# The (id_username, skill) pair was checked to be unique, so back it with a
# composite unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_skills already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_skills ON skills (id_username, skill)""")
conn.commit()

### Table *tags*

In [38]:
# Load the candidate key columns of tags into a DataFrame.
tags = pd.read_sql(
    sql="SELECT shot_id, id_author_shot, tag FROM tags",
    con=conn,
)

In [39]:
# Does shot_id alone identify a tags row?
tags["shot_id"].is_unique

False

In [40]:
# Does id_author_shot alone identify a tags row?
tags["id_author_shot"].is_unique

False

In [41]:
# Does tag alone identify a tags row?
tags["tag"].is_unique

False

In [42]:
# Is the (id_author_shot, shot_id) pair free of repeated combinations?
not tags.duplicated(subset=["id_author_shot", "shot_id"]).any()

False

In [43]:
# Is the (id_author_shot, tag) pair free of repeated combinations?
not tags.duplicated(subset=["id_author_shot", "tag"]).any()

False

In [44]:
# Is the (shot_id, tag) pair free of repeated combinations?
not tags.duplicated(subset=["shot_id", "tag"]).any()

True

In [45]:
# The (shot_id, tag) pair was checked to be unique, so back it with a
# composite unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_tags already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_tags ON tags (shot_id, tag)""")
conn.commit()

### Table *likes*

In [46]:
# Load the candidate key columns of likes into a DataFrame.
likes = pd.read_sql(
    sql="SELECT shot_id, like_id FROM likes",
    con=conn,
)

In [47]:
# Does like_id alone identify a likes row?
likes["like_id"].is_unique

True

In [48]:
# likes.like_id was checked to be unique, so back it with a unique index.
# IF NOT EXISTS keeps this cell re-runnable: without it SQLite raises
# "index idx_likes already exists" when the index is already in the database.
c.execute("""CREATE UNIQUE INDEX IF NOT EXISTS idx_likes ON likes (like_id)""")
conn.commit()

In [49]:
# Rebuild the database file after the bulk row rewrites above.
# The trailing semicolon hides the returned cursor from the cell output.
conn.execute("VACUUM");