In [1]:
import warnings
# Install a global "ignore" filter so no Python warnings are shown in this notebook.
warnings.filterwarnings(action="ignore")

In [2]:
import pandas as pd
import sqlite3
import os

In [3]:
# Open the temporary Dribbble SQLite database that holds the source tables.
# PARSE_DECLTYPES asks sqlite3 to convert values based on each column's declared type.
conn = sqlite3.connect(
    "dribbble_temporary.db",
    detect_types=sqlite3.PARSE_DECLTYPES,
)
c = conn.cursor()

In [4]:
# Open (creating it if needed) the SQLite file that will store the final Dribbble dataset.
# PARSE_DECLTYPES asks sqlite3 to convert values based on each column's declared type.
conn_new = sqlite3.connect(
    "dribbble.db",
    detect_types=sqlite3.PARSE_DECLTYPES,
)
c_new = conn_new.cursor()

In [5]:
# Attach the temporary database file to the new connection under the schema name "old",
# so that SQL run through c_new can reference its tables as old.<table>.
# The trailing semicolon only suppresses the cell's output.
c_new.execute("ATTACH DATABASE 'dribbble_temporary.db' AS old");

# Rearranging database

In this notebook, we replace the cumulative *_count* features (e.g. likes_count, followings_count, etc.) of the 'users' and 'shots' tables, and we assemble the final database in a new file.

## Table *users*

In [6]:
# Load the descriptive columns of the 'users' table from the temporary database.
users = pd.read_sql(
    sql="SELECT id, name, username, bio, location, can_upload_shot, type, pro, created_at, updated_at FROM users",
    con=conn,
)
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23


In [7]:
# Load the per-username 'likes_count' table.
likes_count = pd.read_sql(sql="SELECT * FROM 'likes:users_count_likes'", con=conn)
likes_count.head(n=5)

Unnamed: 0,username,likes_count
0,$$,1
1,$aro,4
2,---,21
3,------,16
4,-----12----34,7


In [8]:
# Inner-join 'likes_count' onto users by username (inner is pd.merge's default):
# a user without a row in the count table would be dropped.
users = users.merge(likes_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38


In [9]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [10]:
# Sanity check: number of missing 'likes_count' values after the merge.
users["likes_count"].isna().sum()

0

In [11]:
# Load the per-username 'likes_received_count' table.
likes_received_count = pd.read_sql(
    sql="SELECT * FROM 'likes:likes_received_count_users'",
    con=conn,
)
likes_received_count.head(n=5)

Unnamed: 0,username,likes_received_count
0,-Alina-,470
1,-Alx-,35
2,-Eco-,71
3,-Geminisama,25
4,-K-,167


In [12]:
# Inner-join 'likes_received_count' onto users by username (inner is pd.merge's default).
users = users.merge(likes_received_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0


In [13]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [14]:
# Sanity check: number of missing 'likes_received_count' values after the merge.
users["likes_received_count"].isna().sum()

0

In [15]:
# Load the per-username 'comments_received_count' table.
comments_received_count = pd.read_sql(
    sql="SELECT * FROM 'shots:comments_received_count_users'",
    con=conn,
)
comments_received_count.head(n=5)

Unnamed: 0,username,comments_received_count
0,cmaffuccio,0
1,arestov_design,117
2,space307,0
3,Hido,0
4,Avagana,0


In [16]:
# Inner-join 'comments_received_count' onto users by username (inner is pd.merge's default).
users = users.merge(comments_received_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0


In [17]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [18]:
# Sanity check: number of missing 'comments_received_count' values after the merge.
users["comments_received_count"].isna().sum()

0

In [19]:
# Load the per-username 'followers_count' table.
followers_count = pd.read_sql(
    sql="SELECT * FROM 'followers:users_count_followers'",
    con=conn,
)
followers_count.head(n=5)

Unnamed: 0,username,followers_count
0,cmaffuccio,0
1,arestov_design,1105
2,space307,475
3,Hido,0
4,Avagana,0


In [20]:
# Inner-join 'followers_count' onto users by username (inner is pd.merge's default).
users = users.merge(followers_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0


In [21]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [22]:
# Sanity check: number of missing 'followers_count' values after the merge.
users["followers_count"].isna().sum()

0

In [23]:
# Load the per-username 'followings_count' table.
followings_count = pd.read_sql(
    sql="SELECT * FROM 'followers:users_count_followings'",
    con=conn,
)
followings_count.head(n=5)

Unnamed: 0,username,followings_count
0,cmaffuccio,7
1,arestov_design,736
2,space307,1974
3,Hido,74
4,Avagana,89


In [24]:
# Inner-join 'followings_count' onto users by username (inner is pd.merge's default).
users = users.merge(followings_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89


In [25]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [26]:
# Sanity check: number of missing 'followings_count' values after the merge.
users["followings_count"].isna().sum()

0

In [27]:
# Load the per-username 'shots_count' table.
shots_count = pd.read_sql(sql="SELECT * FROM 'shots:users_count_shots'", con=conn)
shots_count.head(n=5)

Unnamed: 0,username,shots_count
0,cmaffuccio,0
1,arestov_design,65
2,space307,0
3,Hido,0
4,Avagana,0


In [28]:
# Inner-join 'shots_count' onto users by username (inner is pd.merge's default).
users = users.merge(shots_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count,shots_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736,65
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974,0
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89,0


In [29]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [30]:
# Sanity check: number of missing 'shots_count' values after the merge.
users["shots_count"].isna().sum()

0

In [31]:
# Load the 'teams_count' table; its key column is named 'member_username'.
teams_count = pd.read_sql(sql="SELECT * FROM 'teams:users_count_teams'", con=conn)
teams_count.head(n=5)

Unnamed: 0,member_username,teams_count
0,-jk-,1
1,09ui,1
2,123OMGOMG,1
3,14eleven,1
4,1dollarpsd,1


In [32]:
# Left-join 'teams_count' onto users: users.username is matched against member_username,
# and the redundant right-hand key is dropped afterwards. Because this is a left join,
# users with no matching row are kept and get NaN in 'teams_count'.
users = users.merge(
    teams_count,
    how="left",
    left_on="username",
    right_on="member_username",
).drop(columns="member_username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count,shots_count,teams_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7,0,0.0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736,65,0.0
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974,0,
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74,0,0.0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89,0,0.0


In [33]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [34]:
# Load the 'members_count' table; its key column is named 'team_username'.
members_count = pd.read_sql(sql="SELECT * FROM 'teams:teams_count_members'", con=conn)
members_count.head(n=5)

Unnamed: 0,team_username,members_count
0,1099group,3
1,10Clouds,13
2,10up,4
3,1337s,2
4,15Five,1


In [35]:
# Left-join 'members_count' onto users: users.username is matched against team_username,
# and the redundant right-hand key is dropped afterwards. Because this is a left join,
# users with no matching row are kept and get NaN in 'members_count'.
users = users.merge(
    members_count,
    how="left",
    left_on="username",
    right_on="team_username",
).drop(columns="team_username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count,shots_count,teams_count,members_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7,0,0.0,
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736,65,0.0,
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974,0,,8.0
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74,0,0.0,
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89,0,0.0,


In [36]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [37]:
# Load the per-username 'skills_count' table.
skills_count = pd.read_sql(sql="SELECT * FROM 'skills:users_count_skills'", con=conn)
skills_count.head(n=5)

Unnamed: 0,username,skills_count
0,cmaffuccio,0
1,arestov_design,11
2,space307,5
3,Hido,0
4,Avagana,0


In [38]:
# Inner-join 'skills_count' onto users by username (inner is pd.merge's default).
users = users.merge(skills_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count,shots_count,teams_count,members_count,skills_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7,0,0.0,,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736,65,0.0,,11
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974,0,,8.0,5
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74,0,0.0,,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89,0,0.0,,0


In [39]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [40]:
# Sanity check: number of missing 'skills_count' values after the merge.
users["skills_count"].isna().sum()

0

In [41]:
# Load the per-username 'tags_count' table.
tags_count = pd.read_sql(sql="SELECT * FROM 'tags:users_count_tags'", con=conn)
tags_count.head(n=5)

Unnamed: 0,username,tags_count
0,cmaffuccio,0
1,arestov_design,220
2,space307,0
3,Hido,0
4,Avagana,0


In [42]:
# Inner-join 'tags_count' onto users by username (inner is pd.merge's default).
users = users.merge(tags_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,likes_count,likes_received_count,comments_received_count,followers_count,followings_count,shots_count,teams_count,members_count,skills_count,tags_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,0,0,0,0,7,0,0.0,,0,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,5484,4873,117,1105,736,65,0.0,,11,220
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,2835,0,0,475,1974,0,,8.0,5,0
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,82,0,0,0,74,0,0.0,,0,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,38,0,0,0,89,0,0.0,,0,0


In [43]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [44]:
# Sanity check: number of missing 'tags_count' values after the merge.
users["tags_count"].isna().sum()

0

In [45]:
# Load the per-username 'comments_count' table.
comments_count = pd.read_sql(
    sql="SELECT * FROM 'comments:comments_count_users'",
    con=conn,
)
comments_count.head(n=5)

Unnamed: 0,username,comments_count
0,cmaffuccio,0
1,arestov_design,111
2,space307,3
3,Hido,0
4,Avagana,0


In [46]:
# Inner-join 'comments_count' onto users by username (inner is pd.merge's default).
users = users.merge(comments_count, how="inner", on="username")
users.head(n=5)

Unnamed: 0,id,name,username,bio,location,can_upload_shot,type,pro,created_at,updated_at,...,likes_received_count,comments_received_count,followers_count,followings_count,shots_count,teams_count,members_count,skills_count,tags_count,comments_count
0,2005984,Chris Maffuccio,cmaffuccio,,"New York, NY",0,User,0,2017-11-27 02:42:13,2017-11-27 02:42:47,...,0,0,0,7,0,0.0,,0,0,0
1,695302,Leonid Arestov,arestov_design,Web &amp; UX/UI designer arestov.design@gmail.com,"Moscow, Russia",1,Player,1,2014-11-17 10:35:07,2017-12-12 07:37:24,...,4873,117,1105,736,65,0.0,,11,220,111
2,1946520,~/.space307,space307,"The big, friendly, talented Space307 team is d...","Russia, Saint Petersburg",1,Team,0,2017-10-16 15:03:02,2017-12-12 11:42:59,...,0,0,475,1974,0,,8.0,5,0,3
3,1875583,Hadeer,Hido,,eygpt,0,User,0,2017-08-25 11:22:24,2017-10-29 11:27:38,...,0,0,0,74,0,0.0,,0,0,0
4,1386566,Avagana,Avagana,,,0,User,0,2016-09-22 10:31:57,2017-04-30 15:25:23,...,0,0,0,89,0,0.0,,0,0,0


In [47]:
# Sanity check: number of missing usernames after the merge.
users["username"].isna().sum()

0

In [48]:
# Sanity check: number of missing 'comments_count' values after the merge.
users["comments_count"].isna().sum()

0

In [49]:
# Number of user rows left after all the merges.
users.shape[0]

770540

In [50]:
# List the columns of the assembled 'users' frame before it is written out.
users.columns

Index(['id', 'name', 'username', 'bio', 'location', 'can_upload_shot', 'type',
       'pro', 'created_at', 'updated_at', 'likes_count',
       'likes_received_count', 'comments_received_count', 'followers_count',
       'followings_count', 'shots_count', 'teams_count', 'members_count',
       'skills_count', 'tags_count', 'comments_count'],
      dtype='object')

In [51]:
# SQLite column types for the 'users' table, keyed by DataFrame column name
# and listed in the same order as the DataFrame columns.
dtype = {
    "id": "INT",
    "name": "TEXT",
    "username": "TEXT",
    "bio": "TEXT",
    "location": "TEXT",
    "can_upload_shot": "BOOLEAN",
    "type": "TEXT",
    "pro": "BOOLEAN",
    "created_at": "TIMESTAMP",
    "updated_at": "TIMESTAMP",
    "likes_count": "INT",
    "likes_received_count": "INT",
    "comments_received_count": "INT",
    "followers_count": "INT",
    "followings_count": "INT",
    "shots_count": "INT",
    "teams_count": "INT",
    "members_count": "INT",
    "skills_count": "INT",
    "tags_count": "INT",
    "comments_count": "INT",
}

In [52]:
# Write the assembled users frame to a 'users' table in the new database,
# without the DataFrame index and with the column types given in `dtype`.
users.to_sql(name="users", con=conn_new, index=False, dtype=dtype)

In [53]:
# Index the 'users' table on username; UNIQUE also rejects duplicate usernames.
c_new.execute("CREATE UNIQUE INDEX idx_users ON users (username)")
conn_new.commit()

## Table *shots*

In [54]:
# Load the descriptive columns of the 'shots' table from the temporary database.
shots = pd.read_sql(
    sql="SELECT author_shot, team_username, shot_id, title, description, width, height, created_at, updated_at, animated, id_author_shot, id_team_username FROM shots",
    con=conn,
)
shots.head(n=5)

Unnamed: 0,author_shot,team_username,shot_id,title,description,width,height,created_at,updated_at,animated,id_author_shot,id_team_username
0,max_palyvoda,,3549658,Cinema Festival App Part.1,<p>What's up Dribbblers!</p>\n\n<p>Here's the ...,400,300,2017-06-07 07:01:59,2017-06-07 11:46:56,1,1364989,
1,max_palyvoda,,3254544,Smart Home App Concept,<p>What's up Dribbblers!\n<br />I'd like to pr...,400,300,2017-01-31 08:47:31,2017-01-31 09:47:32,1,1364989,
2,max_palyvoda,,3153930,Pre-Order App For Cafe,"<p>Hello, Dribbblers!\n<br />That is my first ...",400,300,2016-12-13 10:00:26,2016-12-13 11:07:05,1,1364989,
3,miketanael,,3732805,Threenity Logo Design,<p>Hellow fellow dribbblers! Glad to be a part...,400,300,2017-08-14 04:10:20,2017-08-14 06:19:32,0,358178,
4,citrusbyte,citrusbyte,2686725,Real Estate collaboration for laywers,<p>ProTitle360 let lawyers share and collabora...,400,300,2016-05-02 04:24:29,2016-09-08 08:32:15,0,673130,673130.0


In [55]:
# Load the per-shot 'likes_count' table.
# Note: this rebinds `likes_count`, which earlier held the per-username table.
likes_count = pd.read_sql(sql="SELECT * FROM 'likes:likes_count_shots'", con=conn)
likes_count.head(n=5)

Unnamed: 0,shot_id,likes_count
0,1,142
1,2,26
2,26,7
3,27,4
4,28,9


In [56]:
# Inner-join 'likes_count' onto shots by shot_id (inner is pd.merge's default):
# a shot without a row in the count table would be dropped.
shots = shots.merge(likes_count, how="inner", on="shot_id")
shots.head(n=5)

Unnamed: 0,author_shot,team_username,shot_id,title,description,width,height,created_at,updated_at,animated,id_author_shot,id_team_username,likes_count
0,max_palyvoda,,3549658,Cinema Festival App Part.1,<p>What's up Dribbblers!</p>\n\n<p>Here's the ...,400,300,2017-06-07 07:01:59,2017-06-07 11:46:56,1,1364989,,51
1,max_palyvoda,,3254544,Smart Home App Concept,<p>What's up Dribbblers!\n<br />I'd like to pr...,400,300,2017-01-31 08:47:31,2017-01-31 09:47:32,1,1364989,,101
2,max_palyvoda,,3153930,Pre-Order App For Cafe,"<p>Hello, Dribbblers!\n<br />That is my first ...",400,300,2016-12-13 10:00:26,2016-12-13 11:07:05,1,1364989,,192
3,miketanael,,3732805,Threenity Logo Design,<p>Hellow fellow dribbblers! Glad to be a part...,400,300,2017-08-14 04:10:20,2017-08-14 06:19:32,0,358178,,44
4,citrusbyte,citrusbyte,2686725,Real Estate collaboration for laywers,<p>ProTitle360 let lawyers share and collabora...,400,300,2016-05-02 04:24:29,2016-09-08 08:32:15,0,673130,673130.0,27


In [57]:
# Number of distinct non-null shot ids after the merge.
shots["shot_id"].nunique()

2475311

In [58]:
# Load the per-shot 'comments_count' table.
# Note: this rebinds `comments_count`, which earlier held the per-username table.
comments_count = pd.read_sql(
    sql="SELECT * FROM 'comments:comments_count_shots'",
    con=conn,
)
comments_count.head(n=5)

Unnamed: 0,shot_id,comments_count
0,3549658,5
1,3254544,7
2,3153930,26
3,3732805,4
4,2686725,0


In [59]:
# Inner-join 'comments_count' onto shots by shot_id (inner is pd.merge's default).
shots = shots.merge(comments_count, how="inner", on="shot_id")
shots.head(n=5)

Unnamed: 0,author_shot,team_username,shot_id,title,description,width,height,created_at,updated_at,animated,id_author_shot,id_team_username,likes_count,comments_count
0,max_palyvoda,,3549658,Cinema Festival App Part.1,<p>What's up Dribbblers!</p>\n\n<p>Here's the ...,400,300,2017-06-07 07:01:59,2017-06-07 11:46:56,1,1364989,,51,5
1,max_palyvoda,,3254544,Smart Home App Concept,<p>What's up Dribbblers!\n<br />I'd like to pr...,400,300,2017-01-31 08:47:31,2017-01-31 09:47:32,1,1364989,,101,7
2,max_palyvoda,,3153930,Pre-Order App For Cafe,"<p>Hello, Dribbblers!\n<br />That is my first ...",400,300,2016-12-13 10:00:26,2016-12-13 11:07:05,1,1364989,,192,26
3,miketanael,,3732805,Threenity Logo Design,<p>Hellow fellow dribbblers! Glad to be a part...,400,300,2017-08-14 04:10:20,2017-08-14 06:19:32,0,358178,,44,4
4,citrusbyte,citrusbyte,2686725,Real Estate collaboration for laywers,<p>ProTitle360 let lawyers share and collabora...,400,300,2016-05-02 04:24:29,2016-09-08 08:32:15,0,673130,673130.0,27,0


In [60]:
# Number of distinct non-null shot ids after the merge.
shots["shot_id"].nunique()

2475311

In [61]:
# List the columns of the assembled 'shots' frame before it is written out.
shots.columns

Index(['author_shot', 'team_username', 'shot_id', 'title', 'description',
       'width', 'height', 'created_at', 'updated_at', 'animated',
       'id_author_shot', 'id_team_username', 'likes_count', 'comments_count'],
      dtype='object')

In [62]:
# SQLite column types for the 'shots' table, keyed by DataFrame column name
# and listed in the same order as the DataFrame columns.
# Note: this rebinds `dtype`, which earlier held the mapping for 'users'.
dtype = {
    "author_shot": "TEXT",
    "team_username": "TEXT",
    "shot_id": "INT",
    "title": "TEXT",
    "description": "TEXT",
    "width": "INT",
    "height": "INT",
    "created_at": "TIMESTAMP",
    "updated_at": "TIMESTAMP",
    "animated": "BOOLEAN",
    "id_author_shot": "INT",
    "id_team_username": "INT",
    "likes_count": "INT",
    "comments_count": "INT",
}

In [63]:
# Write the assembled shots frame to a 'shots' table in the new database,
# without the DataFrame index and with the column types given in `dtype`.
shots.to_sql(name="shots", con=conn_new, index=False, dtype=dtype)

In [64]:
# Index the 'shots' table on author_shot (non-unique index).
c_new.execute("CREATE INDEX idx_shots ON shots (author_shot)")
conn_new.commit()

## Table *comments*

In [65]:
# Copy the 'comments' table into the new database: create the target table, then fill it
# from the attached temporary database (schema "old") and commit.
# NOTE(review): the SELECT feeds c.author_shot / c.id_author_shot into BOTH the
# author_comment / id_author_comment columns AND the author_shot / id_author_shot columns,
# so the two pairs end up with identical values. Confirm against the old.comments schema
# whether a separate commenter column should be selected for author_comment / id_author_comment.
c_new.execute("""CREATE TABLE comments (shot_id INT, comment_id INT, created_at TIMESTAMP, updated_at TIMESTAMP, author_comment TEXT, comment TEXT, likes_count INT, id_author_comment INT, author_shot TEXT, id_author_shot INT)""")
c_new.execute("""INSERT INTO comments (shot_id, comment_id, created_at, updated_at, author_comment, comment, likes_count, id_author_comment, author_shot, id_author_shot) 
                    SELECT c.shot_id, c.comment_id, c.created_at, c.updated_at, c.author_shot, c.comment, c.likes_count, c.id_author_shot, c.author_shot, c.id_author_shot
                     FROM old.comments c""")
conn_new.commit()

In [66]:
# Index the 'comments' table on comment_id; UNIQUE also rejects duplicate comment ids.
c_new.execute("CREATE UNIQUE INDEX idx_comments ON comments (comment_id)")
conn_new.commit()

## Table *followers*

In [67]:
# Copy the 'followers' table into the new database: create the target table, then fill it
# column-for-column from the attached temporary database (schema "old") and commit.
c_new.execute("""CREATE TABLE followers (destination TEXT, source TEXT, created_at TIMESTAMP, id_destination INT, id_source INT)""")
c_new.execute("""INSERT INTO followers (destination, source, created_at, id_destination, id_source) 
                    SELECT f.destination, f.source, f.created_at, f.id_destination, f.id_source
                     FROM old.followers f""")
conn_new.commit()

In [68]:
# Index 'followers' on (destination, source); UNIQUE also rejects duplicate pairs.
c_new.execute("CREATE UNIQUE INDEX idx_followers ON followers (destination, source)")
conn_new.commit()

## Table *skills*

In [69]:
# Copy the 'skills' table into the new database: create the target table, then fill it
# column-for-column from the attached temporary database (schema "old") and commit.
c_new.execute("""CREATE TABLE skills (username TEXT, skill TEXT, id_username INT)""")
c_new.execute("""INSERT INTO skills (username, skill, id_username) 
                    SELECT s.username, s.skill, s.id_username
                     FROM old.skills s""")
conn_new.commit()

In [70]:
# Index 'skills' on (username, skill); UNIQUE also rejects duplicate pairs.
c_new.execute("CREATE UNIQUE INDEX idx_skills ON skills (username, skill)")
conn_new.commit()

## Table *tags*

In [71]:
# Copy the 'tags' table into the new database: create the target table, then fill it
# column-for-column from the attached temporary database (schema "old") and commit.
c_new.execute("""CREATE TABLE tags (shot_id INT, author_shot TEXT, tag TEXT, id_author_shot INT)""")
c_new.execute("""INSERT INTO tags (shot_id, author_shot, tag, id_author_shot) 
                    SELECT t.shot_id, t.author_shot, t.tag, t.id_author_shot
                     FROM old.tags t""")
conn_new.commit()

In [72]:
# Index 'tags' on (shot_id, tag); UNIQUE also rejects duplicate pairs.
c_new.execute("CREATE UNIQUE INDEX idx_tags ON tags (shot_id, tag)")
conn_new.commit()

## Table *teams*

In [73]:
# Copy the 'teams' table into the new database: create the target table, then fill it
# column-for-column from the attached temporary database (schema "old") and commit.
# NOTE(review): unlike the other copied tables, no index cell follows for 'teams' — confirm this is intended.
c_new.execute("""CREATE TABLE teams (member_username TEXT, team_username TEXT, n_of_shots INT, first_shot TIMESTAMP, last_shot TIMESTAMP, id_member_username INT, id_team_username INT)""")
c_new.execute("""INSERT INTO teams (member_username, team_username, n_of_shots, first_shot, last_shot, id_member_username, id_team_username) 
                    SELECT t.member_username, t.team_username, t.n_of_shots, t.first_shot, t.last_shot, t.id_member_username, t.id_team_username
                     FROM old.teams t""")
conn_new.commit()

## Table *likes*

In [74]:
# Copy the 'likes' table into the new database: create the target table, then fill it
# column-for-column from the attached temporary database (schema "old") and commit.
c_new.execute("""CREATE TABLE likes (shot_id INT, like_id INT, created_at_unix INT, author_like TEXT, created_at TIMESTAMP, id_author_like INT, author_shot TEXT, id_author_shot INT)""")
c_new.execute("""INSERT INTO likes (shot_id, like_id, created_at_unix, author_like, created_at, id_author_like, author_shot, id_author_shot) 
                    SELECT l.shot_id, l.like_id, l.created_at_unix, l.author_like, l.created_at, l.id_author_like, l.author_shot, l.id_author_shot
                     FROM old.likes l""")
conn_new.commit()

In [75]:
# Index 'likes' on (author_like, author_shot) (non-unique index).
c_new.execute("CREATE INDEX idx_likes ON likes (author_like, author_shot)")
conn_new.commit()

In [76]:
# Detach the temporary database (attached earlier as "old") from the new connection.
# The trailing semicolon only suppresses the cell's output.
c_new.execute("DETACH old");

In [77]:
# Close the direct connection to the temporary database before its file is removed.
conn.close()

In [78]:
# Delete the old (temporary) Dribbble database file; this is irreversible, so the
# copies into dribbble.db above must have completed successfully first.
os.remove("dribbble_temporary.db")

In [79]:
# Run SQLite's VACUUM on the new database to rebuild and compact its file.
# The trailing semicolon only suppresses the cell's output.
c_new.execute("VACUUM");

In [80]:
# Close the connection to the final database.
conn_new.close()