## Install sqlite
Download and install SQLite from https://www.sqlite.org/download.html

Or, if you are using a conda environment, run: `conda install -c anaconda sqlite`

## Note:
### db naming convention:
#### `project_s` + `_<summary>` + `_v<version>` + `_<timestamp as YYYYMMDD>`
    

e.g.
'project_s_sample_v0.0_20210221'

In [1]:
import pandas as pd
import sqlite3

from pathlib import Path

# TODO: rename this dev database so it follows the naming convention described above.
table_name = 'twitter_netflix'
db_name = 'project_s_sample_v0.0_20210221.db'

# Create an empty database file on disk if one is not already present.
Path(db_name).touch(exist_ok=True)

In [2]:
# Read the tweet CSV (relative path) into a DataFrame and preview the first two rows.
csv_path = "../csv/twitter_netflix.csv"
twitter_netflix_df = pd.read_csv(csv_path)
twitter_netflix_df.head(n=2)

Unnamed: 0,full text,time created,user_id,tweet_id,country,locality,region,sub_region
0,RT @coveygils: @netflix @lanacondor EXACTLY ht...,Sat Feb 13 23:59:58 +0000 2021,770073794056712193,1360740529286356993,,,,
1,RT @MichelleObama: Here's the trailer for #Waf...,Sat Feb 13 23:59:57 +0000 2021,101346144,1360740528472674309,,,,


### Create SQLite table

In [3]:
# Open a connection to the SQLite database file named by `db_name`,
# then get a cursor from it for executing raw SQL statements.
conn = sqlite3.connect(db_name)
c = conn.cursor()

In [4]:
# Initialize the schema: build the DDL with the table name filled in, then run it.
# IF NOT EXISTS means the statement does nothing when the table is already there.
create_table_sql = '''CREATE TABLE IF NOT EXISTS {table_name}
                         (user_id int, 
                          tweet_id int,
                          time_created timestamp,
                          reduced_text varchar,
                          full_text text,
                          country varchar(32),
                          region varchar(32)
                          )'''.format(table_name=table_name)
c.execute(create_table_sql)

<sqlite3.Cursor at 0x7fa6fea41ab0>

In [5]:
# Preprocess the DataFrame so its columns match the table schema.
# Columns to keep, in the order they are written to the table.
final_columns = ['user_id', 'tweet_id', 'time_created',
                 'reduced_text', 'full_text', 'country', 'region']

# Build the result as one chain that returns a new frame instead of mutating in place:
#   1. rename the CSV headers that contain spaces,
#   2. derive `reduced_text` as the first 255 characters of `full_text`
#      (`.str[:255]` leaves missing values as NaN, whereas slicing inside a
#      Python lambda raises TypeError on NaN),
#   3. keep only the columns listed in `final_columns`.
twitter_netflix_df = (
    twitter_netflix_df
    .rename(columns={'full text': 'full_text',
                     'time created': 'time_created'})
    .assign(reduced_text=lambda frame: frame['full_text'].str[:255])
    .loc[:, final_columns]
)
twitter_netflix_df.head(2)

Unnamed: 0,user_id,tweet_id,time_created,reduced_text,full_text,country,region
0,770073794056712193,1360740529286356993,Sat Feb 13 23:59:58 +0000 2021,RT @coveygils: @netflix @lanacondor EXACTLY ht...,RT @coveygils: @netflix @lanacondor EXACTLY ht...,,
1,101346144,1360740528472674309,Sat Feb 13 23:59:57 +0000 2021,RT @MichelleObama: Here's the trailer for #Waf...,RT @MichelleObama: Here's the trailer for #Waf...,,


In [6]:
# Write the preprocessed DataFrame into the SQLite table.
# The target comes from `table_name` rather than a hard-coded string, so this
# write stays in sync with the CREATE TABLE and SELECT cells if the name changes.
# NOTE: if_exists='append' adds rows to the existing table, so re-running this
# cell inserts the same rows again.
twitter_netflix_df.to_sql(table_name,
                          conn,
                          if_exists='append',
                          index=False)

### Query data from SQLite table

In [7]:
# Fill the table name into the query template, then load every row of that
# table into a DataFrame through the open connection.
q = """
    SELECT * FROM {table_name}
    """
select_all_sql = q.format(table_name=table_name)
sample_df = pd.read_sql(sql=select_all_sql, con=conn)

In [8]:
# Preview the first two rows read back from the database.
sample_df.head(n=2)

Unnamed: 0,user_id,tweet_id,time_created,reduced_text,full_text,country,region
0,770073794056712193,1360740529286356993,Sat Feb 13 23:59:58 +0000 2021,RT @coveygils: @netflix @lanacondor EXACTLY ht...,RT @coveygils: @netflix @lanacondor EXACTLY ht...,,
1,101346144,1360740528472674309,Sat Feb 13 23:59:57 +0000 2021,RT @MichelleObama: Here's the trailer for #Waf...,RT @MichelleObama: Here's the trailer for #Waf...,,


### Delete table

In [9]:
## Start the sqlite3 shell in a terminal.
## Open the correct database file (e.g. run: sqlite3 <db_name>).
## Then type in the sqlite3 shell: DROP TABLE <table_name>;