## Install sqlite
Download and install SQLite from https://www.sqlite.org/download.html

Alternatively, if you are using a conda environment, run: `conda install -c anaconda sqlite`

## <font color='blue'> Note </font>:
#### Naming convention when creating a new db: `project_s_<summary>_v<version>_<timestamp>`
    

e.g.
'project_s_sample_v0.0_20210221'

In [1]:
import pandas as pd
import numpy as np
import sqlite3
from pathlib import Path
from sqlalchemy import create_engine
import os
import sys
sys.path.insert(1,'../utils/')
from dtypes_utils import *

In [2]:
# Location of the SQLite database file queried by this notebook.
db_dir = '../db_creation/'
db_filename = 'project_s_instagram_v1_20210314.db'
db_path = os.path.join(db_dir, db_filename)

# Open the connection with sqlite3's default type detection
# (detect_types=sqlite3.PARSE_DECLTYPES is not enabled here).
conn = sqlite3.connect(db_path)
c = conn.cursor()

In [3]:
# List the names of all tables stored in the database.
check_table_q = """
   SELECT name FROM sqlite_master WHERE type='table'
    """
check_table_df = pd.read_sql(sql=check_table_q, con=conn)
check_table_df

Unnamed: 0,name
0,instagram_netflix_static
1,instagram_netflix_tracking
2,instagram_disney_static
3,instagram_disney_tracking
4,instagram_hulu_static
5,instagram_hulu_tracking
6,instagram_hbomax_static
7,instagram_hbomax_tracking
8,instagram_platform_page_info


### wrapper function
Wrapper function that converts each DataFrame column to the dtype declared for it in the SQL table schema.

In [4]:
def apply_dtypes_wrapper(df, dtype_df):
    """Convert each column of ``df`` according to its declared SQL type.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are converted. It is modified in place and also
        returned.
    dtype_df : pandas.DataFrame
        Schema description with a ``name`` column (column name) and a ``type``
        column (declared type), e.g. the result of
        ``PRAGMA table_info(<table>)``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with recognised columns converted.

    Notes
    -----
    * Type matching is exact and case-sensitive: ``'int'``, ``'float'``,
      ``'varchar'``, ``'varchar(32)'``, ``'text'``, ``'BOOLEAN'`` and
      ``'timestamp'``. Columns with any other declared type are left as-is.
    * Columns of ``df`` that have no row in ``dtype_df`` are left as-is
      instead of raising ``IndexError``.
    * The ``convert_to_*`` helpers are not defined in this notebook;
      presumably they come from the ``dtypes_utils`` wildcard import --
      TODO confirm.
    """
    # Build the name -> declared type lookup once instead of filtering
    # dtype_df for every column. setdefault keeps the first occurrence of a
    # name, matching the previous `.values[0]` behaviour.
    declared_types = {}
    for name, sql_type in zip(dtype_df['name'], dtype_df['type']):
        declared_types.setdefault(name, sql_type)

    for col in df.columns:
        if col not in declared_types:
            # No schema entry for this column: nothing to standardise.
            continue

        col_type = declared_types[col]

        if col_type == 'int':
            df[col] = df[col].apply(convert_to_int)

        elif col_type == 'float':
            df[col] = df[col].apply(convert_to_float)

        elif col_type in ('varchar', 'varchar(32)', 'text'):
            df[col] = df[col].apply(convert_to_string)

        elif col_type == 'BOOLEAN':
            df[col] = df[col].apply(convert_to_boolean)

        elif col_type == 'timestamp':
            df[col] = df[col].apply(convert_to_timestamp)

    return df

## <font color='blue'>Query</font>

### <font color='green'> Instagram Netflix data </font>

In [5]:
#################      querying from the db    #################
# Load every row of the Netflix tracking table into a DataFrame.
q = """
    SELECT * FROM instagram_netflix_tracking
    """
netflix_tracking = pd.read_sql(q, conn)

#################    get dtypes from the sql table  #################
# PRAGMA table_info returns one row per column, including its declared type.
tracking_types_q = """
          PRAGMA table_info(instagram_netflix_tracking)
          """
netflix_tracking_dtypes = pd.read_sql(tracking_types_q, conn)

#################   apply wrapper function     #################
# Convert each column to the dtype declared in the table schema.
netflix_tracking = apply_dtypes_wrapper(netflix_tracking, netflix_tracking_dtypes)

In [6]:
# Preview the first row of the tracking data after dtype conversion.
netflix_tracking.head(1)

Unnamed: 0,fetch_date,short_codes,number_of_likes,number_of_video_views,number_of_comments
0,2021-03-06,CL4c2Fjl0tq,768179,,8548


In [7]:
# Show the resulting pandas dtype of each column.
netflix_tracking.dtypes

fetch_date               datetime64[ns]
short_codes                      object
number_of_likes                   int64
number_of_video_views           float64
number_of_comments                int64
dtype: object