# Merge Forum Data
This notebook merges the data from the individual netmums databases (`netmumsTables-01.db` … `netmumsTables-10.db`) into a single database, `netmums-merged.db`.

## Changes
- 2020-02-08: Created
- 2020-02-09: Added merge loop

## TODO
- 

## Imports

In [1]:
import sqlite3
from pathlib import Path
from scraping import create_connection
from netmums import set_up_merged_db
import pandas as pd

## Functions

In [2]:
def pad(n):
    """Render an int as text, prefixing a single "0" when it is below 10.

    :param n: input int
    :return: ``"0" + str(n)`` if ``n < 10``, otherwise ``str(n)`` unchanged
    """
    text = str(n)
    return "0" + text if n < 10 else text

## File Locations

In [3]:
# Current working directory of the kernel.
# NOTE(review): presumably the notebook is launched from its own folder — confirm.
p = Path.cwd()
# Directory one level above the working directory; the "database" folder used
# by the merge cells is resolved relative to it.
path_parent = p.parents[0]

## Merge Databases

In [4]:
# File name and full path of the merged target database (<parent>/database/).
db = "netmums-merged.db"
path_db = str(path_parent / "database" / db)
# create_connection is a project helper imported from `scraping`;
# presumably it returns a sqlite3.Connection — TODO confirm.
conn = create_connection(path_db)
cur = conn.cursor()
# SQL template executed once per source database. Placeholders:
#   {0} -> file path of the source database to attach
#   {1} -> suffix appended to "db" to form the schema alias of the attached file
# The script attaches the source file, appends its users, quotes and posts rows
# to the same-named tables in `main`, then detaches the source again.
# NOTE(review): the target tables must already exist when this runs;
# `set_up_merged_db` is imported but never called in this notebook — confirm
# the schema is created elsewhere.
loop_sql = '''
    ATTACH DATABASE "{0}" AS db{1};
    INSERT INTO main.users (name, data_user_id, url) SELECT name, data_user_id, url FROM db{1}.users;
    INSERT INTO main.quotes (quoting_id, quoted_id) SELECT quoting_id, quoted_id FROM db{1}.quotes;
    INSERT INTO main.posts (thread_id, post_id, post_count, data_user_id, date_created, date_recorded, body) SELECT thread_id, post_id, post_count, data_user_id, date_created, date_recorded, body FROM db{1}.posts;
    DETACH DATABASE db{1};
'''

In [5]:
# SQL that removes duplicate rows from the `users` and `quotes` tables:
# rows are grouped by the listed columns and only the row with the smallest
# `id` in each group is kept; every other row is deleted.
# NOTE(review): relies on an `id` column in both tables; the merge inserts do
# not supply it, so it is presumably auto-generated — confirm against the schema.
# NOTE(review): `posts` is not de-duplicated here — confirm that is intentional.
duplicates_sql = '''
    DELETE FROM users
    WHERE id NOT IN (
        SELECT MIN(id)
        FROM users
        GROUP BY name, data_user_id, url
    );
    DELETE FROM quotes
    WHERE id NOT IN (
        SELECT MIN(id)
        FROM quotes
        GROUP BY quoting_id, quoted_id
    );
'''

In [8]:
# Merge each numbered source database (netmumsTables-01.db ... netmumsTables-10.db)
# into the merged database by running the attach/insert/detach script once per file.
# NOTE: this cell is not idempotent — re-running it appends the same rows again.
# duplicates_sql only removes duplicate users/quotes rows, not posts.
for i in range(1, 11):
    n = pad(i)
    fn = "netmumsTables-{}.db".format(n)
    file_add = path_parent / "database" / fn
    # ATTACH creates a new empty database when the file does not exist, so a
    # missing or misnamed source would only surface later as "no such table"
    # and leave a stray empty file behind. Fail early with a clear error instead.
    if not file_add.is_file():
        raise FileNotFoundError("source database not found: {}".format(file_add))
    path_add = str(file_add)
    # Progress log; this is the per-file output recorded for this cell.
    print(fn)
    cur.executescript(loop_sql.format(path_add, n))

netmumsTables-01.db
netmumsTables-02.db
netmumsTables-03.db
netmumsTables-04.db
netmumsTables-05.db
netmumsTables-06.db
netmumsTables-07.db
netmumsTables-08.db
netmumsTables-09.db
netmumsTables-10.db


In [9]:
# Run the de-duplication script on the merged database (users and quotes tables).
cur.executescript(duplicates_sql)

<sqlite3.Cursor at 0x7fe68665e2d0>

In [11]:
# Commit any transaction still open on the merged-database connection.
conn.commit()

In [12]:
# Close the connection to the merged database now that the merge is finished.
conn.close()