# Identify cases for analysis

## Imports

In [1]:
import pandas as pd
from pathlib import Path
from scraping import create_connection

## Data sources

In [2]:
# Working directory of the running kernel; every other path is derived from it.
p = Path.cwd()
# Two levels above the working directory (parents[0] is the immediate parent).
# NOTE(review): presumably the project root - this depends on where the kernel
# was started; confirm.
path_parent = p.parents[1]

In [3]:
# database
# Converted to a plain string before being handed to create_connection.
path_db = str(path_parent / "database" / "netmums-merged.db")
# Directory that the CSV exports in this notebook are written to and read from.
path_clean_data = path_parent / "clean_data" / "netmums"

## Query database

In [4]:
# Count, per user, the posts in forum 24 whose post_count is 1, most prolific
# users first; the 'Anonymous' user is excluded.
# NOTE(review): post_count=1 presumably marks the opening post of a thread -
# confirm against the schema.
# The literal is single-quoted: double-quoted tokens are identifiers in SQL and
# are only treated as strings by SQLite as a legacy fallback. This also matches
# the quoting used by the later queries in this notebook.
distinct_users_sql = """
    SELECT p.user_url, COUNT(*) AS count
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE s.forum_id=24
    AND p.post_count=1
    AND p.user_url<>'Anonymous'
    GROUP BY p.user_url
    ORDER BY count DESC
"""

In [5]:
# Run the per-user count query. The connection is closed in `finally` so it is
# released even when the query raises.
conn = create_connection(path_db)
try:
    users = pd.read_sql_query(distinct_users_sql, conn)
finally:
    conn.close()
# Kept as the last top-level expression so the notebook displays it.
users.head(20)

Unnamed: 0,user_url,count
0,tracy-120,73
1,kirsty-t-312,72
2,heidi-s-146,66
3,emma-207,66
4,karen-s-822,54
5,karen-229,54
6,kim-1559,53
7,wendel,48
8,carla-s-289,48
9,laura-m-1902,47


In [5]:
# Re-assigns path_clean_data to the same value it is given in the "Data sources" cell.
path_clean_data = path_parent / "clean_data" / "netmums"
# Export of the per-user counts is currently disabled:
# users.to_csv(path_clean_data / "most_sn_posts.csv", index=False)

In [6]:
# Template for one user's forum-24 posts with post_count=1, oldest first.
# The {} placeholder is filled with the user_url by get_posts.
# The placeholder is wrapped in single quotes so the substituted value is always
# a string literal; a double-quoted token is an identifier in SQL and would be
# read as a column if the value happened to match a column name.
# NOTE(review): printed output in this notebook shows date_created values such
# as "2008-11-25 07:49PM"; if the column holds that text, ORDER BY sorts it
# lexically and can mis-order posts made on the same day - confirm.
sql = '''
    SELECT
        s.name AS subforum_name,
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.body as text
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE s.forum_id=24
    AND p.user_url='{}'
    AND p.post_count=1
    ORDER BY date_created ASC
'''

In [6]:
def get_posts(user_url, sql):
    """Run a per-user query template and return the result as a DataFrame.

    Parameters
    ----------
    user_url : str
        User identifier to look up.
    sql : str
        Query template that marks where the user goes with ``{}``, optionally
        wrapped in quotes (``"{}"`` or ``'{}'``).

    Returns
    -------
    pandas.DataFrame
        The rows returned by the query.

    Notes
    -----
    The user is bound as a query parameter rather than pasted into the SQL
    text, so quotes in the value cannot break the statement and the value is
    never mistaken for a column name. The connection is closed even when the
    query raises.

    Assumes the connection accepts ``?`` placeholders, as sqlite3 does;
    presumably ``create_connection`` returns a sqlite3 connection - confirm.
    """
    query = sql
    n_params = 0
    # Quoted placeholders are replaced first so their quotes disappear with
    # them; any bare {} left over is handled by the last pass.
    for token in ('"{}"', "'{}'", "{}"):
        n_params += query.count(token)
        query = query.replace(token, "?")

    conn = create_connection(path_db)
    try:
        return pd.read_sql_query(query, conn, params=(user_url,) * n_params)
    finally:
        conn.close()

In [7]:
def print_posts(user_url, sql, print_forum=False):
    """Print the text of every post that ``get_posts(user_url, sql)`` returns.

    Each post is followed by an empty line. When ``print_forum`` is true the
    post's ``subforum_name`` is printed on the line before its text.
    """
    posts = get_posts(user_url, sql)
    shown_columns = ['subforum_name', 'text'] if print_forum else ['text']
    for _, post in posts.iterrows():
        for column in shown_columns:
            print(post[column])
        print()

In [9]:
# Template for one user's posts with post_count=1 in every forum (no forum
# filter), oldest first; forum_id is returned alongside each post.
# The {} placeholder is wrapped in single quotes so the substituted value is
# always a string literal; a double-quoted token is an identifier in SQL and
# would be read as a column if the value happened to match a column name.
sql_all = '''
    SELECT
        s.name AS subforum_name,
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.body as text,
        s.forum_id as forum_id
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE p.user_url='{}'
    AND p.post_count=1
    ORDER BY date_created ASC
'''

In [40]:
# tracy-120's forum-24 posts with post_count=1 (the `sql` template), oldest first.
print_posts("tracy-120", sql)

Yet again I have had to meet OH in a dodgy car park to get help changing DS nappy. Son is 8 years old ds, spd, heart condition and so on. He has also not long came out of hospital with double pnuemonia. Both me and his nanna could not get near him for him headbanging, hitting etc. He also had a dirty nappy. Does anyone have any advice or been through similar.

I am sitting here quite tearful. We have been to the hospital as part of DS heart monitoring. He also has DS, severe developmental delay (6 years behind his age) SPD, challenging behaviour etc. Anyway after we told him off for hitting out at one child, he started on another. The mother jumped up, grabbing her child and shout for F*s sake, what the f* did you let him f***g do that for. by then he was on a mission to run so I immediatly went after him for the woman to shout I go on f* off. I felt terrible. Then too top it the doctor asked is anyone dealing with his behaviour. We said yes but they are not around to see the worse of 

In [17]:
# Same user across all forums (`sql_all`), each post preceded by its subforum name.
print_posts("tracy-120", sql_all, True)

Parties
I have a severly disabled child who is in special school. He's never had a birthday party for a few year. It was fine when he was younger but now he's older it's a bit harder. He also doesn't mix but likes to watch. Should he have a party or not.

Other special needs chat
Yet again I have had to meet OH in a dodgy car park to get help changing DS nappy. Son is 8 years old ds, spd, heart condition and so on. He has also not long came out of hospital with double pnuemonia. Both me and his nanna could not get near him for him headbanging, hitting etc. He also had a dirty nappy. Does anyone have any advice or been through similar.

Other special needs chat
I am sitting here quite tearful. We have been to the hospital as part of DS heart monitoring. He also has DS, severe developmental delay (6 years behind his age) SPD, challenging behaviour etc. Anyway after we told him off for hitting out at one child, he started on another. The mother jumped up, grabbing her child and shout for 

In [10]:
# NOTE(review): unfinished draft. The WHERE clause has no condition, so this
# text is not a valid query as written, and no visible cell in this notebook
# executes it. Complete it or remove it.
sql_first_not_sn = """
    SELECT user_url, post
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE 
    ORDER BY date_created ASC
"""

In [11]:
# For every user who has at least one forum-24 post with post_count=1, number
# all of that user's posts (any forum) by date_created and keep row 1, i.e. the
# user's earliest post according to that ordering.
# NOTE(review): printed output in this notebook shows date_created values such
# as "2008-11-25 07:49PM"; if the column holds that text, the ordering is
# lexical and can mis-order posts made on the same day - confirm.
test_sql = """
WITH rows AS (
    SELECT
        s.name AS subforum_name,
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.body as text, 
        s.forum_id as forum_id,
        ROW_NUMBER() OVER(PARTITION BY p.user_url ORDER BY p.date_created ASC) AS row_number
        FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE user_url IN (
        SELECT DISTINCT p.user_url
        FROM posts AS p
        LEFT JOIN threads AS t
        ON t.id=p.thread_id
        LEFT JOIN subforums AS s
        ON s.id=t.subforum_id
        WHERE s.forum_id=24
        AND post_count=1
    )
    ORDER BY p.date_created
)
SELECT *
FROM rows
WHERE row_number=1
"""

In [12]:
# Load each selected user's earliest post. The connection is closed in
# `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    first_posts = pd.read_sql_query(test_sql, conn)
finally:
    conn.close()

In [52]:
# Print how many users' earliest post falls in each subforum, most common first.
# Iterating the counts Series directly avoids depending on the name pandas gives
# the counts column, which changed from the source column's name to "count" in
# pandas 2.0.
for subforum, n_users in first_posts['subforum_name'].value_counts().items():
    print(subforum, n_users)

Other special needs chat 765
Children with autism 118
Archived make friends posts 79
Toddlers (1 - 3 years) 78
Family and other relationships 74
Trying for a baby 60
Court cases 53
Children (4 - 11 years) 50
Early pregnancy signs and symptoms 50
Baby and child health 49
Benefits and entitlements 47
General chat 41
Netmums-to-be 38
New to Netmums 37
Maternal mental health 36
Single parents 35
Parenting advice 34
Due dates clubs 34
Children with ADHD 33
Babies (birth - 12 months) 33
Children with disabilities 32
Labour and birth 27
Self-employed 27
Weaning 25
Having a bad day? 24
Budgeting 23
In the news 23
Baby names 21
Cleaning 21
Parents with disabilities 20
Special needs education 19
Am I pregnant? 19
Short haul 18
Potty training 18
New mums 18
Working mums 18
Breastfeeding 18
Christmas and New Year 17
Unplanned pregnancy 17
Food tips and ideas 16
Choosing childcare 14
Tweens and teens 14
Trying to conceive clubs 14
Other Netmums clubs 13
Sleep 13
Miscarriage 13
Greater London 11
Bot

In [53]:
# Print user and text for every row of `df` whose subforum is "Trying for a baby".
# NOTE(review): in document order `df` is not assigned until a later cell (the
# one that runs sql_first_post_forum), so this cell fails on a fresh
# Restart & Run All. Move it below that cell or point it at the intended frame.
for i, row in df.loc[df['subforum_name']=="Trying for a baby"].iterrows():
    print(row['user_url'])
    print(row['text'])
    print()

claire-728
Hi all, I already have a gorgeous 2 year old girl and wanting to now try for another baby, not wanting too much of an age gap plus being an only child myself it's really important to me but I am worried about Swine Flu and if I was to get pregnant the dilema of whether to have the vaccine or not and if not would I putting myself and the baby at risk. I am normally so sensible and I know alot of you will just say to wait until I have been vaccinated and then try but i'm really broody... Would just like to get your views to help me decide? Thanks in advance!:hmmm:

sukina-b
Hi I'm 38 and 18 weeks pregnant with baby no 4. I had my first 2 who are now 11 & 12 much younger and thought I was done, especially as I had DS & DD. Then I got divorced 8 years ago and met my husband now 3 yrs ago. He is also great with my kids but I always knew he wanted some of his own. So in Jan this year I gave my birth to another little boy :) As I didn't want him to feel like an only child (older 2 

In [55]:
# All-forum posts with post_count=1 for sukina-b, with subforum names.
print_posts("sukina-b", sql_all, True)

Labour and birth
Hi netmums I am so confused at the moment I feel like my brain will explode!! I'm hoping that writing it down plus your advise might help. Here goes... Ds1 born 1997 - emergency section, 4 days overdue, foetal distress Dd1 born 1998 - section, 10 wks early, IUGR Ds2 born 2009 - section booked for 39 wks but went into labour 38+5, quick labour (3 hrs) so no time for section, born by ventouse. I was so thrilled as I hadn't liked the idea of an elective section and it was all so exciting and unexpected!! Fast forward to now. I am pregnant again and worked hard to convince my consultants to let me try for another VBAC. Eventually they agreed. They warned it would probably be early and quick again and I would need constant monitoring. If I did go overdue they would let me go to 41 wks but couldn't induce me so section it would be. I was happy with that as I was sure it would be early. Got to 36 wks and suffered from SPD. Painful and very uncomfortable. After 2 wks in agony 

In [57]:
# All-forum posts with post_count=1 for hannah-h-242, with subforum names.
print_posts("hannah-h-242", sql_all, True)

Trying for a baby
Hi, I could really do with some advice. Me and my hubby have been trying (ish) for about 4 months now and over the last couple of days my body has been being v v odd and I am wondering if I could be preg. Yesterday I started to bleed very very very lightly more like spoting and I thought my period had come a bit early as I am a little iregular. I normaly would be bleeding very heavely by now but still only have less then yesterday- hardly any. This morning on my way out to work I just felt really nauseas and had to run home to throw up but then afterwards I felt fine and have felt only a little bit nausea after eating the rest of the day. Out of pure frustration of not understanding what the hell my body was doing I took a preg test this eve and there was a v v v faint second line. I know you guys cant say for sure but do I sound Preg and should I test gain and is it ok that I have bleed a bit and what if I bleed more? Thanks all Han

Early pregnancy signs and symptom

In [13]:
# All-forum posts with post_count=1 for carla-s-289, with subforum names.
print_posts("carla-s-289", sql_all, True)

Food tips and ideas
i have two children one of 5 and one of 19months i want to start a meal planner for each week to try new and exciting food and also to eat healthly but my 5 year old is soooooo fussy its unbelievble i mean he looks at something and say straight away IM NOT EATING THAT!!! and drives me insane and he doesnt even no what it is never even tried it...iv asked him to come help me cook he does and then straight away say whose this food for and ill say all of us and bang say it again im not eating that we eat the same stuff all the time chicken nuggets and chips etc etc and iv had enough hes not a big eater as it is but i just dont no what to do and to be honest im a fussy eater to but its time for a HUGE change in are household with food some advice would really help thanks :) xxxx

Toddlers (1 - 3 years)
Hi I was just looking for advice really on how other mum plan there days I'm a mum of two boys 6 and 24months but as my youngest is getting very busy and is all over the 

In [29]:
# For every non-anonymous user with at least one post in forum 24, number all
# of that user's posts (any forum) by date_created and keep row 1, i.e. the
# user's earliest post according to that ordering. Unlike test_sql, the user
# selection is not restricted to post_count=1.
# NOTE(review): if date_created is stored as text such as "2008-11-25 07:49PM"
# (the format seen in printed output in this notebook), the ordering is lexical
# and can mis-order posts made on the same day - confirm.
sql_first_post_forum = """
WITH rows AS (
    SELECT
        s.name AS subforum_name,
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.body as text, 
        s.forum_id as forum_id,
        ROW_NUMBER() OVER(PARTITION BY p.user_url ORDER BY p.date_created ASC) AS row_number
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE user_url IN (
        SELECT DISTINCT p.user_url
        FROM posts AS p
        LEFT JOIN threads AS t
        ON t.id=p.thread_id
        LEFT JOIN subforums AS s
        ON s.id=t.subforum_id
        WHERE s.forum_id=24
        AND p.user_url<>'Anonymous'
    )
    ORDER BY p.date_created
)
SELECT *
FROM rows
WHERE row_number=1

"""

In [36]:
# Load the earliest post of every non-anonymous forum-24 user. The connection
# is closed in `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    df = pd.read_sql_query(sql_first_post_forum, conn)
finally:
    conn.close()

In [37]:
# Keep only the users whose earliest post was made outside forum 24.
outside_forum_24 = df['forum_id'].ne(24)
df = df.loc[outside_forum_24]

In [32]:
# Total number of posts (in any forum) per non-anonymous user, restricted to
# users who have at least one post in forum 24.
sql_count_n_posts = """
SELECT
    user_url AS user_url,
    COUNT(*) AS count
FROM posts
WHERE user_url IN (
    SELECT DISTINCT p.user_url
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE s.forum_id=24
)
AND user_url<>'Anonymous'
GROUP BY user_url
"""

In [33]:
# Load the per-user post totals. The connection is closed in `finally` so it is
# released even when the query raises.
conn = create_connection(path_db)
try:
    n_posts = pd.read_sql_query(sql_count_n_posts, conn)
finally:
    conn.close()

In [38]:
# mask1: users that are present in df; mask2: users with more than one post overall.
mask1 = n_posts['user_url'].isin(df['user_url'])
mask2 = n_posts['count'].gt(1)
# Users that satisfy both conditions.
n_posts[mask1 & mask2]

Unnamed: 0,user_url,count
1,123abc,27
2,1v9jnvpm,65
3,33kc,647
7,aarh,4
9,abbey-f-3,447
...,...,...
5712,zoekillian,16
5714,zooz,39
5716,zsofia-t,70
5717,zuzana-95,63


In [39]:
# How many of the selected users share each total post count.
n_posts.loc[mask1 & mask2, 'count'].value_counts()

3        105
4         98
2         85
6         75
5         66
        ... 
197        1
4285       1
2228       1
2210       1
10136      1
Name: count, Length: 1162, dtype: int64

In [13]:
# Template for every post by one user (no forum or post_count filter), oldest
# first, including thread_id and post_count for each post.
# The {} placeholder is wrapped in single quotes so the substituted value is
# always a string literal; a double-quoted token is an identifier in SQL and
# would be read as a column if the value happened to match a column name.
sql_all_posts = '''
    SELECT
        s.name AS subforum_name,
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.body as text,
        s.forum_id as forum_id,
        p.thread_id as thread_id,
        p.post_count as post_count
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE p.user_url='{}'
    ORDER BY date_created ASC
'''

In [None]:
"""
hannah-h-242: tracking from birth
carla-s-289: already born, no diagnosis before first post
tracy-120: already born, diagnosed before first post
"""

In [19]:
# Every post by tracy-120. Note this reuses (and overwrites) the name `df`.
df = get_posts("tracy-120", sql_all_posts)

In [21]:
# Save the frame currently held in `df` without its index column.
df.to_csv(path_clean_data / "tracy-120.csv", index=False)

## Get all first posts on SN forum

In [4]:
# For each non-anonymous user, find the smallest date_created among their
# forum-24 posts (min_date), then return the user's post(s) whose date_created
# equals that value.
# NOTE(review): the outer posts table is not restricted to forum 24, so any post
# by the same user with an identical date_created is returned too - confirm
# that is acceptable.
# NOTE(review): if date_created is stored as text such as "2008-11-25 07:49PM"
# (the format seen in printed output in this notebook), min() compares it
# lexically and may not pick the chronologically first post - confirm.
# 'Anonymous' is single-quoted: double-quoted tokens are identifiers in SQL.
first_posts_sql = """
SELECT p1.user_url, p1.body
FROM posts AS p1
INNER JOIN (
    SELECT p2.user_url, min(p2.date_created) AS min_date
    FROM posts AS p2
    LEFT JOIN threads AS t
    ON t.id=p2.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE s.forum_id=24
    AND p2.user_url<>'Anonymous'
    GROUP BY p2.user_url
) AS ij
ON p1.user_url = ij.user_url AND p1.date_created = ij.min_date
"""
# The connection is closed in `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    first_posts_df = pd.read_sql_query(first_posts_sql, conn)
finally:
    conn.close()

In [5]:
# Row count, column dtypes and non-null counts of the first-posts frame.
first_posts_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5738 entries, 0 to 5737
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   user_url  5738 non-null   object
 1   body      5738 non-null   object
dtypes: object(2)
memory usage: 89.8+ KB


In [7]:
# Draw a fixed-size sample of first posts for manual review, then save both the
# sample and the full set.
n = 100
# random_state pins the draw, so re-running the notebook regenerates the same
# first_posts_sample.csv that the post-frequency section reads back.
# reset_index gives the sample a 0..n-1 index instead of the row labels the
# rows had in first_posts_df.
first_posts_sample = first_posts_df.sample(n, random_state=0).reset_index(drop=True)
first_posts_sample.to_csv(path_clean_data / "first_posts_sample.csv", index=False)
first_posts_df.to_csv(path_clean_data / "first_posts.csv", index=False)

In [6]:
# Print each sampled user followed by the text of their first post.
# The columns are iterated directly rather than looked up with .loc[i]:
# DataFrame.sample keeps the original row labels, so label lookups for
# 0..n-1 only work on a frame whose index happens to be exactly that range.
for user, text in zip(first_posts_sample['user_url'], first_posts_sample['body']):
    print(user)
    print(text)

rebecca-d-805
I've just been told my baby has TGA- anyone give me some advice on how to cope with the news and what to expect?x
hollie-r-65
‘Bathtime’ is a special bonding time for families, but when is the right time to make the transition to showering? As part of Hubbub’s #TapChat about daily water habits, we've asked mum of two Charlotte to share her top tips for getting little ones to swap bath time for shower time. Read Charlotte’s 8 top tips here: ::link_1::
anon-2470980
My 12 month old is currently being referred for possible global development delay. Has anyone else been through this? Can help me know what to expect? Any advice? Thanks!
sophia-n-15
Okay at present I don't so think I'm going to try without his consent & see how it goes! Thank you
sarah-m-3870
As far as I know Hun it's until one is 11 xxx
ted-b-55
My son and I recently wrote a book called, “That’s Cool!” (available on Amazon). One of our goals is to help kids with ADHD by directing their energies in learning life

## Analyze post frequency

In [4]:
# Read the saved sample back from first_posts_sample.csv in path_clean_data.
first_posts_sample = pd.read_csv(path_clean_data / "first_posts_sample.csv")

In [5]:
# Build the "('a', 'b', ...)" text that is interpolated into the IN clause of
# the post-frequency query.
# Single quotes inside a user_url are doubled (SQL's escape for a quote within
# a string literal) so such a value cannot end the literal early.
escaped_users = [u.replace("'", "''") for u in first_posts_sample['user_url'].tolist()]
user_list = "('" + "', '".join(escaped_users) + "')"

In [25]:
# NOTE(review): closes whatever connection `conn` currently refers to. In
# document order the last connection opened above has already been closed, so
# this looks like a leftover from interactive work - confirm it can be removed.
conn.close()

In [6]:
# Every post (any forum) written by the sampled users, with the forum and
# thread it belongs to. user_list is the pre-built "('a', 'b', ...)" text.
post_freq_sql = f"""
    SELECT
        p.user_url AS user_url,
        p.date_created AS date_created,
        p.id AS post_id,
        p.body AS body,
        s.forum_id AS forum_id,
        p.thread_id AS thread_id
    FROM posts AS p
    LEFT JOIN threads AS t
    ON t.id=p.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE p.user_url IN {user_list}
"""
# The connection is closed in `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    post_freq_df = pd.read_sql_query(post_freq_sql, conn)
finally:
    conn.close()

In [7]:
# Order the posts by user, then by date_created.
# NOTE(review): date_created is not converted to datetime in this section, so
# the values are sorted as returned by the query; if they are text such as
# "2008-11-25 07:49PM" the within-day order is lexical, not chronological.
post_freq_df = post_freq_df.sort_values(['user_url', 'date_created'])

In [8]:
# Locate each sampled first post inside post_freq_df by matching on user and
# body text, then collect the ids of the matching posts.
# NOTE(review): a user who posted the same body more than once would match
# several rows here - confirm whether that occurs in the data.
post_freq_first_post = post_freq_df.merge(first_posts_sample, on=['user_url','body'], how="inner")
post_id_list = post_freq_first_post['post_id'].tolist()

In [9]:
# Flag the posts whose id was matched as a first post: 1 for a match, 0 otherwise.
is_first_post = post_freq_df['post_id'].isin(post_id_list)
post_freq_df['first_post'] = is_first_post.astype('int64')

In [10]:
# Attach to every post the date_created of its author's matched first post,
# under the column name first_sn_post_date.
first_sn_dates = post_freq_first_post[['user_url', 'date_created']].rename(
    columns={"date_created": "first_sn_post_date"}
)
post_freq_df = post_freq_df.merge(first_sn_dates, on='user_url', how="left")

In [11]:
# Label each post relative to its author's first SN-forum post:
#   "first post" - the matched first post itself,
#   "after"      - any other post dated on or after first_sn_post_date,
#   "before"     - everything else.
# The two date columns are parsed to datetimes for the comparison. Compared as
# raw text, values such as "2008-11-25 07:49PM" and "2008-11-25 10:42AM" order
# by their characters, which puts the evening post before the morning one.
# Rows without a first_sn_post_date parse to NaT, compare as False and stay "before".
created = pd.to_datetime(post_freq_df['date_created'])
first_sn = pd.to_datetime(post_freq_df['first_sn_post_date'])
post_freq_df['stage'] = "before"
post_freq_df.loc[post_freq_df['first_post']==1, 'stage'] = "first post"
post_freq_df.loc[(created >= first_sn) & (post_freq_df['first_post']==0), 'stage'] = "after"

In [12]:
# For every user, in order of first appearance, print the user followed by how
# many of their posts fall in each stage.
for user in post_freq_df['user_url'].unique().tolist():
    user_rows = post_freq_df['user_url'] == user
    stage_counts = post_freq_df.loc[user_rows, 'stage'].value_counts()
    print(user)
    print(stage_counts)

alana-c-54
after         23
first post     1
Name: stage, dtype: int64
alice-p-187
after         253
before         70
first post      1
Name: stage, dtype: int64
allison-239
after         5806
before          18
first post       1
Name: stage, dtype: int64
amy-l-769
first post    1
Name: stage, dtype: int64
annabelle-60
first post    1
Name: stage, dtype: int64
annie-c-121
after         315
first post      1
Name: stage, dtype: int64
anon-2623322
after         182
before         12
first post      1
Name: stage, dtype: int64
beck-n
before        3
first post    1
Name: stage, dtype: int64
becky-1042
after         152
before         23
first post      1
Name: stage, dtype: int64
beverley-275
after         47
before         4
first post     1
Name: stage, dtype: int64
brogan-s-4
before        820
after         331
first post      1
Name: stage, dtype: int64
catherine-m-276
before        670
after           8
first post      1
Name: stage, dtype: int64
charlie-d-86
after         9
first 

In [13]:
def save_user_url(user):
    """Write all rows of post_freq_df for ``user`` to ``posts_<user>.csv``.

    The file is created in path_clean_data and the index is not written.
    """
    out_path = path_clean_data / f"posts_{user}.csv"
    is_user = post_freq_df['user_url'] == user
    post_freq_df.loc[is_user].to_csv(out_path, index=False)

In [14]:
# Export the labelled posts of one user for closer reading.
save_user_url('sophia-w-62')

In [12]:
# Print date, stage and body of every post by michelle-225, one value per line.
michelle_posts = post_freq_df.loc[post_freq_df['user_url'] == 'michelle-225']
for _, post in michelle_posts.iterrows():
    for field in ('date_created', 'stage', 'body'):
        print(post[field])

2008-11-25 07:49PM
before
Hi everyone I'm after some feedback please. I'm a mother of 3 young children, trying to think of ways to make some money from home, without having too much initial outlay, or 'selling.' I had an idea of running a 'rent-2-play' (toy library-ish) business from home. Especially as I have a fair amount of toys around the place already! Would this sort of thing appeal to people, would a web list of toys & prices help. Would a waiting list for 'high demand' toys be a good idea? Please, any advice or feedback welcome!!
2008-12-01 10:42AM
before
Hi Sarah Thanks for your thoughts. I was worried about those issues you raised myself, hence needed the feedback. It now turns out, that a local childrens centre up the road from me, has opened (today) a toy library. Obviously that would be hard to compete with, as they have free membership to join, and probably alot more storage. Maybe thats an OMEN!! I have some other ideas, but need to look into a bit further. I won't get a

In [41]:
# Every post by every non-anonymous forum-24 user, paired with min_date: the
# smallest date_created among that user's forum-24 posts. The join is on
# user_url only, so all of a user's posts are returned and filtered afterwards.
# NOTE(review): if date_created is stored as text such as "2008-11-25 07:49PM"
# (the format seen in printed output in this notebook), min() compares it
# lexically and may not pick the chronologically first post - confirm.
# 'Anonymous' is single-quoted: double-quoted tokens are identifiers in SQL.
first_posts_2_weeks_sql = """
SELECT p1.user_url, p1.body, p1.date_created, p1.id, ij.min_date
FROM posts AS p1
INNER JOIN (
    SELECT p2.user_url, min(p2.date_created) AS min_date
    FROM posts AS p2
    LEFT JOIN threads AS t
    ON t.id=p2.thread_id
    LEFT JOIN subforums AS s
    ON s.id=t.subforum_id
    WHERE s.forum_id=24
    AND p2.user_url<>'Anonymous'
    GROUP BY p2.user_url
) AS ij
ON p1.user_url = ij.user_url
"""
# The connection is closed in `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    first_posts_2_weeks = pd.read_sql_query(first_posts_2_weeks_sql, conn)
finally:
    conn.close()

In [42]:
# Parse both date columns into datetimes so they can be subtracted.
for date_column in ('min_date', 'date_created'):
    first_posts_2_weeks[date_column] = pd.to_datetime(first_posts_2_weeks[date_column])

In [43]:
# Whole days from each post to the user's min_date; zero or positive when the
# post is not later than min_date, negative when it is later.
first_posts_2_weeks['date_diff'] = (first_posts_2_weeks['min_date'] - first_posts_2_weeks['date_created']).dt.days

In [44]:
# Keep the posts whose date_diff lies between 0 and 14 days, both ends included.
within_two_weeks = first_posts_2_weeks['date_diff'].between(0, 14)
first_posts_2_weeks = first_posts_2_weeks.loc[within_two_weeks]

In [51]:
# Order the remaining posts by user and date, then count posts per user.
# At this point more_than_one still holds the count for every user.
first_posts_2_weeks = first_posts_2_weeks.sort_values(by=['user_url', 'date_created'])
posts_per_user = first_posts_2_weeks.groupby('user_url')['id'].count()
more_than_one = posts_per_user.reset_index()

In [54]:
# Keep the users with more than one post in the window, and only their posts.
more_than_one = more_than_one.loc[more_than_one['id'].gt(1)]
kept_users = more_than_one['user_url']
first_posts_2_weeks = first_posts_2_weeks.loc[first_posts_2_weeks['user_url'].isin(kept_users)]

In [59]:
# Users with more than five posts in the window, largest counts first.
more_than_one.loc[more_than_one['id']>5].sort_values('id', ascending=False)

Unnamed: 0,user_url,id
1514,emma-b-4076,540
3929,midnight8,512
60,al-28,509
198,ami-s-28,416
912,cheska-b,406
...,...,...
3473,lottie-d-15,6
204,amy-11,6
1561,emma-l-488,6
468,arianne,6


In [53]:
# Save the posts in the two-week window. The file name has nothing to
# interpolate, so it is written as a plain string rather than an f-string.
first_posts_2_weeks.to_csv(path_clean_data / "posts_2_weeks_before.csv", index=False)

Unnamed: 0,user_url,id
0,12345_smumandson,1
1,123abc,1
2,1v9jnvpm,3
3,33kc,72
4,4771,1


In [5]:
# Look up one thread by its id.
threads_sql = "SELECT * FROM threads WHERE id=1238974"
# The connection is closed in `finally` so it is released even when the query raises.
conn = create_connection(path_db)
try:
    threads_df = pd.read_sql_query(threads_sql, conn)
finally:
    conn.close()
# Kept as the last top-level expression so the notebook displays it.
threads_df.head()

Unnamed: 0,id,url,subject,subforum_id
0,1238974,https://www.netmums.com/coffeehouse/special-ne...,How do you tell if a developmentally advanced ...,236


In [7]:
# Full subject of the first returned thread. The column is selected by name
# because the query uses SELECT *, so a positional column index depends on the
# table's column order.
threads_df['subject'].iloc[0]

'How do you tell if a developmentally advanced toddler is autistic?'