## free-marketing-watch
Search social media for mentions of brands and collect the matching comments, tweets, etc.
Count the mentions of each brand and perform sentiment analysis on the collected text.

In [1]:
import praw
import pandas as pd
from secrets import *
from pathlib import Path
from brands import fashion
import re

In [2]:
# Reddit API client used by create_comments_df below.
# NOTE(review): client_id / client_secret / user_agent presumably come from the
# star-import of the local `secrets` module -- confirm.
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent,
)

Next, fetch the comment data, load it into a DataFrame, and keep only the columns we need.

In [3]:
def create_comments_df(subreddit_, time_filter="year", limit=1000):
    """Collect the comments on a subreddit's top submissions into a DataFrame.

    Inputs
    -----
    subreddit_ : str
        Name of the subreddit to be searched.
    time_filter : str, optional
        Passed through to ``subreddit.top``; defaults to ``"year"``.
    limit : int, optional
        Maximum number of top submissions to request; defaults to 1000.

    Return
    ------
    pandas.DataFrame
        One row per comment with the columns ``link_id``, ``id``, ``score``
        and ``body``, plus a ``Subreddit`` column holding ``subreddit_`` on
        every row. The columns are present even when no comments were found.
    """
    columns = ['link_id', 'id', 'score', 'body']
    subreddit = reddit.subreddit(subreddit_)

    records = []
    for submission in subreddit.top(time_filter=time_filter, limit=limit):
        # limit=0 discards the "load more comments" placeholders rather than
        # fetching them, so .list() below yields only real comments.
        submission.comments.replace_more(limit=0)
        for comment in submission.comments.list():
            attrs = vars(comment)
            # Keep only the fields we need instead of materialising every
            # attribute of the praw object in a throw-away wide frame.
            records.append({name: attrs.get(name) for name in columns})

    # Passing `columns` guarantees the schema even when `records` is empty;
    # selecting these columns from an empty frame used to raise KeyError.
    comments_df = pd.DataFrame(records, columns=columns)
    comments_df['Subreddit'] = subreddit_
    return comments_df


In [4]:
def brand_check(df,brandlist):
    """Extract every match of ``brandlist`` from the comment bodies.

       Inputs
       ------
       df: dataframe with a ``body`` column to search over.
       brandlist: pattern handed to ``Series.str.extractall`` as ``pat``
       (presumably a regex with one capture group per brand, defined in a
       separate file -- verify against the ``brands`` module).
       Return
       ------
       Dataframe of the extracted matches, one column per capture group,
       with rows in which every group is missing removed.
       """
    matches = df["body"].str.extractall(brandlist)
    return matches.dropna(axis=0, how="all")

### Downloading the comments in the next cell takes a long time, probably around 30 minutes per 100,000 comments.

In [5]:
# Download the comments for the femalefashionadvice subreddit and display them.
df = create_comments_df(subreddit_='femalefashionadvice')
df

Unnamed: 0,link_id,id,score,body,Subreddit
0,t3_ggpm9u,fq4m7up,1,We've received enough reports that this belong...,femalefashionadvice
1,t3_ggpm9u,fq3ejsm,4019,I'm sure I'll be downvoted for this but... whi...,femalefashionadvice
2,t3_ggpm9u,fq3f8cq,1333,I agree; there needs to be more flexibility in...,femalefashionadvice
3,t3_ggpm9u,fq43dqk,356,I completely agree. I waited 8 hours for a pos...,femalefashionadvice
4,t3_ggpm9u,fq3llwt,627,"Yeah, I think the sub is too strict on moderat...",femalefashionadvice
...,...,...,...,...,...
147337,t3_g04l8d,fn9viaz,2,"If SoWa gets to happen this year, sometimes th...",femalefashionadvice
147338,t3_g04l8d,fnfssbp,1,this gives me hope!! what style did you get??,femalefashionadvice
147339,t3_g04l8d,fnat7hw,1,They have a few. Do you these could be daily ...,femalefashionadvice
147340,t3_g04l8d,fna39ga,2,"Great to note, thanks for the tip!",femalefashionadvice


Run the next cell to export the DataFrame to CSV. Be careful not to overwrite an existing file; writing with `mode='a'` appends to the file instead of replacing it.


In [6]:
p = Path.cwd() / 'data' / 'commentdf.csv'
# Create the target folder on first use so the export cannot fail on a fresh checkout.
p.parent.mkdir(parents=True, exist_ok=True)
# Append to the CSV, writing the header row only when the file is new or empty:
# a header-less file would make the later pd.read_csv treat the first comment
# as the column names. To overwrite instead, use: df.to_csv(path_or_buf = p)
write_header = not p.exists() or p.stat().st_size == 0
df.to_csv(path_or_buf = p, mode = 'a', header=write_header)

In [5]:
# Reload the exported comments from disk.
p = Path.cwd().joinpath('data', 'commentdf.csv')
df = pd.read_csv(filepath_or_buffer=p)

In [6]:
#df['body'] = df['body'].str.lower()
# Drop the index column that the CSV round-trip added. errors='ignore' keeps this
# cell re-runnable: a second run no longer raises KeyError once the column is gone.
df = df.drop(columns =['Unnamed: 0'], errors='ignore')

In [10]:
# Extract every brand match from the comment bodies.
df2 = brand_check(df, brandlist=fashion)

In [23]:
# Cap the displayed rows at 50 and show every column of the match table.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", None)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
4,4383,,,,,,,,,,,,,Vans
4,4462,,,,,,,,,,J Crew,,,
29,40,,,,,,,,,,J.Crew,,,
43,0,,Gap,,,,,,,,,,,
164,39,,,H&M,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93785,230,,,,,,,,,,,Zara,,
93785,355,,,,,,,,,,,Zara,,
93789,385,,,,,,,,,,,Zara,,
93793,225,,,,,,,,,,,Zara,,


In [29]:
# Tally the values captured in column 6 of the match table.
df2.loc[:, 6].value_counts()

Apple    124
Name: 6, dtype: int64