## free-marketing-watch
Search social media for mentions of brands and collect the comments/tweets/etc.
Count mentions of each and perform sentiment analysis on the strings.

In [1]:
import praw
import pandas as pd
from secrets import *
from pathlib import Path
from brands import fashion
import re

In [2]:
# Build the PRAW client shared by the rest of the notebook. Only app-level
# credentials are passed here (no username/password); the three names are
# expected to come from the star-import of the local `secrets` module.
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent,
)

Next, fetch the comment data, load it into a DataFrame, and keep only the columns we need.

In [3]:
def create_comments_df(subreddit_):
    """Collect comments from a subreddit's 'WAYWT' threads into a DataFrame.

    Searches ``subreddit_`` for submissions matching 'WAYWT' (newest first,
    ``time_filter="year"``, at most 1000 submissions) and gathers the
    comments of each submission. Uses the module-level ``reddit`` client.

    Inputs
    ------
    subreddit_ : str
        Name of the subreddit to be searched.

    Return
    ------
    pandas.DataFrame
        One row per comment with the columns 'link_id', 'id', 'score',
        'body' and 'Subreddit' (always equal to ``subreddit_``). When no
        comments are found the frame is empty but still has these columns.
    """
    columns = ['link_id', 'id', 'score', 'body']

    subreddit = reddit.subreddit(subreddit_)
    submissions = subreddit.search(
        'WAYWT', sort='new', time_filter="year", limit=1000
    )  # generator of submissions in the subreddit

    records = []
    for submission in submissions:
        # limit=0 strips the "load more comments" placeholders without
        # issuing extra requests, so .list() yields only real comments.
        submission.comments.replace_more(limit=0)
        for comment in submission.comments.list():
            # Keep only the fields we need instead of building a frame from
            # every attribute of every comment and slicing it afterwards.
            attrs = vars(comment)
            records.append({col: attrs.get(col) for col in columns})

    # Passing `columns` keeps the schema stable even when `records` is empty,
    # and building a fresh frame avoids assigning onto a slice of another one.
    df = pd.DataFrame(records, columns=columns)
    df['Subreddit'] = subreddit_
    return df


In [4]:
def brand_check(df,brandlist):
    """Extract every brand mention found in the ``body`` column of ``df``.

    Inputs
    ------
    df : DataFrame with a text column named ``body``.
    brandlist : regular expression (with capture groups) that is passed to
        ``Series.str.extractall`` together with ``re.VERBOSE``.

    Return
    ------
    New DataFrame with one row per match and one column per capture group,
    holding the matched text. Rows in which every group is missing are
    removed. ``df`` itself is not modified.
    """
    matches = df.body.str.extractall(pat=brandlist, flags=re.VERBOSE)
    return matches.dropna(axis=0, how='all')

### This takes a long time, probably around 30 minutes per 100,000 comments.

In [None]:
# Scrape one subreddit per run; change the name and re-run to collect another
# community (the saved CSV shown further down also has 'malefashionadvice' rows).
df1 = create_comments_df('femalefashionadvice')
df1

Run this cell to export the DataFrame to CSV. Be careful about overwriting existing data; writing with `mode='a'` adds the rows to an existing file instead.


In [None]:
p = Path.cwd() / 'data' / 'waywtdf.csv'
# Make sure the target directory exists before writing.
p.parent.mkdir(parents=True, exist_ok=True)
# Append so earlier scrapes are never overwritten. The header row is written
# only when the file is being created; without it a later read_csv would
# treat the first comment of a new file as the column names.
df1.to_csv(path_or_buf = p, mode = 'a', header=not p.exists())

In [5]:
# Load the previously exported comments from <cwd>/data/waywtdf.csv.
p = Path.cwd().joinpath('data', 'waywtdf.csv')
df = pd.read_csv(p)
df

Unnamed: 0.1,Unnamed: 0,link_id,id,score,body,Subreddit
0,0,t3_klxncy,ghc7s6s,12,Really like the crooklyn hat and Santa coat gu...,malefashionadvice
1,1,t3_klxncy,ghbh5m6,4,Thank you to everyone who participated [in yes...,malefashionadvice
2,2,t3_klxncy,ghczbnz,11,Finally a real and unbiased member of the publ...,malefashionadvice
3,3,t3_klxncy,ghdw1as,3,"I believe that's Frank Rossitano. Yeah, probab...",malefashionadvice
4,4,t3_klxncy,ghcr9kf,2,Add funny to that list. What a freaking all star.,malefashionadvice
...,...,...,...,...,...,...
33802,13370,t3_eif7u5,fcrvhlu,3,thank you! i think i got it at hot topic many ...,femalefashionadvice
33803,13371,t3_eif7u5,fct07im,1,"Hahaha I did not! Class of '09 in NC, but say ...",femalefashionadvice
33804,13372,t3_eif7u5,fct28pu,5,"I will! Your aesthetic reminds me of her, too,...",femalefashionadvice
33805,13373,t3_eif7u5,fct406p,2,a solid aspiration! i wonder if she still carr...,femalefashionadvice


If you prefer pickling over CSV, use the next two cells for I/O instead.

In [None]:
# Save the current comment frame to <cwd>/data/commentdf.pkl.
p = Path.cwd().joinpath('data', 'commentdf.pkl')
df.to_pickle(p)

In [None]:
# Define the path here so this cell works on a fresh kernel; otherwise `p`
# would still be whatever an earlier cell left behind (e.g. the CSV path).
p = Path.cwd() / 'data' / 'commentdf.pkl'
# NOTE: unpickling executes arbitrary code - only load files you created.
df = pd.read_pickle(filepath_or_buffer=p)

Reading the data back from CSV adds the previously saved index as an extra `Unnamed: 0` column, so this cell drops it.

In [6]:
# Drop every leftover index column written by earlier to_csv calls
# ('Unnamed: 0', 'Unnamed: 0.1', ...). Selecting them by prefix keeps the
# cell safe to re-run and harmless when the data came from the pickle.
unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed:')]
df = df.drop(columns=unnamed_cols)

In [7]:
# Extract brand mentions from every comment. `fashion` is imported from the
# local `brands` module; presumably a verbose regex with one named group per
# brand (the result columns below are brand names) - confirm in brands.py.
df2 = brand_check(df,fashion)

### Drop any brand columns that have no matches.

In [8]:
# Show at most 50 rows but never truncate columns.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", None)
# Keep only the brand columns that matched at least once.
df2 = df2.dropna(axis=1, how='all')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,uniqlo,Gap,HM,Levis,Carhartt,BrooksBrothers,Apple,Patagonia,Everlane,JCrew,Zara,Target,Vans,BananaRepbulic,OldNavy,SaintLaurent,Prada,CanadaGoose,CommonProjects,AllendEdmonds,Nike,Adidas,Abercrombie,Amazon,CalvinKlein,NorthFace,RalphLauren,Lacoste,TommyHilfiger,Hollister,LLBean,LouisVuitton,TomFord,Gucci,Burberry,AmericanApparel,Uggs,Express,Asket,Birkenstocks,ClubMonaco,ScotchandSoda,NextLevel,JCPenney,Costco,DarnTough,NorseProjects,ReigningChamp,NewBalance,Frye,Yeezy,MichaelKors,RedWings,Lululemon,Marmot,Puma,EddieBauer,Pendleton,ThreeSixteen,Bonobos,EngineeredGarments,Outlier,Armani,Gildan,AmericanEagle,Columbia,Converse,Dockers,Balenciaga,Juicy,Champion,LandsEnd,Viberg,Aldens,Clarks,ChuckTaylors,VictoriaSecret,Dickies,SteveMadden,Thrifted,Margiela,Visvim,Loft,AnnTaylor,PullsandBears,Madewell,KennethCole,Mango,Modcloth,DocMartens,KateSpade,Stradivarius,MarcJacobs,Allsaints,Primark,EileenFisher,Aaritzia,Topshop,Naturalizer,SpierandMackay,RogueTerritory,Muji,Arcteryx,Paraboots,Blundstone,StanRays,Deveaux,Epaulet,BergsandBergs,ONI,GrantStone,EvanKinori,NakedandFamous,Supreme,Drakes
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1
20,38,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,puma,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64,1119,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Puma,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
66,114,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,margiela,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
68,310,,,,,,,,,,,,,,,,,,,,,,,,Amazon,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
68,479,,,,,,,,,,,,,,,,,,,,,,,,Amazon,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33773,222,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LV,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33774,1795,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Birkenstocks,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33777,700,,,H&M,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33778,4415,,,,,,,,,,,,,,BR,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# Non-null entries per brand column = number of mentions, most mentioned first.
overall_counts = df2.count().sort_values(ascending=False)

### Splitting up the male and female subreddits to get demographic data.

In [10]:
# Boolean masks selecting the comments of each subreddit.
filt = df['Subreddit'].eq('malefashionadvice')
mfa_df = df.loc[filt]
filt2 = df['Subreddit'].eq('femalefashionadvice')
ffa_df = df.loc[filt2]

In [11]:
# Run the brand extraction per subreddit. The inputs are re-derived from `df`
# and the masks rather than from mfa_df/ffa_df, because those names are
# overwritten here; feeding them back in would make the cell fail on re-run.
mfa_df = brand_check(df.loc[filt], fashion)
ffa_df = brand_check(df.loc[filt2], fashion)

In [12]:
# Remove the brand columns that never matched within each subreddit.
mfa_df = mfa_df.dropna(axis=1, how='all')
ffa_df = ffa_df.dropna(axis=1, how='all')

In [13]:
# Mentions per brand within each subreddit, most mentioned first.
mfa_counts = mfa_df.count().sort_values(ascending=False)
ffa_counts = ffa_df.count().sort_values(ascending=False)

In [14]:
# Raise the row limit for display; 117 presumably covers the full brand list.
pd.set_option("display.max_rows", 117)
# Align the three count Series on brand name, one column per Series.
# sort=False keeps the existing order instead of sorting the brand index.
counts_df = pd.concat(
    [overall_counts, mfa_counts, ffa_counts],
    axis=1,
    sort=False,
)

## Final dataframe with the number of mentions for each brand split into overall, r/malefashionadvice, and r/femalefashionadvice. Ready for data visualization.

In [20]:
# Label the columns in the order the Series were concatenated:
# overall, r/malefashionadvice, r/femalefashionadvice.
counts_df.columns = ['Total','r/malefashionadvice','r/femalefashionadvice']
counts_df

Unnamed: 0,Total,r/malefashionadvice,r/femalefashionadvice
uniqlo,1077,801.0,276.0
Thrifted,881,434.0,447.0
Levis,425,327.0,98.0
JCrew,282,241.0,41.0
HM,249,107.0,142.0
BananaRepbulic,206,148.0,58.0
Target,192,77.0,115.0
Converse,175,126.0,49.0
Nike,174,140.0,34.0
Madewell,164,22.0,142.0
