In [95]:
%%writefile ./templates/page.html
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Facebook Content Predictor</title>
  </head>
  <body>

    <!-- Relative action: the form posts back to whichever host/port served
         this page instead of a hard-coded http://localhost:5000. -->
    <form action="/result" method="POST">
      <p>
        <label for="content">Content</label>
        <!-- "string" is not a valid input type; "text" is the intended one. -->
        <input type="text" id="content" name="content" />
      </p>
      <p><button type="submit">Predict</button></p>
    </form>

  </body>
</html>

Overwriting ./templates/page.html


In [163]:
%%writefile social_predictor.py

import flask
from flask import render_template
app = flask.Flask(__name__)

#-------- MODEL GOES HERE -----------#

import numpy as np
import pandas as pd
import pickle
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as esw
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
import seaborn as sns
import io
import regex as re
import matplotlib.pyplot as plt

# Load the post data from CSV; the path is relative to the current working directory.
df = pd.read_csv('Clean_Data_Eng/Final_merged_Eng.csv')

#Selection of Regex functions that clean the post content of links and hashtags
def nohash(x):
    """Return ``x`` with each ``#`` and the non-whitespace run after it removed."""
    hashtag_pattern = r'#\S+'
    return re.sub(hashtag_pattern, '', x)
def nohttp(x):
    """Return ``x`` with every run that starts at ``http`` and ends at whitespace removed."""
    url_pattern = r'http\S+'
    return re.sub(url_pattern, '', x)
def nobitly(x):
    """Return ``x`` with ``bit.ly`` short links removed.

    A match is the literal text ``bit.ly`` followed by at least one
    non-whitespace character; the whole run up to the next whitespace is dropped.
    """
    # The dot is escaped so that it only matches a literal '.'; unescaped it
    # matched any character and mangled ordinary words such as "rabbitflying".
    return re.sub(r'bit\.ly\S+', '', x)
def nolink(x):
    """Return ``x`` with word-bounded tokens containing a ``/`` or ``.`` removed."""
    link_pattern = r'\b\w*[/.]\w*\b'
    stripped = re.sub(link_pattern, '', x)
    return stripped
def cleaner(x):
    """Run ``x`` through nolink, nobitly, nohttp and nohash, in that order."""
    text = nolink(x)
    text = nobitly(text)
    text = nohttp(text)
    return nohash(text)

brand_cues = ['\n','Terms:' ,'See More', 'store','stores',
             'Sainsburys',"Sainsbury’s",'sainsburys',"sainsbury’s",'sainsbury','Sainsbury','nectar','Nectar','Good Living','Tu','tu',
             'Tesco','tesco',"Tesco's",'clubcard','Clubcard','Tesco Extra','Jamie','Jamie Oliver',
             'Waitrose','waitrose','heston','Heston',
             'Lidl','LidlUk','lidl',"lidl's",'lidluk',
             'Heidi','heidi','klum','Klum',
             'Morrisons','morrisons','msstorefinder','nutmeg','Nutmeg',
             'ASDA','asda','Asda','bestchristmasever','George',
             'M&S','MnS','Marks & Spencer','Marks&Spencer','marks and spencer','marksandspencer','marks&spencer']

#Function that removes any obvious branding cues (above) from the post content i.e. brand names, celebrities
def debrander(x):
    """Return ``x`` with every token listed in ``brand_cues`` removed.

    ``,``, ``.`` and ``!`` are first replaced by spaces, the text is split on
    whitespace, and the surviving tokens are re-joined with each token wrapped
    in a single leading and trailing space (so neighbours end up separated by
    two spaces).

    Matching is exact and per token, so entries of ``brand_cues`` that contain
    whitespace (e.g. 'See More', 'Tesco Extra', '\\n') can never match here.
    """
    tokens = re.sub(r'[,.!]', ' ', x).split()
    # Build a new list rather than calling remove() on the list being iterated:
    # removing in place shifts the remaining items left, so the token directly
    # after a removed cue was skipped and consecutive cues survived.
    kept = [token for token in tokens if token not in brand_cues]
    return ''.join(' {} '.format(token) for token in kept)


class ContentPreprocessor(BaseEstimator, TransformerMixin):
    """Transformer that tidies text columns of a DataFrame and drops unused ones.

    Parameters
    ----------
    cleaner : iterable of str or None
        Names of columns whose values are passed through the module-level
        ``cleaner`` function.
    debrander : iterable of str or None
        Names of columns whose values are passed through the module-level
        ``debrander`` function.
    columns_to_drop : iterable of str or None
        Names of columns removed from the result when they are present.

    Notes
    -----
    * ``fit`` learns nothing; all work happens in ``transform``.
    * Input that is not a ``pandas.DataFrame`` (a Series, a list of strings, ...)
      is returned unchanged. This was already the effective behaviour, through
      swallowed exceptions, and it is relied on when raw text is fed through a
      pipeline containing this step.
    * ``transform`` works on a copy, so the caller's DataFrame is not modified.
    """

    def __init__(self, cleaner=None, debrander=None, columns_to_drop=None):
        self.cleaner = cleaner
        self.debrander = debrander
        self.columns_to_drop = columns_to_drop

    @staticmethod
    def _apply_to_columns(X, columns, func):
        """Apply ``func`` element-wise to each column of ``X`` named in ``columns``.

        Missing columns are skipped. If ``func`` fails on a column, that column
        is left as it was and a ``RuntimeWarning`` is emitted, so the step stays
        best-effort without hiding the failure.
        """
        import warnings  # local import keeps the class self-contained

        if not isinstance(X, pd.DataFrame):
            return X
        for col in columns or []:
            if col not in X.columns:
                continue
            try:
                X[col] = X[col].apply(func)
            except Exception as exc:  # not a bare except: Ctrl-C must still propagate
                warnings.warn(
                    "ContentPreprocessor: could not apply {} to column {!r}: {}".format(
                        getattr(func, '__name__', func), col, exc),
                    RuntimeWarning,
                )
        return X

    #function as outlined above
    def _cleaner(self, X):
        """Run the module-level ``cleaner`` over the columns named in ``self.cleaner``."""
        return self._apply_to_columns(X, self.cleaner, cleaner)

    #function as outlined above
    def _debrander(self, X):
        """Run the module-level ``debrander`` over the columns named in ``self.debrander``."""
        return self._apply_to_columns(X, self.debrander, debrander)

    #drops any unwanted columns
    def _drop_unused_cols(self, X):
        """Return ``X`` without the columns in ``self.columns_to_drop`` that it contains."""
        if not isinstance(X, pd.DataFrame):
            return X
        # ``or []`` covers the constructor default of None, which used to raise
        # "TypeError: 'NoneType' object is not iterable".
        present = [col for col in (self.columns_to_drop or []) if col in X.columns]
        return X.drop(present, axis=1)

    def transform(self, X, *args):
        """Return a cleaned, de-branded copy of ``X`` without the unwanted columns."""
        if not isinstance(X, pd.DataFrame):
            return X
        prepared = X.copy()  # never write back into the caller's frame
        prepared = self._cleaner(prepared)
        prepared = self._debrander(prepared)
        return self._drop_unused_cols(prepared)

    def fit(self, X, *args):
        """No-op fit, present for scikit-learn API compatibility; returns ``self``."""
        return self
    
#set up an instance of our preprocessor class
ContentPrep = ContentPreprocessor(cleaner=['Post_Content'],debrander=['Post_Content'],
                                  columns_to_drop=['Date','Year','All_Responses', 'Comments', 'Shares', 'Views', 'Contains_Link', 'Contains_Video', 'Has_Hashtag', 'Hashtag_Count', 'Likes', 'Response_Rate', 'Comments_Rate', 'Shares_Rate', 'Video_Rate'])

# Preprocess once and take both the text and the label from the same result
# (previously the whole frame was transformed twice).
prepared = ContentPrep.transform(df)
X = prepared.Post_Content
y = prepared.Brand

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 100,stratify=y, shuffle=True, test_size=0.25)

stopWords = set(esw).union(brand_cues)
tvec = TfidfVectorizer(stop_words=stopWords,
                       sublinear_tf=True,
                       max_df=0.25)
# The vectorizer has to be fitted before the pipeline can transform text at
# prediction time; without this every call to vc_pipe.predict fails.
# NOTE(review): this assumes the saved classifier below was trained on features
# from a vectorizer fitted on this same X_train -- confirm, or better, persist
# and load the fitted vectorizer together with the model.
tvec.fit(X_train)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
vc = joblib.load('social_predictor.sav')

# The pipeline is assembled from already-prepared parts and is not re-fitted here.
vc_pipe = Pipeline([('content_prep', ContentPrep),('tfidf_vec', tvec),('logit', vc)])


#-------- ROUTES GO HERE -----------#

@app.route('/')
@app.route('/page')
def page():
    """Serve the input form stored in ``templates/page.html``."""
    # render_template resolves the file through the app's template folder, so
    # it no longer depends on the process's current working directory the way
    # open("templates/page.html") did.
    return render_template('page.html')

@app.route('/result', methods=['POST', 'GET'])
def result():
    '''Gets prediction using the HTML form.

    POST: reads the ``content`` form field and returns the predicted label as
    the response body. A missing field is rejected by Flask with a 400.
    GET: redirects to the input form; the view previously returned None here,
    which Flask turns into a server error.
    '''
    if flask.request.method != 'POST':
        return flask.redirect(flask.url_for('page'))

    content = flask.request.form['content']
    # predict expects an iterable of documents, so the single post is wrapped
    # in a list; a bare string is rejected by the text vectorizer.
    score = vc_pipe.predict([content]).tolist()[0]
    # A view must return a str/Response, so coerce the label explicitly.
    return str(score)
    
if __name__ == '__main__':
    # Start the Flask server on the local interface when run as a script.
    HOST, PORT = '127.0.0.1', 5000
    app.run(host=HOST, port=PORT)


Overwriting social_predictor.py


In [169]:
# Display the configured preprocessor; its repr lists the constructor parameters.
ContentPrep

ContentPreprocessor(cleaner=['Post_Content'],
          columns_to_drop=['Date', 'Year', 'All_Responses', 'Comments', 'Shares', 'Views', 'Contains_Link', 'Contains_Video', 'Has_Hashtag', 'Hashtag_Count', 'Likes', 'Response_Rate', 'Comments_Rate', 'Shares_Rate', 'Video_Rate'],
          debrander=['Post_Content'])

In [168]:
# Preprocess once and take text and label from the same result instead of
# running the whole transform twice.
prepared = ContentPrep.transform(df)
X = prepared.Post_Content
y = prepared.Brand


0       Bake a festive showstopper with Sainsbury’s ma...
1       Get in the party spirit with Sainsbury’s magaz...
2       Harry and Meghan’s wedding cake maker Claire P...
3       These cookie-cup mince pies are deliciously ch...
4       We need your help to brighten 1 million Christ...
5       Did you know you can make your Christmas puddi...
6       Kids AND adults will love making these cute re...
7       Calling all cheese lovers! Christmas really ha...
8       *Watches on repeat for the kid dressed as a plug*
9       He didn't choose the plug life, the plug life ...
10      🎶We give all we’ve got for the ones we love🎶 🌟❤️🎄
11      Drink your cocktails and eat them too with Sai...
12      Stuff this mulled beef brisket into wraps with...
13      Who do you wear your poppy for? You can rememb...
14      Bake this fabulous chocolate ginger bundt cake...
15      That's right, a YORKSHIRE PUD is starring in t...
16      Get ready for Christmas with Sainsbury’s magaz...
17            

In [158]:
# Smoke test: predict on a one-element list holding an empty post and keep the first label.
score = vc_pipe.predict([""]).tolist()[0]

In [159]:
# Show the predicted label.
score

'Lidl'

In [160]:
# Score the pipeline on the held-out split.
# NOTE(review): presumably mean accuracy, as the final step looks like a classifier -- confirm.
vc_pipe.score(X_test,y_test)

0.843198992443325

In [152]:
def nohash(x):
    """Return ``x`` with each ``#`` and the non-whitespace run after it removed."""
    hashtag_pattern = r'#\S+'
    return re.sub(hashtag_pattern, '', x)
def nohttp(x):
    """Return ``x`` with every run that starts at ``http`` and ends at whitespace removed."""
    url_pattern = r'http\S+'
    return re.sub(url_pattern, '', x)
def nobitly(x):
    """Return ``x`` with ``bit.ly`` short links removed.

    A match is the literal text ``bit.ly`` followed by at least one
    non-whitespace character; the whole run up to the next whitespace is dropped.
    """
    # The dot is escaped so that it only matches a literal '.'; unescaped it
    # matched any character and mangled ordinary words such as "rabbitflying".
    return re.sub(r'bit\.ly\S+', '', x)
def nolink(x):
    """Return ``x`` with word-bounded tokens containing a ``/`` or ``.`` removed."""
    link_pattern = r'\b\w*[/.]\w*\b'
    stripped = re.sub(link_pattern, '', x)
    return stripped
def cleaner(x):
    """Run ``x`` through nolink, nobitly, nohttp and nohash, in that order."""
    text = nolink(x)
    text = nobitly(text)
    text = nohttp(text)
    return nohash(text)

brand_cues = ['\n','Terms:' ,'See More', 'store','stores',
             'Sainsburys',"Sainsbury’s",'sainsburys',"sainsbury’s",'sainsbury','Sainsbury','nectar','Nectar','Good Living','Tu','tu',
             'Tesco','tesco',"Tesco's",'clubcard','Clubcard','Tesco Extra','Jamie','Jamie Oliver',
             'Waitrose','waitrose','heston','Heston',
             'Lidl','LidlUk','lidl',"lidl's",'lidluk',
             'Heidi','heidi','klum','Klum',
             'Morrisons','morrisons','msstorefinder','nutmeg','Nutmeg',
             'ASDA','asda','Asda','bestchristmasever','George',
             'M&S','MnS','Marks & Spencer','Marks&Spencer','marks and spencer','marksandspencer','marks&spencer']

#Function that removes any obvious branding cues (above) from the post content i.e. brand names, celebrities
def debrander(x):
    """Return ``x`` with every token listed in ``brand_cues`` removed.

    ``,``, ``.`` and ``!`` are first replaced by spaces, the text is split on
    whitespace, and the surviving tokens are re-joined with each token wrapped
    in a single leading and trailing space (so neighbours end up separated by
    two spaces).

    Matching is exact and per token, so entries of ``brand_cues`` that contain
    whitespace (e.g. 'See More', 'Tesco Extra', '\\n') can never match here.
    """
    tokens = re.sub(r'[,.!]', ' ', x).split()
    # Build a new list rather than calling remove() on the list being iterated:
    # removing in place shifts the remaining items left, so the token directly
    # after a removed cue was skipped and consecutive cues survived.
    kept = [token for token in tokens if token not in brand_cues]
    return ''.join(' {} '.format(token) for token in kept)


class ContentPreprocessor(BaseEstimator, TransformerMixin):
    """Transformer that tidies text columns of a DataFrame and drops unused ones.

    Parameters
    ----------
    cleaner : iterable of str or None
        Names of columns whose values are passed through the module-level
        ``cleaner`` function.
    debrander : iterable of str or None
        Names of columns whose values are passed through the module-level
        ``debrander`` function.
    columns_to_drop : iterable of str or None
        Names of columns removed from the result when they are present.

    Notes
    -----
    * ``fit`` learns nothing; all work happens in ``transform``.
    * Input that is not a ``pandas.DataFrame`` (a Series, a list of strings, ...)
      is returned unchanged. This was already the effective behaviour, through
      swallowed exceptions, and it is relied on when raw text is fed through a
      pipeline containing this step.
    * ``transform`` works on a copy, so the caller's DataFrame is not modified.
    """

    def __init__(self, cleaner=None, debrander=None, columns_to_drop=None):
        self.cleaner = cleaner
        self.debrander = debrander
        self.columns_to_drop = columns_to_drop

    @staticmethod
    def _apply_to_columns(X, columns, func):
        """Apply ``func`` element-wise to each column of ``X`` named in ``columns``.

        Missing columns are skipped. If ``func`` fails on a column, that column
        is left as it was and a ``RuntimeWarning`` is emitted, so the step stays
        best-effort without hiding the failure.
        """
        import warnings  # local import keeps the class self-contained

        if not isinstance(X, pd.DataFrame):
            return X
        for col in columns or []:
            if col not in X.columns:
                continue
            try:
                X[col] = X[col].apply(func)
            except Exception as exc:  # not a bare except: Ctrl-C must still propagate
                warnings.warn(
                    "ContentPreprocessor: could not apply {} to column {!r}: {}".format(
                        getattr(func, '__name__', func), col, exc),
                    RuntimeWarning,
                )
        return X

    #function as outlined above
    def _cleaner(self, X):
        """Run the module-level ``cleaner`` over the columns named in ``self.cleaner``."""
        return self._apply_to_columns(X, self.cleaner, cleaner)

    #function as outlined above
    def _debrander(self, X):
        """Run the module-level ``debrander`` over the columns named in ``self.debrander``."""
        return self._apply_to_columns(X, self.debrander, debrander)

    #drops any unwanted columns
    def _drop_unused_cols(self, X):
        """Return ``X`` without the columns in ``self.columns_to_drop`` that it contains."""
        if not isinstance(X, pd.DataFrame):
            return X
        # ``or []`` covers the constructor default of None, which used to raise
        # "TypeError: 'NoneType' object is not iterable".
        present = [col for col in (self.columns_to_drop or []) if col in X.columns]
        return X.drop(present, axis=1)

    def transform(self, X, *args):
        """Return a cleaned, de-branded copy of ``X`` without the unwanted columns."""
        if not isinstance(X, pd.DataFrame):
            return X
        prepared = X.copy()  # never write back into the caller's frame
        prepared = self._cleaner(prepared)
        prepared = self._debrander(prepared)
        return self._drop_unused_cols(prepared)

    def fit(self, X, *args):
        """No-op fit, present for scikit-learn API compatibility; returns ``self``."""
        return self
    
#set up an instance of our preprocessor class
# Clean and de-brand the post text, then drop the columns listed below.
ContentPrep = ContentPreprocessor(
    cleaner=['Post_Content'],
    debrander=['Post_Content'],
    columns_to_drop=[
        'Date', 'Year', 'All_Responses', 'Comments', 'Shares', 'Views',
        'Contains_Link', 'Contains_Video', 'Has_Hashtag', 'Hashtag_Count',
        'Likes', 'Response_Rate', 'Comments_Rate', 'Shares_Rate', 'Video_Rate',
    ],
)


In [155]:
# Preview the preprocessed frame; the recorded output shows the Brand and Post_Content columns.
ContentPrep.transform(df)

Unnamed: 0,Brand,Post_Content
0,Sainsburys,Bake a festive showstopper with Sainsbury’s ma...
1,Sainsburys,Get in the party spirit with Sainsbury’s magaz...
2,Sainsburys,Harry and Meghan’s wedding cake maker Claire P...
3,Sainsburys,These cookie-cup mince pies are deliciously ch...
4,Sainsburys,We need your help to brighten 1 million Christ...
5,Sainsburys,Did you know you can make your Christmas puddi...
6,Sainsburys,Kids AND adults will love making these cute re...
7,Sainsburys,Calling all cheese lovers! Christmas really ha...
8,Sainsburys,*Watches on repeat for the kid dressed as a plug*
9,Sainsburys,"He didn't choose the plug life, the plug life ..."


In [154]:
# Display the post-content series held in X.
X

0       Bake a festive showstopper with Sainsbury’s ma...
1       Get in the party spirit with Sainsbury’s magaz...
2       Harry and Meghan’s wedding cake maker Claire P...
3       These cookie-cup mince pies are deliciously ch...
4       We need your help to brighten 1 million Christ...
5       Did you know you can make your Christmas puddi...
6       Kids AND adults will love making these cute re...
7       Calling all cheese lovers! Christmas really ha...
8       *Watches on repeat for the kid dressed as a plug*
9       He didn't choose the plug life, the plug life ...
10      🎶We give all we’ve got for the ones we love🎶 🌟❤️🎄
11      Drink your cocktails and eat them too with Sai...
12      Stuff this mulled beef brisket into wraps with...
13      Who do you wear your poppy for? You can rememb...
14      Bake this fabulous chocolate ginger bundt cake...
15      That's right, a YORKSHIRE PUD is starring in t...
16      Get ready for Christmas with Sainsbury’s magaz...
17            