### load data

In [1]:
import pandas as pd

In [2]:
# Load the raw slogan table; the path is relative to the notebook's working directory.
df = pd.read_excel('SloganData.xlsx')

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,COMPANY,SLOGAN,WORDS
0,3M Company,Science. Applied to life.,4
1,Abbott Laboratories,Life. To the fullest.,4
2,AbbVie,People. Passion. Possibilities.,3
3,Accenture plc,High performance. Delivered.,3
4,Activision Blizzard,We make Great Games,4


In [4]:
# Rename by label instead of by position: a positional `df.columns = [...]`
# raises a length-mismatch error if this cell is re-run after derived columns
# have been added, and silently mislabels data if the column order changes.
df = df.rename(columns={'WORDS': 'WordCount'})

### add CharacterCount

In [5]:
# Number of characters in each slogan (spaces and punctuation included).
df['CharacterCount'] = df['SLOGAN'].map(len)

### add WordCount

In [6]:
# Count whitespace-separated words. `split()` with no argument collapses runs of
# whitespace and ignores leading/trailing spaces, whereas `split(' ')` would
# count the resulting empty strings as extra words.
df['WordCount'] = df['SLOGAN'].map(lambda slogan: len(slogan.split()))

### add UsePunctuation

In [7]:
import re

In [8]:
# Character class of the punctuation marks that count as "using punctuation".
# The apostrophe is deliberately left out so that contractions are not counted.
pattern = r'[.,?!:;/<>"]'

In [9]:
# 1 when the slogan contains at least one character matched by `pattern`, else 0.
df['UsePunctuation'] = df['SLOGAN'].map(lambda text: int(bool(re.search(pattern, text))))

### add SentimentScore

In [10]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as analyser
# the compound score ranges from -1 (most negative) to +1 (most positive); 0 is neutral

In [11]:
# Create a single analyser instance so it can be reused for every slogan.
sentiment = analyser()

In [12]:
# Keep only the 'compound' entry of the polarity scores for each slogan.
df['SentimentScore'] = df['SLOGAN'].apply(
    lambda text: sentiment.polarity_scores(text)['compound']
)

### add NameIncluded

In [13]:
def in_name(row):
    """Return 1 if any word of the company name also appears in the slogan, else 0.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys 'COMPANY' and 'SLOGAN' (a DataFrame row
        or a plain dict), both holding strings.

    Returns
    -------
    int
        1 when at least one company-name word is also a slogan word, 0 otherwise.

    The comparison is case-insensitive. Punctuation attached to a word is
    stripped first, so "Nike!" in a slogan matches "Nike" in the name, and
    tokens that consist only of punctuation (e.g. "&") are ignored.
    """
    punctuation = """!@#$%^&*()"'/-+.,<>~`?:;"""

    name_words = {word.strip(punctuation) for word in row['COMPANY'].lower().split()}
    slogan_words = {word.strip(punctuation) for word in row['SLOGAN'].lower().split()}

    # A token made only of punctuation strips down to '', which is not a word.
    name_words.discard('')

    return 1 if name_words & slogan_words else 0

In [14]:
# Row-wise flag: does the slogan mention a word of the company name?
df['NameIncluded'] = df.apply(in_name, axis=1)

### add PossessiveIncluded

In [15]:
# Possessive determiners and pronouns to look for in a slogan.
# Each entry needs its own trailing comma: without one, Python concatenates
# adjacent string literals ('ours' 'its' -> 'oursits') and both words are lost.
possessives = ['my',
               'mine',
               'your',
               'yours',
               'his',
               'hers',
               'our',
               'ours',
               'its',
               'their',
               'theirs']

In [16]:
def poss_included(slogan):
    """Return 1 if the slogan contains a possessive as a whole word, else 0.

    Parameters
    ----------
    slogan : str
        Slogan text; matching is case-insensitive.

    Returns
    -------
    int
        1 when any entry of the module-level `possessives` list occurs as a
        whole word in the slogan, 0 otherwise.

    Whole words are compared rather than substrings, so "this" does not count
    as "his" and "hour" does not count as "our".
    """
    # Runs of letters only: punctuation and apostrophes act as separators,
    # so "it's" yields "it" and "s" and is not mistaken for "its".
    words = set(re.findall(r"[a-z]+", slogan.lower()))
    return 1 if any(word in words for word in possessives) else 0

In [17]:
# Flag slogans that contain a possessive.
df['PossessiveIncluded'] = df['SLOGAN'].apply(poss_included)

### add RelativePronounIncluded

In [18]:
# Words treated as relative pronouns when scanning a slogan.
rel_pronouns = ['how', 'that', 'which', 'who', 'whom', 'whose']

In [19]:
def rel_pronoun(slogan):
    """Return 1 if the slogan contains a relative pronoun as a whole word, else 0.

    Parameters
    ----------
    slogan : str
        Slogan text; matching is case-insensitive.

    Returns
    -------
    int
        1 when any entry of the module-level `rel_pronouns` list occurs as a
        whole word in the slogan, 0 otherwise.

    Whole words are compared rather than substrings, so "show" does not count
    as "how" and "whole" does not count as "who".
    """
    # Runs of letters only: punctuation and apostrophes act as separators.
    words = set(re.findall(r"[a-z]+", slogan.lower()))
    return 1 if any(word in words for word in rel_pronouns) else 0

In [20]:
# Flag slogans that contain a relative pronoun.
df['RelativePronounIncluded'] = df['SLOGAN'].apply(rel_pronoun)

### add Rhyme

In [21]:
from pronouncing import rhymes as check_rhymes

In [22]:
def return_if_rhymes(slogan):
    """Return 1 if any two words of the slogan rhyme with each other, else 0.

    Parameters
    ----------
    slogan : str
        Slogan text; words are lower-cased before lookup.

    Returns
    -------
    int
        1 when `check_rhymes` (pronouncing.rhymes) lists, for some slogan
        word, another word that is also in the slogan; 0 otherwise.

    Punctuation is stripped from every word individually. Stripping only the
    ends of the whole slogan would leave interior words such as "life." with
    their punctuation, and those would never be found in the rhyme lists.
    """
    punctuation = """!@#$%^&*()"'/-+.,<>~`?:;"""

    words = {token.strip(punctuation) for token in slogan.lower().split()}
    # A token made only of punctuation strips down to '', which is not a word.
    words.discard('')

    for word in words:
        # Set intersection replaces the nested loop, which also shadowed `x`.
        if words.intersection(check_rhymes(word)):
            return 1

    return 0

In [23]:
# Flag slogans in which at least two words rhyme.
df['Rhymes'] = df['SLOGAN'].apply(return_if_rhymes)

### add Alliteration

In [24]:
def allit(slogan):
    """Return 1 if at least two words of the slogan start with the same character.

    Parameters
    ----------
    slogan : str
        Slogan text; comparison is case-insensitive.

    Returns
    -------
    int
        1 when some initial character is shared by two or more words, else 0
        (including for an empty slogan).

    Punctuation is stripped from every word individually, so a quoted word is
    compared by its first letter rather than by the quote mark, and tokens
    made only of punctuation (e.g. "&" or "-") are not counted as words.
    """
    punctuation = """!@#$%^&*()"'/-+.,<>~`?:;"""

    words = (token.strip(punctuation) for token in slogan.lower().split())
    initials = [word[0] for word in words if word]

    # A duplicate initial makes the set smaller than the list.
    return 1 if len(set(initials)) < len(initials) else 0

In [25]:
# Flag slogans in which two or more words share a first letter.
df['Alliteration'] = df['SLOGAN'].apply(allit)

### add AlliterationScore

In [26]:
# returns the fraction of words whose first letter is shared with at least one other word
# e.g. "ad alliteration project" = 0.67, "ad alliteration" = 1.00

In [27]:
def allit_score(slogan):
    """Return the fraction of words whose initial is shared with another word.

    Parameters
    ----------
    slogan : str
        Slogan text; comparison is case-insensitive.

    Returns
    -------
    float
        (number of words whose first character also starts at least one other
        word) / (number of words). For example "ad alliteration project"
        gives 2/3. Returns 0.0 when the slogan contains no words, rather than
        dividing by zero.

    Punctuation is stripped from every word individually, and tokens made
    only of punctuation (e.g. "&") are not counted as words.
    """
    punctuation = """!@#$%^&*()"'/-+.,<>~`?:;"""

    words = (token.strip(punctuation) for token in slogan.lower().split())
    initials = [word[0] for word in words if word]

    if not initials:
        # An empty or punctuation-only slogan has nothing to alliterate.
        return 0.0

    matched = sum(1 for letter in initials if initials.count(letter) >= 2)
    return matched / len(initials)

In [28]:
# Share of words per slogan that take part in an alliteration.
df['AlliterationScore'] = df['SLOGAN'].apply(allit_score)

### add CapitalizationScore

In [29]:
# fraction of alphabetic characters in the slogan that are uppercase

In [32]:
def cap_score(slogan):
    """Return the fraction of the slogan's letters that are uppercase.

    Parameters
    ----------
    slogan : str
        Slogan text.

    Returns
    -------
    float
        (uppercase alphabetic characters) / (all alphabetic characters).
        The score is per character, not per word. Returns 0.0 when the slogan
        contains no letters, rather than dividing by zero.
    """
    letters = [char for char in slogan if char.isalpha()]

    if not letters:
        return 0.0

    return sum(char.isupper() for char in letters) / len(letters)

In [33]:
# Share of uppercase letters per slogan.
df['CapitalizationScore'] = df['SLOGAN'].apply(cap_score)

### export

In [34]:
# Write the table with all derived columns to a new workbook.
# NOTE(review): to_excel writes the row index as an unnamed first column by
# default; pass index=False if that column is not wanted in the output.
df.to_excel('SloganData+DerivedAttributes.xlsx')