In [1]:
import csv
import string
import pandas as pd


#Open the stop-word csv file and return its non-empty cells as a flat list of strings
def openfileStopWords(filename):
    """Read a CSV file of stop words and return them as one flat list.

    Every non-empty cell of every row is collected, in file order. The file
    is decoded as ASCII and bytes that cannot be decoded are silently dropped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with the non-empty cell strings of the file.
    """
    # newline='' hands line-ending handling to the csv module (as its
    # documentation requires); the with-block closes the file on exit.
    with open(filename, encoding='ascii', errors='ignore', newline='') as csvfile:
        return [cell for row in csv.reader(csvfile) for cell in row if cell != '']

def openfile(filename):
    """Load the posts CSV and return one dictionary per data row.

    The first row is treated as a header and discarded. Columns are read by
    position:
        0 -> "id"
        1 -> "postURL"
        2 -> "message"
        3 -> "intent"
        4 -> "cta"

    The file is decoded as ASCII and bytes that cannot be decoded are
    silently dropped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of dicts with the keys "id", "postURL", "message", "cta" and
        "intent". An empty or header-only file yields an empty list.

    Raises:
        IndexError: If a non-blank data row has fewer than five columns.
    """
    # newline='' hands line-ending handling to the csv module; the with-block
    # closes the file on exit.
    with open(filename, encoding='ascii', errors='ignore', newline='') as csvfile:
        rows = list(csv.reader(csvfile))

    # rows[1:] skips the header and is simply empty for an empty file.
    # Blank lines come back from csv.reader as [] and are skipped.
    return [
        {
            "id": row[0],
            "postURL": row[1],
            "message": row[2],
            "cta": row[4],
            "intent": row[3],
        }
        for row in rows[1:]
        if row
    ]

#stopWords = ["to", "a", "the", "is"]

#Filter the list of posts, keeping only those with the user-supplied intent label
def selectIntent(list_of_dictionary, intent):
    """Return the posts whose 'intent' value equals *intent*.

    The input order is preserved and the returned list holds the same
    dictionary objects as the input (no copies are made).
    """
    matching_posts = []
    for post in list_of_dictionary:
        if post['intent'] == intent:
            matching_posts.append(post)
    return matching_posts

#Lowercase one message and strip its punctuation, then build a list of dictionaries holding each non-stop word and its count within that message

def toWordDict(rawMessage, stopWords):
    """Count the non-stop words of a single message.

    The message is lowercased, ASCII punctuation is removed, and the result
    is split on whitespace. Words found in *stopWords* are skipped.

    Args:
        rawMessage: The message text.
        stopWords: Iterable of words to leave out of the count.

    Returns:
        A list of ``{'word': <str>, 'score': <int>}`` dictionaries, one per
        distinct word, ordered by first appearance in the message.
    """
    # NOTE: stop words are compared as given, while the message words are
    # already lowercased and punctuation-free at that point.
    stop_set = set(stopWords)  # built once so each lookup is O(1)

    cleaned = rawMessage.lower().translate(
        str.maketrans('', '', string.punctuation))

    word_dict = []
    entry_by_word = {}  # word -> its entry in word_dict, avoids rescanning
    for word in cleaned.split():
        if word in stop_set:
            continue
        entry = entry_by_word.get(word)
        if entry is None:
            entry = {'word': word, 'score': 0}
            entry_by_word[word] = entry
            word_dict.append(entry)
        entry['score'] += 1
    return word_dict
                                      

#Take a wholeList of words with scores and combine it with one word dictionary extracted from one message
def combine(wholeList, wordDict):
    """Merge the word counts of one message into the running totals.

    For every entry of *wordDict*, the score is added to the entry of
    *wholeList* with the same word; if there is none, a new entry is
    appended.

    Args:
        wholeList: Running list of ``{'word': ..., 'score': ...}`` dicts.
            It is modified in place.
        wordDict: List of ``{'word': ..., 'score': ...}`` dicts to merge in.
            Its dictionaries are not modified and not shared with the result.

    Returns:
        The same *wholeList* object, after merging.
    """
    # Index the existing entries once instead of scanning the whole list for
    # every word. setdefault keeps the first entry if a word appears twice,
    # matching a front-to-back search.
    entry_by_word = {}
    for entry in wholeList:
        entry_by_word.setdefault(entry['word'], entry)

    for item in wordDict:
        found = entry_by_word.get(item['word'])
        if found is None:
            # Store a copy so later updates never touch the caller's dict.
            fresh = {'word': item['word'], 'score': item['score']}
            wholeList.append(fresh)
            entry_by_word[item['word']] = fresh
        else:
            found['score'] = found['score'] + item['score']
    return wholeList

# Small hand-written sample of posts carrying only the 'message' and 'intent'
# keys; nothing in the code visible here references it.
test_dict = [
    {'message': text, 'intent': label}
    for text, label in (
        ("hi my name! is ellen?", "1"),
        ("what is your Name!", "1"),
        ("this has a different intent", "2"),
    )
]

# Intent labels (as strings) that main() iterates over, one output file each.
intent_number = [str(n) for n in range(1, 5)]

def combinecsv(filenames=None, output="word_counts_intent_MERGED.csv"):
    """Place the first two columns of several CSV files side by side.

    Each input file is read without a header row, so any header line it
    contains is treated as ordinary data. Only its first two columns are
    kept. The pieces are concatenated horizontally and written to *output*
    without a header row or index column.

    Args:
        filenames: Paths of the CSV files to merge, in output column order.
            Defaults to ``word_counts_intent1.csv`` through
            ``word_counts_intent4.csv``.
        output: Path of the merged CSV file to write.
    """
    if filenames is None:
        filenames = ['word_counts_intent{}.csv'.format(i + 1) for i in range(4)]

    dfs = []
    for filename in filenames:
        # read the csv, making sure the first two columns are str
        df = pd.read_csv(filename, header=None, converters={0: str, 1: str})
        # Keep only the first two columns. DataFrame.ix was removed in
        # pandas 1.0; positional .iloc selects the same columns.
        df = df.iloc[:, :2]
        # change the column names so they won't collide during concatenation
        df.columns = ['{}{}'.format(filename, cname) for cname in df.columns]
        dfs.append(df)

    # concatenate them horizontally
    merged = pd.concat(dfs, axis=1)
    # write it out
    merged.to_csv(output, header=False, index=False)


def main():
    """Write one ranked word-count CSV per intent.

    Reads the posts from ``data_intent_bucketing.csv`` and the stop words
    from ``stopWords.csv``. For every label in ``intent_number`` it counts
    the non-stop words across all messages with that intent, sorts them by
    descending score, prints the ranking, and writes it to
    ``word_counts_intent<label>.csv`` with a ``word,score`` header.

    An intent with no words still produces a header-only file.
    """
    list_of_dictionary = openfile('data_intent_bucketing.csv')
    stopWords = openfileStopWords('stopWords.csv')

    for number in intent_number:
        # Running totals: list of dictionaries [{'word': "donate", 'score': 13}, ...]
        list_of_words = []
        for post in selectIntent(list_of_dictionary, number):
            wordDict = toWordDict(post['message'], stopWords)
            list_of_words = combine(list_of_words, wordDict)

        # Highest score first; the stable sort keeps ties in first-seen order.
        rankedList = sorted(list_of_words, key=lambda p: -p['score'])
        print(rankedList)

        # Fixed field names rather than rankedList[0].keys(), so an intent
        # with no words does not raise IndexError.
        fileName = "word_counts_intent" + number + ".csv"
        # newline='' stops the csv writer's line endings being translated a
        # second time, which produces blank rows on Windows.
        with open(fileName, 'w', newline='') as output_file:
            dict_writer = csv.DictWriter(output_file, ['word', 'score'])
            dict_writer.writeheader()
            dict_writer.writerows(rankedList)
        

# Script entry point: main() writes the per-intent word-count CSV files,
# then combinecsv() merges those files into a single CSV.
if __name__ == '__main__':
    main()
    combinecsv()

[{'score': 132, 'word': 'i'}, {'score': 121, 'word': 'we'}, {'score': 106, 'word': 'you'}, {'score': 106, 'word': 'in'}, {'score': 104, 'word': 'this'}, {'score': 102, 'word': 'kickstarter'}, {'score': 101, 'word': 'with'}, {'score': 100, 'word': 'for'}, {'score': 75, 'word': 'our'}, {'score': 66, 'word': 'out'}, {'score': 61, 'word': 'rt'}, {'score': 48, 'word': 'about'}, {'score': 48, 'word': 'live'}, {'score': 45, 'word': 'your'}, {'score': 45, 'word': 'have'}, {'score': 41, 'word': 'clonsters'}, {'score': 40, 'word': 'my'}, {'score': 36, 'word': 'new'}, {'score': 36, 'word': 'dream'}, {'score': 35, 'word': 'now'}, {'score': 35, 'word': 'will'}, {'score': 34, 'word': 'time'}, {'score': 33, 'word': 'book'}, {'score': 32, 'word': 'from'}, {'score': 32, 'word': 'make'}, {'score': 32, 'word': 'day'}, {'score': 31, 'word': 'all'}, {'score': 31, 'word': 'more'}, {'score': 30, 'word': 'us'}, {'score': 29, 'word': 'get'}, {'score': 29, 'word': 'were'}, {'score': 29, 'word': 'me'}, {'score':