# Conversão de Todos os Arquivos

In [8]:
import pandas as pd
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt
import numpy as np

# Default resolution for figures created from here on.
plt.rcParams.update({'figure.dpi': 150})

# Load the four CSV tables, in the same order as the names on the left.
veja, estadao, folha, uol = (
    pd.read_csv(csvPath)
    for csvPath in (
        'tables/veja.csv',
        'tables/estadao.csv',
        'tables/folha.csv',
        'tables/uol.csv',
    )
)

def getColumnsWithThemes(table):
    """Return the column names of ``table`` that contain the text 'Tema'.

    The original column order is kept and the result is a plain list.
    """
    return [column for column in table.columns if 'Tema' in column]

def getAllThemes(table):
    """Count how often each value occurs across all theme columns of ``table``.

    The theme columns are the ones selected by ``getColumnsWithThemes``.
    Returns a plain ``dict`` mapping each distinct cell value to the number
    of cells holding it, in first-seen order.
    """
    themeColumns = getColumnsWithThemes(table)
    occurrences = Counter(
        value
        for columnName in themeColumns
        for value in table[columnName]
    )
    return dict(occurrences)

def filterTableByYear(table, year):
    """Return the rows of ``table`` whose ``date`` text contains ``year``.

    Args:
        table: DataFrame with a string ``date`` column.
        year: Text (or regex pattern) to look for inside each date.

    Returns:
        A DataFrame with only the matching rows; the index is preserved.
    """
    # na=False treats rows with a missing date as "no match"; without it the
    # mask contains NaN and boolean indexing raises instead of filtering.
    matchesYear = table.date.str.contains(year, na=False)
    return table[matchesYear]

def get2018Table(table):
    """Return the rows of ``table`` whose date contains '2018'."""
    rowsFrom2018 = filterTableByYear(table, '2018')
    return rowsFrom2018
def get2017Table(table):
    """Return the rows of ``table`` whose date contains '2017'."""
    rowsFrom2017 = filterTableByYear(table, '2017')
    return rowsFrom2017
    

# Raw theme counts for each full table, computed in the same order as before.
vejaThemes, estadaoThemes, folhaThemes, uolThemes = map(
    getAllThemes, (veja, estadao, folha, uol)
)

# Canonical theme names. Order matters: matchBaseTheme returns the first
# entry that occurs in the text it is given.
baseThemes = [
    'saude',
    'entretenimento',
    'politica',
    'esporte',
    'ciencia',
    'brasil',
    'educacao',
    'tecnologia',
    'blog',
    'economia',
    'outros',
]

def matchBaseTheme(s):
    """Find the first entry of ``baseThemes`` that occurs inside ``s``.

    Returns ``(baseTheme, s)`` for the first hit, in ``baseThemes`` order,
    or ``None`` when no base theme is a substring of ``s``.
    """
    firstHit = next((base for base in baseThemes if base in s), None)
    if firstHit is None:
        return None
    return (firstHit, s)

def createConversionTable(themes, cleanUpFunction):
    """Build a lookup from each original theme to its normalized theme.

    ``cleanUpFunction`` is called once per key of ``themes`` and must return
    a ``(normalizedTheme, originalTheme)`` pair. The result maps
    ``originalTheme`` to ``normalizedTheme``; if two keys yield the same
    ``originalTheme``, the later one wins.
    """
    rawThemes = list(themes)
    return {
        original: normalized
        for normalized, original in map(cleanUpFunction, rawThemes)
    }

def countThemes(themes, conversionTable):
    """Add up theme quantities per normalized theme.

    Each key of ``themes`` is converted to text and looked up in
    ``conversionTable`` (a missing key raises ``KeyError``); its quantity is
    added to the total of the normalized theme found there.

    Returns an ``OrderedDict`` of normalized theme -> total, sorted from the
    largest total to the smallest.
    """
    totals = Counter()

    for rawTheme, amount in themes.items():
        normalized = conversionTable[str(rawTheme)]
        totals[normalized] += amount

    ranked = totals.most_common()
    return OrderedDict(ranked)

def printTable(table, cleanupFunction, filename, title):
    """Save a bar chart of the theme counts of ``table`` as an image file.

    Args:
        table: DataFrame whose theme columns are counted with ``getAllThemes``.
        cleanupFunction: Maps a raw theme to a
            ``(normalizedTheme, originalTheme)`` pair, as expected by
            ``createConversionTable``.
        filename: Name of the image file written inside ``results/``.
        title: Title drawn above the chart.

    Returns:
        None. The chart is written to ``'results/' + filename`` and the
        figure is closed afterwards.
    """
    themesTable = getAllThemes(table)
    conversionTable = createConversionTable(themesTable, cleanupFunction)
    quantityOfThemes = countThemes(themesTable, conversionTable)

    # Materialize the dict views so matplotlib receives plain sequences.
    labels = list(quantityOfThemes.keys())
    heights = list(quantityOfThemes.values())
    index = np.arange(len(labels))

    # Set the resolution BEFORE the figure is created: a figure picks up
    # 'figure.dpi' at creation time, so setting it after plt.bar() left the
    # first chart at the previous dpi and only later charts at 300.
    plt.rcParams['figure.dpi'] = 300
    plt.bar(index, heights)
    plt.xlabel('Tema', fontsize=10)
    plt.ylabel('Quantidade', fontsize=10)  # label typo 'Quantitidade' fixed
    plt.xticks(index, labels, fontsize=8, rotation=50)
    plt.title(title)
    plt.savefig('results/' + filename, bbox_inches='tight')

    # Reset pyplot state so the next chart starts from an empty figure.
    plt.cla()
    plt.clf()
    plt.close()
    
def cleanupEstadaoThemes(themeString):
    """Map a raw Estadao theme to a ``(normalizedTheme, originalTheme)`` pair.

    The input is converted to text first. A base theme found by
    ``matchBaseTheme`` wins; otherwise text containing 'emais' becomes
    'entretenimento' and everything else becomes 'outros'.
    """
    themeString = str(themeString)

    baseMatch = matchBaseTheme(themeString)
    if baseMatch is not None:
        return baseMatch

    category = 'entretenimento' if 'emais' in themeString else 'outros'
    return (category, themeString)


def cleanVejaThemes(themeString):
    """Map a raw Veja theme to a ``(normalizedTheme, originalTheme)`` pair.

    The input is converted to text first. Precedence:
      1. a base theme found by ``matchBaseTheme``;
      2. 'esporte' / 'placar'  -> 'esporte';
      3. 'blog'                -> 'blog';
      4. a fixed list of markers (see below) -> 'outros';
      5. anything else keeps its own text as the normalized theme.

    Always returns a 2-tuple, which is what ``createConversionTable`` unpacks.
    """
    themeString = str(themeString)

    baseMatch = matchBaseTheme(themeString)
    if baseMatch is not None:
        return baseMatch

    if 'esporte' in themeString or 'placar' in themeString:
        return ('esporte', themeString)

    if 'blog' in themeString:
        # Previously returned the bare string 'blog', which cannot be
        # unpacked into (theme, originalTheme) by createConversionTable.
        return ('blog', themeString)

    # Markers grouped under 'outros'. 'nan' is presumably the text form of a
    # missing cell (str(NaN)); note it is matched as a substring.
    otherMarkers = (
        'galeria-fotos',
        'nan',
        'especiais',
        'gastronomia',
        'mundo',
        'ideias',
        '?p=2643144',
        'revista-veja',
        '?p=1833069',
        'tveja',
    )
    if any(marker in themeString for marker in otherMarkers):
        return ('outros', themeString)

    return (themeString, themeString)


# Ordered (normalizedTheme, markers) rules for UOL themes. The first rule
# with a marker occurring in the theme text wins, so the order of the rules
# is significant (e.g. the 'outros' markers are checked before
# 'colunas'/'opiniao').
_UOL_THEME_RULES = (
    ('esporte', (
        'sport', 'espn', 'andrerocha', 'copadomundo', 'placar',
        'marcelrizzo', 'corneta',
    )),
    ('blog', ('blog', 'liabock', 'paulosampaio')),
    ('politica', (
        'jamilchade', 'poder', 'carlosmelo', 'congressoemfoco',
        'josiasdesouza', 'eleicoes',
    )),
    ('entretenimento', (
        'noticiasdatv', 'glamurama', 'estilo', 'maxima', 'chicobarney',
        'revistatrip', 'mensageirosideral', 'humor', 'tvfama',
        'comidasebebeidas', 'paginacinco', 'superpop', 'memes', 'turismo',
        'virgula', 'carnaval', 'tvefamosos', 'f5', 'ilustrada',
        'comidasebebidas', 'cinema', 'musica', 'mauriciostycer', 'caras',
        'cenapop', 'amauryjr', 'natelinha', 'recreio',
    )),
    ('tecnologia', ('gizmodo', 'ciencia')),
    ('outros', (
        'mundo', 'jornalismo', 'carros', 'paranormal', 'universa',
        'paranaportal', 'aovivo', 'uol', 'piaui', 'zip',
        'aventurasnahistoria', 'viagem', 'cotidiano',
    )),
    ('blog', ('colunas', 'opiniao')),
    ('economia', ('mercado', 'todosabordo')),
    ('saude', ('vivabem',)),
)


def cleanUolThemes(themeString):
    """Map a raw UOL theme to a ``(normalizedTheme, originalTheme)`` pair.

    The input is converted to text first. A base theme found by
    ``matchBaseTheme`` wins; otherwise the rules in ``_UOL_THEME_RULES`` are
    tried in order and the first one with a marker contained in the text
    decides the normalized theme. Text matching no rule becomes 'outros'.

    The statements that used to follow the final ``return`` (splitting the
    text on '#') could never run and were removed; duplicated marker checks
    were dropped. Results are unchanged.
    """
    themeString = str(themeString)

    baseMatch = matchBaseTheme(themeString)
    if baseMatch is not None:
        return baseMatch

    for normalizedTheme, markers in _UOL_THEME_RULES:
        if any(marker in themeString for marker in markers):
            return (normalizedTheme, themeString)

    return ('outros', themeString)

def cleanEstadaoThemes(themeString):
    """Normalize a raw Estadao theme into ``(normalizedTheme, originalTheme)``.

    The input is converted to text. If ``matchBaseTheme`` finds a base theme
    its result is returned as is; otherwise 'emais' in the text selects
    'entretenimento' and any other text falls back to 'outros'.
    """
    themeString = str(themeString)
    matched = matchBaseTheme(themeString)

    if matched is None:
        if 'emais' in themeString:
            return ('entretenimento', themeString)
        return ('outros', themeString)

    return matched


def cleanFolhaThemes(themeString):
    """Map a raw Folha theme to a ``(normalizedTheme, originalTheme)`` pair.

    The input is converted to text first. Precedence:
      1. a base theme found by ``matchBaseTheme``;
      2. 'poder'                            -> 'politica';
      3. 'mercado'                          -> 'economia';
      4. 'colunas' / 'colunista' / 'opiniao' -> 'blog';
      5. anything else                      -> 'outros'.
    """
    themeString = str(themeString)

    baseMatch = matchBaseTheme(themeString)
    if baseMatch is not None:
        return baseMatch

    if 'poder' in themeString:
        return ('politica', themeString)

    if 'mercado' in themeString:
        return ('economia', themeString)

    if any(marker in themeString
           for marker in ('colunas', 'colunista', 'opiniao')):
        # Was 'blogs', which is not one of the base themes and produced a
        # separate bar next to 'blog' in the chart.
        return ('blog', themeString)

    return ('outros', themeString)



# Veja

In [10]:
# One Veja chart per year: filter the table, then plot and save it.
for selectYear, filename, title in (
    (get2017Table, 'veja-2017-total.png', 'Noticias Veja 2017'),
    (get2018Table, 'veja-2018-total.png', 'Noticias Veja 2018'),
):
    printTable(selectYear(veja), cleanVejaThemes, filename, title)

# UOL

In [9]:
# One UOL chart per year: filter the table, then plot and save it.
for selectYear, filename, title in (
    (get2017Table, 'uol-2017-total.png', 'Noticias UOL 2017'),
    (get2018Table, 'uol-2018-total.png', 'Noticias UOL 2018'),
):
    printTable(selectYear(uol), cleanUolThemes, filename, title)

# Estadão

In [5]:
# One Estadão chart per year: filter the table, then plot and save it.
for selectYear, filename, title in (
    (get2017Table, 'estadao-2017-total.png', 'Noticias Estadão 2017'),
    (get2018Table, 'estadao-2018-total.png', 'Noticias Estadão 2018'),
):
    printTable(selectYear(estadao), cleanEstadaoThemes, filename, title)

# Folha

In [7]:
# One Folha chart per year: filter the table, then plot and save it.
for selectYear, filename, title in (
    (get2017Table, 'folha-2017-total.png', 'Noticias Folha 2017'),
    (get2018Table, 'folha-2018-total.png', 'Noticias Folha 2018'),
):
    printTable(selectYear(folha), cleanFolhaThemes, filename, title)