In [None]:
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from adtk.detector import AutoregressionAD
from adtk.data import validate_series

In [None]:
# Connect to the local MongoDB server, then select the database and the
# collection that hold the aggregated posts with their questionnaire answers.
client = pymongo.MongoClient(host="mongodb://localhost:27017/")
db = client.get_database("dadosVivamente")
collection = db.get_collection("postsAgregadoComRespostas")

In [None]:
# Select the documents whose "suicida" field is exactly the string "2".
# NOTE(review): the original comment described this as "greater than or equal
# to 2", but the query is an equality match on a string value — confirm which
# one is intended.
filtro = dict(suicida={"$eq": "2"})
documentos = collection.find(filter=filtro)

In [None]:
# Drain the cursor and build a DataFrame with one row per returned document.
df = pd.DataFrame([documento for documento in documentos])

In [None]:
# Preview the first five rows of the DataFrame.
df.head(n=5)

In [None]:
def _story_feature(rule):
    """Build one feature column from ``df['postStory']``.

    ``rule`` is called with the lower-cased story text and must return an int.
    Entries that are not strings (e.g. missing stories) always yield 0.
    """
    return df['postStory'].apply(
        lambda story: rule(story.lower()) if isinstance(story, str) else 0
    )


def _count_all(text, *fragments):
    """Return the total number of occurrences of every fragment in ``text``."""
    return sum(text.count(fragment) for fragment in fragments)


def _shared_when(text, keyword):
    """Count 'shared ' occurrences, but only when ``keyword`` appears in ``text``."""
    return text.count('shared ') if keyword in text else 0


# --- Binary flags (1/0) for photos added together with other people ---------

# NOTE(review): the word 'others' alone sets this flag, even when none of the
# photo phrases is present — confirm that this is intended.
df['quantAddPhotoWithOthers'] = _story_feature(
    lambda text: int(
        'added a new photo — with' in text
        or 'new photos — with' in text
        or ('photo to the album:' in text and 'with' in text and 'and' in text)
        or 'others' in text
    )
)

df['quantAddPhotoWith'] = _story_feature(
    lambda text: int(
        'added a new photo — with' in text
        or 'new photos — with' in text
        or ('photo to the album:' in text and 'with' in text)
    )
)

# --- Occurrence counts of story phrases --------------------------------------

df['quantAddPhoto'] = _story_feature(
    lambda text: _count_all(
        text, 'added a new photo', 'new photos', 'photo to the album:'
    )
)

# "shared " is counted only when the story also mentions the given media type.
df['quantSharedPhoto'] = _story_feature(lambda text: _shared_when(text, 'photo'))

df['quantSharedVideo'] = _story_feature(lambda text: _shared_when(text, 'video'))

df['quantSharedLink'] = _story_feature(lambda text: text.count('shared a link'))

df['quantSharedPost'] = _story_feature(lambda text: _shared_when(text, 'post'))

# --- Activity on the user's own timeline -------------------------------------

df['quantTimelinePhoto'] = _story_feature(
    lambda text: text.count('shared a photo to your timeline.')
)

df['quantTimelineVideo'] = _story_feature(
    lambda text: text.count('shared a video to your timeline.')
)

df['quantTimelineWrote'] = _story_feature(
    lambda text: text.count('wrote on your timeline.')
)

df['quantTimelineLink'] = _story_feature(
    lambda text: text.count('a link to your timeline.')
)

In [None]:
# Parse the post creation timestamps into pandas datetimes, overwriting the
# original column, then preview the result.
df['postCreated_time'] = pd.to_datetime(df.loc[:, 'postCreated_time'])
df.head(n=5)

In [None]:
# Bucket every post into its calendar month: convert to a monthly period and
# back to a timestamp, so all posts of one month share the same 'data' value.
df['data'] = (
    df['postCreated_time']
    .dt.to_period(freq='M')
    .dt.to_timestamp()
)
df.head(n=5)

In [None]:
# Number of posts per user per month; the count lands in the 'quantidade' column.
df_grouped_data = (
    df.groupby(by=['id_usuario', 'data'])
    .size()
    .reset_index(name='quantidade')
)

In [None]:
# Fewest monthly observations a user needs before the autoregressive detector
# is run. A lag-1 model learns from (previous month, current month) pairs, so
# very short histories give it little or nothing to fit. 3 is a conservative
# floor — tune as needed.
MIN_MESES = 3

# Users skipped for having too little history; reported once after the loop.
usuarios_ignorados = []

# Iterating the groupby visits each user once instead of re-filtering the whole
# frame per user; sort=False keeps users in order of first appearance.
for usuario_id, df_usuario in df_grouped_data.groupby('id_usuario', sort=False):
    # Monthly post counts indexed by month. set_index returns a new frame, so
    # the grouped data is never mutated through a slice. validate_series is
    # ADTK's sanity check of the time index before detection.
    serie = validate_series(df_usuario.set_index('data')['quantidade'])

    if len(serie) < MIN_MESES:
        usuarios_ignorados.append(usuario_id)
        continue

    # Flag months whose post count deviates from what the previous month predicts.
    autoreg_ad = AutoregressionAD(n_steps=1, step_size=1, c=3.0)
    anomalies = autoreg_ad.fit_detect(serie)

    # The comparison with True is deliberate: the detector output may contain
    # NaN for points it cannot score, and NaN must not count as an anomaly.
    anomaly_points = serie[anomalies == True]  # noqa: E712

    fig, ax = plt.subplots(figsize=(14, 7))

    # Time series of monthly post counts.
    sns.lineplot(x=serie.index.astype(str), y=serie.values,
                 label='Quantidade de Posts', ax=ax)

    # Overlay the anomalous months, if any were found.
    if not anomaly_points.empty:
        sns.scatterplot(x=anomaly_points.index.astype(str), y=anomaly_points.values,
                        color='red', s=100, label='Anomalias', ax=ax)

    ax.set_title(f'Usuário {usuario_id} - Quantidade de Posts por Mês/Ano')
    ax.set_xlabel('Mês/Ano')
    ax.set_ylabel('Quantidade de Posts')
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True)
    ax.legend()

    plt.show()
    # Release the figure so one-plot-per-user does not accumulate open figures.
    plt.close(fig)

if usuarios_ignorados:
    print(f'{len(usuarios_ignorados)} usuário(s) ignorado(s) por terem menos de '
          f'{MIN_MESES} meses de dados.')