In [None]:
import pymongo
import pandas as pd
import re
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Connect to the local MongoDB server and pick the database and collection
# that hold the posts to analyse.
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client.get_database("dadosVivamente")
collection = db.get_collection("postsComPerguntasComInfos")

In [None]:
# Keep only the documents whose "nivel" attribute is greater than or equal to 2.
filtro = {"nivel": {"$gte": 2}}
documentos = collection.find(filter=filtro)

In [None]:
# Materialise the query result as a DataFrame (one row per document).
# A PyMongo cursor can only be iterated once, so iterating `documentos`
# directly would leave it exhausted and make a second run of this cell
# return an empty DataFrame. Iterating over a fresh, unevaluated clone keeps
# the cell re-runnable.
df = pd.DataFrame(list(documentos.clone()))

In [None]:
# Derive the quant* feature columns from the free-text `postStory` field.
#
# Every feature is a small "counter": a function that receives the story
# already lower-cased and returns an int. `_story_feature` applies a counter
# to the whole column, lower-casing each story exactly once and scoring 0 for
# rows whose story is not a string (e.g. missing values).


def _contar_regex(padrao):
    """Return a counter giving the number of non-overlapping matches of ``padrao``."""
    regex = re.compile(padrao)
    return lambda texto: len(regex.findall(texto))


def _contar_trechos(*trechos):
    """Return a counter summing the occurrences of each literal in ``trechos``."""
    return lambda texto: sum(texto.count(trecho) for trecho in trechos)


def _contar_shared_se(termo):
    """Return a counter of 'shared ' occurrences, or 0 when ``termo`` is absent."""
    return lambda texto: texto.count('shared ') if termo in texto else 0


def _foto_com_outros(texto):
    """Return 1 when the story matches the "photo with others" heuristic, else 0."""
    # NOTE(review): the trailing `'others' in texto` test stands on its own, so
    # ANY story containing "others" scores 1 even when it is not about a photo.
    # The first two tests are also identical to `_foto_com`. Behaviour is kept
    # as-is; confirm the intended rule before relying on this feature.
    return int(
        'added a new photo — with' in texto
        or 'new photos — with' in texto
        or ('photo to the album:' in texto and 'with' in texto and 'and' in texto)
        or 'others' in texto
    )


def _foto_com(texto):
    """Return 1 when the story says a photo was added "with" someone, else 0."""
    return int(
        'added a new photo — with' in texto
        or 'new photos — with' in texto
        or ('photo to the album:' in texto and 'with' in texto)
    )


def _story_feature(contar):
    """Apply ``contar`` to each lower-cased story; non-string stories score 0."""
    return df['postStory'].apply(
        lambda story: contar(story.lower()) if isinstance(story, str) else 0
    )


# Column name -> counter. Insertion order defines the order in which the
# columns are appended to `df`.
_STORY_FEATURES = {
    'quantProfile': _contar_regex(r'updated (his|her) profile picture'),
    'quantCover': _contar_regex(r'updated (his|her) cover photo'),
    'quantAddPhotoWithOthers': _foto_com_outros,
    'quantAddPhotoWith': _foto_com,
    'quantAddPhoto': _contar_trechos(
        'added a new photo', 'new photos', 'photo to the album:'
    ),
    'quantFeelThank': _contar_trechos('feeling thankful with'),
    'quantFeelBliss': _contar_trechos('feeling blissful with'),
    'quantFeelExcited': _contar_trechos('feeling excited with'),
    'quantSharedPhoto': _contar_shared_se('photo'),
    'quantSharedVideo': _contar_shared_se('video'),
    'quantSharedLink': _contar_trechos('shared a link'),
    'quantSharedPost': _contar_shared_se('post'),
    'quantSharedEvent': _contar_shared_se('event'),
    # The column name keeps its original spelling so existing consumers of the
    # DataFrame / CSV are not broken.
    'quantSharedMemmory': _contar_trechos('shared a memory'),
    'quantStatus': _contar_regex(r'updated (his|her) status'),
    'quantRelationWith': _contar_trechos('relationship with'),
    'quantLifeEvent': _contar_trechos('added a life event from'),
    'quantTimelinePhoto': _contar_trechos('shared a photo to your timeline.'),
    'quantTimelineVideo': _contar_trechos('shared a video to your timeline.'),
    'quantTimelineWrote': _contar_trechos('wrote on your timeline.'),
    'quantTimelineLink': _contar_trechos('a link to your timeline.'),
}

for coluna, contar in _STORY_FEATURES.items():
    df[coluna] = _story_feature(contar)

In [None]:
# Save the DataFrame to a CSV file, leaving the row index out of the output.
df.to_csv(path_or_buf='postsComPerguntasComInfos.csv', index=False)

In [None]:
# Preview the first rows of the DataFrame.
df.head()

In [None]:
# Parse the post creation timestamps and put the posts in chronological order.
df = (
    df.assign(postCreated_time=lambda quadro: pd.to_datetime(quadro['postCreated_time']))
    .sort_values('postCreated_time')
)

In [None]:
# Calendar date of each post (the time of day is dropped); this column is the
# per-day key used when the posts are grouped.
df['data'] = df['postCreated_time'].dt.date

In [None]:
# Number of posts per user and calendar date, as a flat table with the count
# stored in the "quantidade" column.
df_grouped = (
    df.groupby(['id_usuario', 'data'])
    .size()
    .rename('quantidade')
    .reset_index()
)

In [None]:
# Pick the user in the first row of the grouped table and keep only that
# user's rows.
usuario_id = df_grouped['id_usuario'].iat[0]
df_usuario = df_grouped.loc[df_grouped['id_usuario'] == usuario_id]

In [None]:
# Set up the time series: index the user's rows by date and make the series
# regular (exactly one row per calendar day).
#
# Why not a plain `set_index('data', inplace=True)`:
# * it mutates a filtered slice in place and raises KeyError when the cell is
#   run a second time, because 'data' is no longer a column;
# * the grouped table only has rows for days WITH posts, so the gaps must be
#   filled with 0 - otherwise ARIMA treats non-consecutive days as adjacent
#   observations;
# * a DatetimeIndex with daily frequency lets statsmodels label the forecast
#   with real dates instead of integer positions, which is what the forecast
#   plot needs on its date axis.
if 'data' in df_usuario.columns:  # guard keeps the cell re-runnable
    df_usuario = df_usuario.set_index('data')

datas = pd.to_datetime(df_usuario.index)  # datetime.date objects -> DatetimeIndex
dias = pd.date_range(datas.min(), datas.max(), freq='D', name='data')

df_usuario = (
    df_usuario.set_axis(datas, axis=0)
    .reindex(dias)  # inserts the missing days as NaN rows
    .assign(
        id_usuario=usuario_id,
        # A day absent from the grouped table is a day with zero posts.
        quantidade=lambda quadro: quadro['quantidade'].fillna(0).astype(int),
    )
)

In [None]:
# ACF and PACF plots of the post counts, used to help choose the ARIMA
# orders p, d and q.
for desenhar in (plot_acf, plot_pacf):
    desenhar(df_usuario['quantidade'])
plt.show()

In [None]:
# Define the ARIMA model with order (p, d, q) = (1, 1, 1) on the user's post
# counts and fit it.
model = ARIMA(endog=df_usuario['quantidade'], order=(1, 1, 1))
model_fit = model.fit()

In [None]:
# Print the summary of the fitted model.
print(model_fit.summary())

In [None]:
# Forecast
# NOTE(review): `steps=30` asks for 30 out-of-sample steps; they correspond to
# "the next 30 days" only if the fitted series has one observation per
# consecutive day - confirm against how `df_usuario` was built.
forecast = model_fit.forecast(steps=30)  # forecasting the next 30 steps
print(forecast)

In [None]:
# Plot the observed post counts together with the forecast.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_usuario.index, df_usuario['quantidade'], label='Observado')
ax.plot(forecast.index, forecast, label='Previsto', color='red')
ax.set_title(f'Previsão de Postagens para o Usuário {usuario_id}')
ax.set_xlabel('Data')
ax.set_ylabel('Contagem de Postagens')
ax.legend()
plt.show()