In [None]:
import pymongo
import pandas as pd

# Connection settings for the local MongoDB instance holding the tweets.
DB_CONN_STRING = "mongodb://localhost:27017/"
DB_NAME = "recent-poland-covid-vaccine-tweets"
COLLECTION_NAME = "tweets"

# Resolve client -> database -> collection step by step.
mongo_client = pymongo.MongoClient(DB_CONN_STRING)
database_name = mongo_client[DB_NAME]
data_collection = database_name[COLLECTION_NAME]

# Request only the two fields the analysis below reads.
projection = {"date": 1, "sentiment": 1}
tweets = list(data_collection.find({}, projection))

# One row per fetched document.
df = pd.DataFrame(tweets)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime

# all_dates = [datetime.datetime.strptime(d[:10], "%Y-%m-%d").date() for d in df['date']]
# all_setiment = [s for s in df['sentiment']]

# Work on an explicit copy so the column assignments below modify df_2 itself
# rather than a slice of df (this is what triggers SettingWithCopyWarning).
df_2 = df[['date', 'sentiment']].copy()

# Keep only the calendar day: the first 10 characters of each date string are
# parsed as YYYY-MM-DD and turned into a datetime.date.
df_2['date'] = df_2['date'].apply(lambda d: datetime.datetime.strptime(d[:10], "%Y-%m-%d").date())

# Drop every row dated on or before 2021-01-30. The filter must replace the
# whole frame: assigning the filtered frame to the single 'date' column does
# not remove any rows and fails because the frame has two columns.
df_2 = df_2.loc[df_2['date'] > datetime.date(2021, 1, 30)].copy()

# Make sure sentiment values are floats before aggregating.
df_2['sentiment'] = df_2['sentiment'].apply(float)

# Total sentiment per calendar day, indexed by date.
df2_grouped = df_2.groupby(['date']).sum()

In [None]:
# Daily totals: dates on the x axis, summed sentiment on the y axis.
x_values = df2_grouped.index
y_values = df2_grouped['sentiment']

fig, ax = plt.subplots(figsize=(15, 10))

# One major tick per month, rendered as YYYY-MM-DD.
locator = mdates.MonthLocator()
formatter = mdates.DateFormatter("%Y-%m-%d")
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# Axis labels and title (kept in Polish, as shown to the reader).
ax.set_xlabel("Data")
ax.set_ylabel("Sumaryczny sentyment")
ax.set_title("Sumaryczny sentyment tweetów")

ax.plot(x_values, y_values, '#1da1f2')

# Slant the date labels so they do not overlap.
fig.autofmt_xdate()
plt.show()

In [None]:
# Quick look at the first ten entries of the grouped date index used as the x axis.
x_values[:10]

### Narzędzie do analizy czasowej 

In [None]:
# from_ = datetime.date(2021, 3, 19)
# to_ = datetime.date(2021, 3, 29)

In [None]:
def get_positive_tweets_df(df):
    """Return the rows of ``df`` whose 'sentiment' value is strictly greater than zero."""
    is_positive = df['sentiment'].gt(0)
    return df.loc[is_positive]

def get_neutral_tweets_df(df):
    """Return the rows of ``df`` whose 'sentiment' value is exactly zero."""
    is_neutral = df['sentiment'].eq(0)
    return df.loc[is_neutral]

def get_negative_tweets_df(df):
    """Return the rows of ``df`` whose 'sentiment' value is strictly less than zero."""
    is_negative = df['sentiment'].lt(0)
    return df.loc[is_negative]

def get_tweets_count_by_date(df2, from_, to_):
    """Count tweets per date strictly between ``from_`` and ``to_``.

    Parameters
    ----------
    df2 : DataFrame with a 'date' column; its 'sentiment' column is
        overwritten (on a copy) and so does not influence the result.
    from_, to_ : exclusive lower and upper bounds compared against the dates.

    Returns
    -------
    (x, y) : the dates inside the window (the grouped index) and the number
        of rows for each of those dates (the 'sentiment' column of the
        grouped frame).
    """
    # Setting every sentiment to 1 turns the per-date sum into a row count.
    daily_counts = df2.assign(sentiment=1).groupby(['date']).sum()

    days = daily_counts.index
    in_window = daily_counts.loc[(days > from_) & (days < to_)]

    return in_window.index, in_window['sentiment']

In [None]:
def atom_plot(ax, df, date, event):
    """Plot daily positive, neutral and negative tweet counts around one event.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    df : DataFrame with 'date' and 'sentiment' columns; it is split by the
        get_*_tweets_df helpers and counted by get_tweets_count_by_date.
    date : datetime.date of the event; the window spans five days either side.
    event : text used as the subplot title.
    """
    from_ = date - datetime.timedelta(5)
    to_ = date + datetime.timedelta(5)

    # One line per sentiment class: green = positive, yellow = neutral, red = negative.
    series_styles = (
        (get_positive_tweets_df, '#13bd00'),
        (get_neutral_tweets_df, '#ffe100'),
        (get_negative_tweets_df, '#b50000'),
    )
    for select_rows, colour in series_styles:
        x, y = get_tweets_count_by_date(select_rows(df), from_, to_)
        ax.plot(x, y, colour)

    ax.set_title(event)

    # Pin one tick per day across the whole window before labelling. Calling
    # set_xticklabels without fixed tick positions attaches the labels to
    # whatever ticks the automatic locator happened to choose, so the
    # from_/date/to_ labels would not sit on the dates they name.
    tick_days = [from_ + datetime.timedelta(offset) for offset in range((to_ - from_).days + 1)]
    tick_labels = [day.isoformat() if day in (from_, date, to_) else "" for day in tick_days]
    ax.set_xticks(tick_days)
    ax.set_xticklabels(tick_labels, rotation=75)


In [None]:
def plot_for_dates(df, dates, events):
    """Draw one atom_plot panel per (date, event) pair on a 4-column grid.

    Parameters
    ----------
    df : DataFrame passed through unchanged to atom_plot.
    dates : sequence of event dates, one per panel.
    events : sequence of panel titles, indexed in parallel with ``dates``.
    """
    n_cols = 4
    n = len(dates)

    # Ceiling division gives exactly as many rows as needed (no spare blank
    # row when n is a multiple of 4); keep at least one row so that
    # plt.subplots is never asked for an empty grid.
    n_rows = max(1, -(-n // n_cols))

    # squeeze=False keeps the axes array two-dimensional even for a single
    # row, so panel lookup works for any number of dates.
    fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols,
                           figsize=(16, n_rows * 4), sharey=True, squeeze=False)
    panels = ax.ravel()  # row-major: panel i sits at row i // 4, column i % 4

    for i in range(n):
        atom_plot(panels[i], df, dates[i], events[i])

    # Hide the unused panels at the end of the last row.
    for unused in panels[n:]:
        unused.axis('off')

    plt.subplots_adjust(left=0.1,
                        bottom=0.1,
                        right=0.9,
                        top=0.9,
                        wspace=0.4,
                        hspace=0.8)

In [None]:
# Events to inspect: (event date, title shown above the corresponding panel).
dates_and_events = [
    (datetime.date(2021, 2, 5), "Ogłoszenie luzowania obostrzeń od 12.02"),
    (datetime.date(2021, 2, 12), "Luzowanie obostrzeń"),
    (datetime.date(2021, 2, 24), "Przedłużenie poprzednich obostrzeń"),
    (datetime.date(2021, 3, 5), "Zaostrzenie zasad na Pomorzu"),
    (datetime.date(2021, 3, 11), "Nowe rejony z obostrzeniami"),
    (datetime.date(2021, 3, 17), "Nowe zasady w całej PL"),
    (datetime.date(2021, 3, 20), "Przyspieszenie rejestracji\n na szczepienia dla osób 60+"),
    (datetime.date(2021, 3, 24), "Rekordowa liczba zakażeń"),
    (datetime.date(2021, 4, 1), "Decyzja Ministerstwa Zdrowia\n ws. szczepionki AstraZeneca"),
    (datetime.date(2021, 4, 14), "Oświadczenie episkopatu\n ws. moralności AstraZeneca"),
]

# Split the pairs into two parallel lists, as plot_for_dates expects.
dates = [event_date for event_date, _ in dates_and_events]
events = [event_title for _, event_title in dates_and_events]
plot_for_dates(df_2, dates, events)