# COIN Project

Authors:

    - Mona
    - Roman
    - Nick
    - Mateo

# Setup

## Packages

In [1]:
import pandas as pd
import numpy as np
import sqlalchemy as sa
import tweepy
import dotenv
import os
import yaml
import datetime
from pprint import pprint

## Environment variables

In [2]:
# Load variables from a local .env file into the process environment; the
# database and Twitter credentials used below are read from os.environ.
dotenv.load_dotenv()

True

In [3]:
# Read the tracked accounts and hashtag groups from the shared YAML config.
# The encoding is pinned because open()'s default text encoding depends on the
# platform locale, which would make parsing differ between machines.
with open("../twitter.yml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

In [5]:
# Display the parsed configuration (tracked accounts and hashtag groups).
config

{'accounts': ['Fridays4future',
  'FridayForFuture',
  'GretaThunberg',
  'luisaneubauer'],
 'hashtags': {'green': ['EndCoal',
   'EndFossilFuels',
   'PeopleNotProfit',
   'NoMoreEmptyPromises',
   'UprootTheSystem',
   'FridaysForFuture',
   'ClimateAction',
   'ClimateJustice',
   'ClimateEmergency',
   'ClimateStrike',
   'SaveThePlanet'],
  'brown': ['climatescam',
   'climatechangehoax',
   'fakeclimate',
   'climatehoax',
   'globalwarmingisahoax'],
  'neutral': ['ClimateCrisis', 'ClimateChange', 'Climate', 'GlobalWarming']}}

## Database connection

In [6]:
# Database connection settings come from the environment; every variable is
# required, so a missing one raises KeyError.
username, password, host, port, name = (
    os.environ[variable]
    for variable in ("DB_USERNAME", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME")
)

In [7]:
# Build the connection URL from its components instead of string formatting:
#   * URL.create escapes special characters (e.g. "@", ":" or "/") in the
#     credentials, which would otherwise corrupt the URL;
#   * DB_PORT is required from the environment, so it is now actually used.
# Requires SQLAlchemy >= 1.4 (URL.create).
engine = sa.create_engine(
    sa.engine.URL.create(
        "mssql+pymssql",
        username=username,
        password=password,
        host=host,
        port=int(port) if port else None,  # empty DB_PORT -> driver default
        database=name,
    )
)

## Tweepy

In [8]:
# Tweepy client authenticated with the bearer token from the environment;
# wait_on_rate_limit=True asks tweepy to wait when the rate limit is hit
# rather than fail.
client = tweepy.Client(bearer_token=os.environ["BEARER_TOKEN"], wait_on_rate_limit=True)

In [9]:
# Second handle on the Twitter API via tweepy.API, authenticated with the same
# bearer token. It is not used by the cells visible in this section.
auth = tweepy.OAuth2BearerHandler(bearer_token=os.environ["BEARER_TOKEN"])
api = tweepy.API(auth)

# Twitter API

## Get UserIDs from Twitter

In [16]:
def _resolve_user_id(handle):
    """Return the numeric Twitter user ID for ``handle``.

    Raises:
        LookupError: if the lookup returns no user data (for example a
            renamed, suspended or misspelled account), so the failing handle
            is named instead of surfacing as an AttributeError on ``None``.
    """
    lookup = client.get_user(username=handle)
    if lookup.data is None:
        raise LookupError(f"Twitter account not found or unavailable: {handle!r}")
    return lookup.data.id


# Map every configured account handle to its user ID (one lookup per handle).
users = {user: _resolve_user_id(user) for user in config["accounts"]}

In [17]:
# Display the resolved handle -> user ID mapping.
users

{'Fridays4future': 1053768884732547072,
 'FridayForFuture': 1072187272815149057,
 'GretaThunberg': 1006419421244678144,
 'luisaneubauer': 2689488949}

In [18]:
# Account whose timeline is downloaded by the get_user_tweets(user_id) call below.
user_id = users["FridayForFuture"]

## Get tweets from user

In [None]:
def get_user_tweets(user_id, start_time='2017-01-01', end_time: str = None, fmt="%Y-%m-%d"):
    """Download a user's tweets and append them to ``tweets_from_accounts.csv``.

    Args:
        user_id: Twitter user ID passed as ``id`` to ``client.get_users_tweets``.
        start_time: Lower bound of the time range, as a string parsed with ``fmt``.
        end_time: Optional upper bound, as a string parsed with ``fmt``. When
            omitted it stays ``None`` and is forwarded unchanged, so this
            function imposes no upper bound of its own.
        fmt: ``strptime`` format used for ``start_time`` and ``end_time``.

    Returns:
        pandas.DataFrame with one row per tweet. The columns listed in
        ``columns`` below are always present, even when no tweet was returned.

    Side effects:
        Appends the rows to ``tweets_from_accounts.csv`` in the working
        directory. The header row is written only when the file is new or
        empty, so repeated calls produce one well-formed CSV. Nothing is
        written when there are no rows.
    """
    csv_path = "tweets_from_accounts.csv"
    columns = [
        "id",
        "author_id",
        "created_at",
        "geo",
        "retweet_count",
        "reply_count",
        "like_count",
        # NOTE: the misspelling is kept on purpose; it is the column name
        # callers of this function already receive.
        "qoute_count",
        "text",
        "entities",
        "context_annotations",
    ]
    expansions = ["author_id"]
    tweet_fields = ["created_at", "geo", "public_metrics", "text", "context_annotations", "entities"]

    start_time = datetime.datetime.strptime(start_time, fmt)
    # No fallback to "now": a missing end_time is deliberately left as None.
    end_time = datetime.datetime.strptime(end_time, fmt) if end_time else None

    pages = tweepy.Paginator(
        client.get_users_tweets,
        id=user_id,
        start_time=start_time,
        end_time=end_time,
        expansions=expansions,
        tweet_fields=tweet_fields
    )
    records = [
        {
            "id": r.id,
            "author_id": r.author_id,
            "created_at": r.created_at,
            "geo": r.geo,
            "retweet_count": r.public_metrics["retweet_count"],
            "reply_count": r.public_metrics["reply_count"],
            "like_count": r.public_metrics["like_count"],
            "qoute_count": r.public_metrics["quote_count"],
            "text": r.text,
            # Nested structures are stored as their string representation.
            "entities": str(r.entities),
            "context_annotations": str(r.context_annotations)
        } for r in pages.flatten()
    ]
    # Passing `columns` keeps the schema stable for an empty result.
    tweets = pd.DataFrame(records, columns=columns)
    if not tweets.empty:
        # Write the header exactly once; files that already contain data keep
        # being appended to without a header, as before.
        write_header = not os.path.isfile(csv_path) or os.path.getsize(csv_path) == 0
        tweets.to_csv(csv_path, index=False, header=write_header, mode="a")
    return tweets

In [None]:
def get_tweets_by_hashtag(hashtag, start_time='2017-01-01', end_time: str = None, fmt="%Y-%m-%d"):
    """Search tweets containing a hashtag and append them to ``tweets_from_hashtags.csv``.

    Args:
        hashtag: Hashtag text without the leading ``#``; the query sent to
            ``client.search_all_tweets`` is ``"#" + hashtag``.
        start_time: Lower bound of the time range, as a string parsed with ``fmt``.
        end_time: Optional upper bound, as a string parsed with ``fmt``. When
            omitted it stays ``None`` and is forwarded unchanged, so this
            function imposes no upper bound of its own.
        fmt: ``strptime`` format used for ``start_time`` and ``end_time``.

    Returns:
        pandas.DataFrame with one row per tweet. The columns listed in
        ``columns`` below are always present, even when no tweet was returned.

    Side effects:
        Appends the rows to ``tweets_from_hashtags.csv`` in the working
        directory. The header row is written only when the file is new or
        empty, so repeated calls (e.g. one per time window) produce one
        well-formed CSV. Nothing is written when there are no rows.
    """
    csv_path = "tweets_from_hashtags.csv"
    columns = [
        "id",
        "author_id",
        "created_at",
        "geo",
        "retweet_count",
        "reply_count",
        "like_count",
        # NOTE: the misspelling is kept on purpose; it is the column name
        # callers of this function already receive.
        "qoute_count",
        "text",
        "entities",
        "context_annotations",
    ]
    expansions = ["author_id"]
    tweet_fields = ["created_at", "geo", "public_metrics", "text", "context_annotations", "entities"]

    start_time = datetime.datetime.strptime(start_time, fmt)
    # No fallback to "now": a missing end_time is deliberately left as None.
    end_time = datetime.datetime.strptime(end_time, fmt) if end_time else None

    pages = tweepy.Paginator(
        client.search_all_tweets,
        query="#" + hashtag,
        start_time=start_time,
        end_time=end_time,
        expansions=expansions,
        tweet_fields=tweet_fields
    )
    records = [
        {
            "id": r.id,
            "author_id": r.author_id,
            "created_at": r.created_at,
            "geo": r.geo,
            "retweet_count": r.public_metrics["retweet_count"],
            "reply_count": r.public_metrics["reply_count"],
            "like_count": r.public_metrics["like_count"],
            "qoute_count": r.public_metrics["quote_count"],
            "text": r.text,
            # Nested structures are stored as their string representation.
            "entities": str(r.entities),
            "context_annotations": str(r.context_annotations)
        } for r in pages.flatten()
    ]
    # Passing `columns` keeps the schema stable for an empty result.
    tweets = pd.DataFrame(records, columns=columns)
    if not tweets.empty:
        # Write the header exactly once; files that already contain data keep
        # being appended to without a header, as before.
        write_header = not os.path.isfile(csv_path) or os.path.getsize(csv_path) == 0
        tweets.to_csv(csv_path, index=False, header=write_header, mode="a")
    return tweets

In [None]:
# Fetch the selected account's tweets using the function's default start_time.
# Each run also appends the rows to tweets_from_accounts.csv.
df_usertweets = get_user_tweets(user_id)

In [None]:
# Fetch #ClimateAction tweets from 2022-05-10 onwards (no end_time given).
# Each run also appends the rows to tweets_from_hashtags.csv.
df_hashtags = get_tweets_by_hashtag("ClimateAction", start_time="2022-05-10")

In [None]:
# Overall crawl range and the width of one request window (one week).
start, end = datetime.datetime(2017, 5, 1), datetime.datetime(2022, 5, 18)
delta = datetime.timedelta(weeks=1)

In [None]:
# Walk backwards from `end` towards `start`, fetching one `delta`-wide window
# per request.
#   * A local cursor is used instead of mutating the global `end`, so
#     re-running this cell always covers the same windows.
#   * The walk stops at `start`, and the last window is clipped to it.
#   * max_windows keeps the cap this cell has always had (seven windows per
#     run); raise it to crawl further back.
max_windows = 7
window_end = end
for i in range(max_windows):
    if window_end <= start:
        break
    start_date = max(window_end - delta, start)
    end_date = window_end
    get_tweets_by_hashtag("ClimateAction", start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
    print(start_date, end_date)
    window_end = start_date

In [None]:
# Current local date; displayed only, not assigned to a variable.
datetime.datetime.today().date()

In [52]:
# Preview a previously saved tweet export. on_bad_lines="warn" makes pandas
# emit a warning for malformed rows and skip them instead of raising.
pd.read_csv("data/tweets_20220519_1247.csv", on_bad_lines="warn")

Unnamed: 0,author_id,conversation_id,created_at,geo,id,text,retweet_count,reply_count,like_count,quote_count,media_keys
0,1068337838,1527219801147461632,2022-05-19 09:29:09+00:00,,1527219801147461632,Dieses Video werden die Aktionär:innen der @De...,50.0,15.0,295.0,2.0,
1,1068337838,1527214056746229773,2022-05-19 09:06:20+00:00,,1527214056746229773,Ein ganz normaler Tag im Klimakollaps: Mit de...,215.0,100.0,1427.0,5.0,
2,1068337838,1527209125851037696,2022-05-19 08:46:44+00:00,,1527209125851037696,"RT @350Deutschland: In Germany, the @KoalaKoll...",36.0,0.0,0.0,0.0,
3,1068337838,1527207260015013889,2022-05-19 08:39:19+00:00,,1527207260015013889,"RT @350Deutschland: 1200 km Ölpipeline, 216000...",31.0,0.0,0.0,0.0,
4,1068337838,1527204602030047232,2022-05-19 08:28:45+00:00,,1527204602030047232,RT @350Deutschland: Die @DeutscheBank untersch...,23.0,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...
30742,1006419421244678144,1007142657720836096,2018-06-14 06:08:16+00:00,,1007142657720836096,RT @daniel_bensson: Obey the law Move the mon...,15.0,0.0,0.0,0.0,
30743,1006419421244678144,1007142570839957504,2018-06-14 06:07:55+00:00,,1007142570839957504,RT @PerArnesjo: Slutsatsen: kött och mjölkprod...,18.0,0.0,0.0,0.0,
30744,1006419421244678144,1006780456539389953,2018-06-13 06:09:00+00:00,,1006780456539389953,"RT @circulareconomy: For example, the fashion ...",70.0,0.0,0.0,0.0,
30745,1006419421244678144,1006584165947576323,2018-06-12 17:09:01+00:00,,1006584165947576323,RT @ParHolmgren: Varmare atmosfär och världsha...,56.0,0.0,0.0,0.0,
