In [32]:
import os
from dotenv import load_dotenv, find_dotenv
import tweepy as tw
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
from transformers import TextClassificationPipeline, AutoModelForSequenceClassification, AutoTokenizer
import requests

In [33]:
# Read credentials from the .env file located by find_dotenv() so that no
# secret is hard-coded in the notebook.
load_dotenv(find_dotenv())

# OAuth 1.0a credentials: only referenced by the commented-out V1 client.
consumer_key = os.getenv('CONSUMER_KEY')
consumer_secret = os.getenv('CONSUMER_SECRET')
access_token = os.getenv('ACCESS_TOKEN')
access_token_secret = os.getenv('ACCESS_TOKEN_SECRET')

# The V2 client is built from the bearer token alone. os.getenv() returns None
# for a missing variable, so stop here with a clear message rather than letting
# the first API request fail with an opaque error.
bearer_token = os.getenv('BEARER_TOKEN')
if not bearer_token:
    raise RuntimeError(
        "BEARER_TOKEN is not set; add it to your .env file or environment."
    )


In [34]:
# V1
# auth = tw.OAuthHandler(consumer_key, consumer_secret)
# auth.set_access_token(access_token, access_token_secret)
# api = tw.API(auth, wait_on_rate_limit=True)

#V2
# Twitter API v2 client authenticated with the bearer token.
# return_type=requests.Response hands back the raw HTTP response, so callers
# decode the JSON payload themselves.
client = tw.Client(
    bearer_token=bearer_token,
    return_type=requests.Response,
    wait_on_rate_limit=True,
)


In [35]:
#V1
# def get_tweets(max_tweets=5):
#     # Define the search term
#     search_words = "#bitcoin -filter:retweets"
#     # Collect tweets
#     tweets = tw.Cursor(api.search_tweets, q=search_words, lang="en", tweet_mode="extended").items(max_tweets)
#     return [tweet.full_text for tweet in tweets]

#V2
def get_tweets(max_tweets=10, query='bitcoin #BTC -is:retweet'):
    """Return the text of recent tweets matching ``query``.

    Parameters
    ----------
    max_tweets : int, optional
        Maximum number of tweet texts to return. Values of zero or less
        return an empty list without calling the API.
    query : str, optional
        Search query passed to the recent-search endpoint. The default
        matches bitcoin/#BTC tweets and excludes retweets.

    Returns
    -------
    list of str
        The ``text`` field of each returned tweet, at most ``max_tweets``
        items; empty when the search matches nothing.

    Notes
    -----
    Relies on the module-level ``client`` being configured with
    ``return_type=requests.Response`` (the response is decoded with
    ``.json()`` here).
    """
    if max_tweets <= 0:
        return []

    # The recent-search endpoint rejects max_results below 10, so request at
    # least that many and trim the result afterwards.
    page_size = max(max_tweets, 10)

    response = client.search_recent_tweets(
        query=query,
        tweet_fields=['text'],
        max_results=page_size,
    )

    # The payload has no 'data' key when nothing matches the query.
    tweets = response.json().get('data', [])

    return [tweet['text'] for tweet in tweets[:max_tweets]]


In [36]:
# Fetch one batch of tweet texts using get_tweets' default arguments and
# print the raw list for a quick look at what came back.
posts = get_tweets()
print(posts)

['you ?? The furthest dis#粉丝 #twitter粉丝  #twitter刷粉  #blockchain #btc #eth比特币 以太坊 #Bitcoin#Binance#web#NFTs#NFT#NFTCommunity#NFTGiveaway#Crypto#cryptocutance in the world is not when I stand in', '📈 $BTC / $USDT Longed(Buy) $19396.70\n[01:31:21 UTC] [10:31:21 JST]\n34.356 #BTC ($666393.03 #USDT)\n#BTCUSDT #Bitcoin #Tether #Binance #Futures #BinanceFutures #xCryptoAlert #xCryptoAlert_BTC\n🐳🐳🐳\n\nhttps://t.co/mvjG3b1P2H', 'that you were s#粉丝 #twitter粉丝  #twitter刷粉  #blockchain #btc #eth比特币 以太坊 #Bitcoin#Binance#web#NFTs#NFT#NFTCommunity#NFTGiveaway#Crypto#cryptocu12 o ordinary and ugly', '#Bitcoin Last Price $19416 #BTC 🚀\nDaily Indicators:\n•Variation since 00h00(UTC): +0.01%\n•MACD:\n -MACD Line: -116\n -Signal Line: -187\n\n#Ethereum Last Price $1313 #ETH\n•Variation: -0.16%\n•MACD:\n -MACD Line: -44\n -Signal Line: -55\n\n#Blockchain #Web3\n(2/6)\n282193', 'get your first $15 bonus at  https://t.co/GTFvUmTtEL   #cryptocurrency #crypto #casino #bitcoin #blackjack #btc #gambling #bettin

### Load the model

Model card: [ElKulako/cryptobert](https://huggingface.co/ElKulako/cryptobert) — CryptoBERT is a pre-trained language model for cryptocurrency-related social media posts.

In [37]:
# Hugging Face Hub identifier of the CryptoBERT checkpoint.
model_name = "ElKulako/cryptobert"

# Sequence-classification head with three output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Bundle tokenizer and model so raw strings can be classified in one call.
pipe = TextClassificationPipeline(tokenizer=tokenizer, model=model)

### Get the sentiment scores

In [38]:
# post_1 & post_3 = bullish, post_2 = bearish
# NOTE(review): the expectation above looks like it describes a fixed example
# set rather than the live tweets held in `posts` — confirm or remove.
# Run the classifier over every fetched post; the predictions are tabulated
# in the following cell.
preds = pipe(posts)
# Scratch check: shows the type of a DataFrame built from the raw posts.
type(pd.DataFrame(posts))

pandas.core.frame.DataFrame

In [39]:
# Tabulate the pipeline predictions, one row per post.
df = pd.DataFrame(preds)
# Scratch check of the resulting object's type.
type(df)

pandas.core.frame.DataFrame

In [40]:
# Show each tweet next to its prediction. The text column is named explicitly:
# a DataFrame built from the bare list would label it with the integer 0.
pd.concat([pd.DataFrame({'text': posts}), df], axis=1)

Unnamed: 0,0,label,score
0,you ?? The furthest dis#粉丝 #twitter粉丝 #twitte...,Bullish,0.771797
1,📈 $BTC / $USDT Longed(Buy) $19396.70\n[01:31:2...,Neutral,0.568028
2,that you were s#粉丝 #twitter粉丝 #twitter刷粉 #bl...,Bullish,0.708102
3,#Bitcoin Last Price $19416 #BTC 🚀\nDaily Indic...,Bullish,0.462602
4,get your first $15 bonus at https://t.co/GTFv...,Bullish,0.768864
5,2022年10月9日ビットコイン相場分析\nhttps://t.co/Xin7a25D5r\...,Neutral,0.686278
6,Seven Bored Apes Worth Over $700K Stolen - htt...,Bullish,0.609911
7,You're an inch i#粉丝 #twitter粉丝 #twitter刷粉 #b...,Bullish,0.72657
8,"💵4,944 #BTC (96,013,250 USD) move from unknown...",Bullish,0.603553
9,💵💵🌱🌱\nJust letting everyone know. For the Sept...,Bullish,0.68517
