# NFT Authenticity and Prediction using Sentiment Analysis and Deep Learning

## Imports

In [7]:
import json
import asyncio
import tqdm
import tqdm.asyncio
import nest_asyncio
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import metrics
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
import pandas as pd
nest_asyncio.apply()
import os
from dotenv import load_dotenv
load_dotenv()
from pprint import pprint
from datetime import datetime
from dateutil.relativedelta import relativedelta
from aiohttp import ClientSession, TCPConnector
from matplotlib import pyplot
import tweepy

## Data Collection

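Sample data: the OpenSea collection slugs to analyse, plus the dictionaries that the scraping functions below fill in.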

In [11]:
# OpenSea collection slugs used as the sample set by the scrapers below.
sample_slugs = [
    'evolved-apes-inc',
    'oraclenft',
    'boredapeyachtclub',
    'neo-tokyo-identities',
    'cool-cats-nft',
    'cryptopunks',
    'veefriends',
]
# slug -> collection creation date string; filled in by collectionScrape().
slug_date = dict()
# slug -> {'total_volume', 'num_transactions'}; seeded by collectionScrape()
# and accumulated by transactionScrape().
slug_price = dict()

### OpenSea Metrics from Collection Slugs

Asynchronous Data Collection

In [12]:
async def fetchData(url, session, retry_delay=5, max_retries=None):
    """Fetch ``url`` through ``session`` and return the raw response body.

    A response is accepted only when its body decodes as UTF-8 JSON that does
    not contain a ``'detail'`` entry (presumably the API's error/throttling
    marker -- confirm against the API docs). Any other outcome is retried
    after ``retry_delay`` seconds.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``read()``; the notebook
            passes an aiohttp ``ClientSession``.
        retry_delay: Seconds to wait between attempts (default 5).
        max_retries: Maximum number of retries after the first attempt.
            ``None`` (the default) retries indefinitely.

    Returns:
        The accepted response body as ``bytes``.

    Raises:
        RuntimeError: If ``max_retries`` is set and every attempt failed.
    """
    retries = 0
    while True:
        last_error = None
        body = None
        accepted = False
        async with session.get(url) as response:
            try:
                body = await response.read()
                accepted = 'detail' not in json.loads(body.decode('UTF-8'))
            except Exception as exc:
                # `Exception`, not a bare except: task cancellation and
                # KeyboardInterrupt must propagate instead of being retried.
                last_error = exc
        if accepted:
            return body
        if max_retries is not None and retries >= max_retries:
            raise RuntimeError(
                f'giving up on {url} after {retries + 1} attempts'
            ) from last_error
        retries += 1
        # Sleep outside the response context so the connection is not held
        # while waiting, and loop rather than recurse so retries cannot pile
        # up nested coroutine frames.
        await asyncio.sleep(retry_delay)

async def transactionScrape():
    """Tally successful-sale events for each sample collection's first 14 days.

    For every slug in ``sample_slugs`` this requests OpenSea ``successful``
    events in 14 consecutive one-day windows, starting at the date part of
    ``slug_date[slug]``. Requests carry the ``OPENSEA_API_KEY`` environment
    value as the ``X-API-KEY`` header and are limited to one connection per
    host. The summed sale price and the number of events are stored in
    ``slug_price[slug]``.

    Totals for the sample slugs are reset once all responses are in, so
    re-running the cell does not double-count.

    NOTE(review): each window asks for at most ``limit=300`` events and no
    further pages are requested, so busy days may be under-counted -- confirm.

    Raises:
        KeyError: If a sample slug has no entry in ``slug_date`` (i.e.
            ``collectionScrape`` has not populated it).
    """
    # NOTE(review): assumes `total_price` is denominated in wei -- confirm.
    wei_per_eth = 10 ** 18
    window_count = 14
    tasks = []
    responses = []
    connector = TCPConnector(limit_per_host=1)
    url = "https://api.opensea.io/api/v1/events?collection_slug={slug}&only_opensea=false&event_type=successful&limit=300&occurred_after={start}&occurred_before={end}"
    headers = {"Accept": "application/json", "X-API-KEY": os.getenv('OPENSEA_API_KEY')}
    async with ClientSession(connector=connector, headers=headers) as session:
        for slug in sample_slugs:
            first_day = datetime.fromisoformat(slug_date[slug].split('T')[0])
            for offset in range(window_count):
                start_date = first_day + relativedelta(days=offset)
                end_date = start_date + relativedelta(days=1)
                window_url = url.format(slug=slug, start=start_date.timestamp(), end=end_date.timestamp())
                tasks.append(asyncio.ensure_future(fetchData(window_url, session)))
        for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks)):
            responses.append(await f)

    # Reset only after every request succeeded, so a failed run keeps the
    # previous totals and a successful re-run starts from zero.
    for slug in sample_slugs:
        slug_price[slug] = {'total_volume': 0, 'num_transactions': 0}

    for response in responses:
        payload = json.loads(response.decode('utf8'))
        events = payload.get('asset_events') if isinstance(payload, dict) else None
        if not events:
            # Window without sales (or an unexpected payload): nothing to add.
            continue
        first_event = events[0]
        collection_slug = first_event.get('collection_slug') if isinstance(first_event, dict) else None
        totals = slug_price.get(collection_slug)
        if totals is None:
            # Events for a slug that is not being tracked are ignored.
            continue
        for event in events:
            try:
                price = float(event['total_price'])
            except (KeyError, TypeError, ValueError):
                # Skip just this event; the rest of the window still counts.
                continue
            totals['total_volume'] += price / wei_per_eth
            totals['num_transactions'] += 1

async def collectionScrape():
    """Download OpenSea metadata for every sample slug and seed the lookup tables.

    For each returned collection this records its creation date in
    ``slug_date``, stores a zeroed totals entry in ``slug_price``, and prints
    a one-line summary (name, floor price, total volume, creation date).
    """
    endpoint = "https://api.opensea.io/api/v1/collection/{}"
    payloads = []
    async with ClientSession(connector=TCPConnector()) as session:
        pending = [
            asyncio.ensure_future(fetchData(endpoint.format(slug), session))
            for slug in sample_slugs
        ]
        # Responses are gathered in completion order, not request order.
        for finished in tqdm.tqdm(asyncio.as_completed(pending), total=len(pending)):
            payloads.append(await finished)

    for raw in payloads:
        collection = json.loads(raw.decode('utf8'))['collection']
        name = collection['name']
        stats = collection['stats']
        floor = stats['floor_price']
        volume = stats['total_volume']
        date_created = collection['created_date']
        slug = collection['slug']
        slug_date[slug] = date_created
        slug_price[slug] = {'total_volume' : 0, 'num_transactions': 0 }
        print(f'{name}: Floor: {floor}ETH --- Total Volume: {volume}ETH --- Created {date_created}')


Run Collection Script

In [13]:
# Schedule collectionScrape() on the current event loop and block until it
# finishes (nest_asyncio.apply() was called in the imports cell).
loop = asyncio.get_event_loop()
future = loop.create_task(collectionScrape())
loop.run_until_complete(future)

100%|██████████| 7/7 [00:00<00:00, 34.15it/s]

Cool Cats NFT: Floor: 7.95ETH --- Total Volume: 58832.68086868852ETH --- Created 2021-06-27T09:03:35.403074
Evolved Apes Inc: Floor: 0.0025ETH --- Total Volume: 1155.893264119258ETH --- Created 2021-09-23T12:30:03.410309
Neo Tokyo Identities: Floor: 16.45ETH --- Total Volume: 11202.127364958142ETH --- Created 2021-10-04T15:14:32.376341
Bored Ape Yacht Club: Floor: 51.98ETH --- Total Volume: 248435.61510895318ETH --- Created 2021-04-22T23:14:03.967121
VeeFriends: Floor: 7.948ETH --- Total Volume: 32010.294588127756ETH --- Created 2021-05-11T18:22:19.398578
CryptoPunks: Floor: NoneETH --- Total Volume: 733080.3212493034ETH --- Created 2019-04-26T22:13:09.691572
Oracle (official): Floor: 0.0ETH --- Total Volume: 27.59211999999997ETH --- Created 2021-09-22T10:53:26.948567





Run Transaction Collection Script

In [14]:
# Schedule transactionScrape() on the current event loop, wait for it to
# finish, then show the per-slug totals it accumulated.
loop = asyncio.get_event_loop()
future = loop.create_task(transactionScrape())
loop.run_until_complete(future)
pprint(slug_price)

100%|██████████| 98/98 [05:17<00:00,  3.24s/it]


{'boredapeyachtclub': {'num_transactions': 1493,
                       'total_volume': 1036.042150458877},
 'cool-cats-nft': {'num_transactions': 3000, 'total_volume': 1348.559469115312},
 'cryptopunks': {'num_transactions': 112, 'total_volume': 50.751900000000006},
 'evolved-apes-inc': {'num_transactions': 2968,
                      'total_volume': 359.1697327089688},
 'neo-tokyo-identities': {'num_transactions': 541,
                          'total_volume': 5044.5502018807865},
 'oraclenft': {'num_transactions': 465, 'total_volume': 20.559649999999987},
 'veefriends': {'num_transactions': 86, 'total_volume': 204.1384013352507}}


In [8]:
# Re-display the per-slug totals held in slug_price.
# NOTE(review): this cell's execution count (In [8]) is lower than the cells
# above it, so its saved output likely comes from an earlier run -- re-run or
# remove before sharing.
pprint(slug_price)

{'cool-cats-nft': {'num_transactions': 900, 'total_volume': 60.342537256013955},
 'cryptopunks': {'num_transactions': 3, 'total_volume': 2.7},
 'evolved-apes-inc': {'num_transactions': 2028,
                      'total_volume': 320.1946983101896},
 'neo-tokyo-identities': {'num_transactions': 463,
                          'total_volume': 4264.695525091693},
 'veefriends': {'num_transactions': 10, 'total_volume': 14.500000000024919}}


### Twitter Historical Tweet Collection

In [19]:
# Twitter API credentials come from environment variables (load_dotenv() is
# called in the imports cell); each value is None when its variable is unset.
(
    consumer_key,
    consumer_secret,
    access_token,
    access_token_secret,
) = map(os.getenv, ('CONSUMER_KEY', 'CONSUMER_SECRET', 'ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET'))

In [20]:
# Build the OAuth handler from the consumer credentials and attach the
# user's access token to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Client object used by get_tweets() for the archive search.
api = tweepy.API(auth)

In [22]:
# Project names to search for, taken from the "name" column of collections.csv.
collections = pd.read_csv("collections.csv").loc[:, "name"]
print(collections)

0       Bored Ape Yacht Club
1           Evolved Apes NFT
2                 Oracle NFT
3       Neo Tokyo Identities
4              Cool Cats NFT
5             Hype Hippo NFT
6             Veefriends NFT
7            The Sandbox NFT
8                 Apymon NFT
9             CyberKongz NFT
10               Meebits NFT
11        World of Women NFT
12     Mutant Human Club NFT
13        The Doge Pound NFT
14        Creature World NFT
15         Angry Anglers NFT
16                CloneX NFT
17               Doodles NFT
18      Panda Golf Squad NFT
19        Pepsi Mic Drop NFT
20         Miss Universe NFT
21    CryptoBull Society NFT
22      Chromie Squiggle NFT
23    Farmer Apes (FAYC) NFT
24          Jadu Jetpack NFT
25            Lazy Lions NFT
26          CryptoVoxels NFT
27           Doge Battle NFT
Name: name, dtype: object


In [23]:
def get_tweets(proj):
    """Search the Twitter full archive for ``proj`` and save the results as CSV.

    Args:
        proj: Search query (a project name). It also determines the output
            file name: spaces are replaced with underscores.

    Side effects:
        Prints the query and the target path, creates ``./data`` if it does
        not exist, and writes ``./data/<name>.csv`` with ``tweets`` and
        ``timestamps`` columns (plus the default index column).

    NOTE(review): only the statuses returned by this single
    ``search_full_archive`` call are saved; no further pages are requested.
    """
    filename = "./data/" + "_".join(proj.split(" ")) + ".csv"
    print("Collecting tweets for:", proj)
    print("Writing results to", filename)

    resp = api.search_full_archive("prod", proj)

    # One (text, timestamp) record per status, so the columns cannot drift
    # apart and an empty result still yields a frame with both columns.
    records = [(status.text, status.created_at) for status in resp]
    df = pd.DataFrame(records, columns=["tweets", "timestamps"])

    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    df.to_csv(filename)

In [25]:
# Collect and save tweets for every project name; each get_tweets() call
# writes one CSV under ./data/.
for proj in collections:
    get_tweets(proj)

Collecting tweets for: Bored Ape Yacht Club
Writing results to ./data/Bored_Ape_Yacht_Club.csv
Collecting tweets for: Evolved Apes NFT
Writing results to ./data/Evolved_Apes_NFT.csv
Collecting tweets for: Oracle NFT
Writing results to ./data/Oracle_NFT.csv
Collecting tweets for: Neo Tokyo Identities
Writing results to ./data/Neo_Tokyo_Identities.csv
Collecting tweets for: Cool Cats NFT
Writing results to ./data/Cool_Cats_NFT.csv
Collecting tweets for: Hype Hippo NFT
Writing results to ./data/Hype_Hippo_NFT.csv
Collecting tweets for: Veefriends NFT
Writing results to ./data/Veefriends_NFT.csv
Collecting tweets for: The Sandbox NFT
Writing results to ./data/The_Sandbox_NFT.csv
Collecting tweets for: Apymon NFT
Writing results to ./data/Apymon_NFT.csv
Collecting tweets for: CyberKongz NFT
Writing results to ./data/CyberKongz_NFT.csv
Collecting tweets for: Meebits NFT
Writing results to ./data/Meebits_NFT.csv
Collecting tweets for: World of Women NFT
Writing results to ./data/World_of_Wome

## Classification Models

### Gaussian Naive Bayes

In [1]:
def predictNB(features, labels):
    """Return the mean accuracy of a Gaussian Naive Bayes model over 10-fold cross-validation.

    Args:
        features: Feature matrix passed to ``cross_val_score``.
        labels: Target values aligned with ``features``.

    Returns:
        The mean of the per-fold accuracy scores.
    """
    classifier = GaussianNB()
    fold_accuracies = cross_val_score(classifier, features, labels, cv=10, scoring='accuracy')
    return fold_accuracies.mean()

### Support Vector Machine (SVM)

In [None]:
def predictSVM(train_x, test_x, train_y, test_y):
    """Fit a default ``svm.SVC`` on the training split and score it on the test split.

    Args:
        train_x: Training features.
        test_x: Test features.
        train_y: Training labels.
        test_y: Test labels.

    Returns:
        Accuracy of the fitted classifier's predictions on ``test_x``.
    """
    classifier = svm.SVC().fit(train_x, train_y)
    return metrics.accuracy_score(test_y, classifier.predict(test_x))

### Multilayer Perceptron (MLP)

In [1]:
def predictMLP(train_x, test_x, train_y, test_y):
    """Train a small dense binary classifier, plot its training loss, and return test accuracy.

    The network has four ReLU hidden layers (10, 8, 8 and 5 units), each
    followed by 20% dropout, and a single sigmoid output unit. It is trained
    for 150 epochs with Adam on binary cross-entropy, batch size 32.

    Args:
        train_x: Training features; ``train_x.shape[1]`` sets the input width.
        test_x: Test features.
        train_y: Training labels.
        test_y: Test labels.

    Returns:
        The accuracy reported by ``model.evaluate`` on the test split.
    """
    hidden_units = (10, 8, 8, 5)
    dropout_rate = 0.2
    n_features = train_x.shape[1]

    layers = []
    for position, units in enumerate(hidden_units):
        # Only the first layer declares the input shape.
        extra = {'input_shape': (n_features,)} if position == 0 else {}
        layers.append(Dense(units, activation='relu', kernel_initializer='he_normal', **extra))
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(1, activation='sigmoid'))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(train_x, train_y, epochs=150, batch_size=32, verbose=0)

    # Training-loss curve, one point per epoch.
    pyplot.plot(history.history['loss'], label='train')
    pyplot.title('Learning Curve')
    pyplot.xlabel('Epoch')
    pyplot.ylabel('Binary Cross Entropy')
    pyplot.legend()
    pyplot.show()

    _loss, accuracy = model.evaluate(test_x, test_y, verbose=0)
    return accuracy
