# Loading Required Libraries

In [118]:
print("# Loading all Libraries")
import os
import pickle
import re

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
import tweepy
from joblib import dump, load
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tqdm import tqdm
from tweepy import OAuthHandler

# Make sure the NLTK stopword corpus is available locally; it is read later via
# stopwords.words('english').
nltk.download('stopwords')

# Loading all Libraries


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Artemis\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Loading Sentiment Analysis Model

In [101]:
print("# loading model with weights")
def load_model(model, weight) :
    """Rebuild a compiled Keras network from a YAML architecture file plus weights.

    Parameters
    ----------
    model : str
        Path to the YAML file describing the architecture.
    weight : str
        Path to the weights file handed to ``load_weights``.

    Returns
    -------
    The compiled network with the stored weights loaded.
    """
    with open(model, 'r') as yaml_file:
        architecture = yaml_file.read()

    # NOTE(review): model_from_yaml parses YAML in a way that can run arbitrary
    # code and is reported as removed in newer TensorFlow releases -- confirm the
    # pinned TensorFlow version and only feed it trusted files.
    network = tf.keras.models.model_from_yaml(architecture)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    network.load_weights(weight)
    return network

# Build the sentiment network from the saved architecture and weights files,
# then print its layer summary as a quick sanity check.
model = load_model('../sentiment-analysis-model/model-cpu.yaml', '../sentiment-analysis-model/weights-cpu.h5')
model.summary()

# loading model with weights


  config = yaml.load(yaml_string)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 48, 128)           640000    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 48, 128)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 48, 196)           254800    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 48, 196)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 196)               308112    
_________________________________________________________________
dense (Dense)                (None, 2)                 394       
Total params: 1,203,306
Trainable params: 1,203,306
Non-trainable params: 0
______________________________________________

# Loading Tokenizer

In [102]:
print("# loading the tokenizer")
# Restore the pickled tokenizer object (presumably the Keras Tokenizer fitted at
# training time -- it is later used through texts_to_sequences).
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# come from a trusted source.
with open('../sentiment-analysis-model/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# loading the tokenizer


# Loading Classification Model

In [120]:
# Restore the final classifier with joblib; main() calls classifier.predict on
# one row of [sentiment, user rating, sentence score].
# NOTE(review): joblib.load is pickle-based, so only load trusted artefacts.
classifier = load('../classification-model/clf')

# Setting Up Twitter API

In [104]:
# Twitter API credentials are read from environment variables so that secrets
# never live in the notebook or its history. The keys that used to be hardcoded
# in this cell must be considered leaked and should be revoked/regenerated.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET')

try :
    if not all([consumer_key, consumer_secret, access_token, access_token_secret]) :
        raise ValueError("Twitter credentials are not set in the environment")

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    # Building the handler and API object does not talk to Twitter, so ask the
    # service to confirm the credentials before reporting success. Depending on
    # the tweepy version this either returns a falsy value or raises on failure.
    if not api.verify_credentials() :
        raise ValueError("Twitter rejected the supplied credentials")

    print("Authentication Successful!")
except Exception as error :
    # Report the reason instead of silently swallowing every exception.
    print("Authentication Failed!", error)

Authentication Successful!


# Class to Fetch Timeline Tweets

In [105]:
class Twitter_User():
    """Thin wrapper around the module-level tweepy ``api`` for one account.

    Parameters
    ----------
    username : str
        Screen name (handle) of the account.
    count : int, optional
        Number of timeline entries requested from the API (default 200).

    Attributes
    ----------
    id : numeric user id returned by the API.
    screen_name : screen name as returned by the API.
    username : the handle exactly as passed by the caller.
    count : requested number of timeline entries.
    data : DataFrame of the last fetched tweets, or None before ``get_tweets``.
    """

    def __init__(self,username,count=200):
        # Look the handle up explicitly as a screen name so that an all-digit
        # handle is not mistaken for a numeric user id.
        profile = api.get_user(screen_name=username)
        self.id = profile.id
        # Cached so that __repr__ does not need a network round trip.
        self.screen_name = profile.screen_name
        self.username = username
        self.count = count
        self.data = None

    def get_tweets(self):
        """Fetch the user's timeline and return the non-retweet texts.

        Returns
        -------
        pandas.DataFrame
            Single column ``TEXT`` holding the stripped tweet texts; entries
            that start with the retweet marker ``RT @`` are dropped.
        """
        statuses = api.user_timeline(user_id=self.id, count=self.count)
        texts = [status._json["text"].strip() for status in statuses]

        frame = pd.DataFrame(texts, columns=["TEXT"])
        # Retweets are rendered as "RT @user: ..."; matching only "RT" would
        # also discard ordinary tweets that merely begin with those letters.
        self.data = frame[~frame["TEXT"].str.startswith('RT @')]
        return self.data


    def __repr__(self):
        # Use the name cached at construction time; repr() must not hit the API.
        return self.screen_name

# Function to process input URL

In [164]:
def process_url(url) :
    """Extract the screen name and tweet id from a tweet URL.

    Parameters
    ----------
    url : str
        Text containing a link of the form
        ``https://twitter.com/<screen_name>/status/<tweet_id>``; an optional
        ``www.`` or ``mobile.`` prefix and trailing query string are accepted.

    Returns
    -------
    tuple of (str, str) or bool
        ``(screen_name, tweet_id)`` when a tweet link is found, otherwise
        ``False``.
    """
    # Handles may contain underscores, and the dots in the host name are
    # escaped so that they only match a literal ".".
    match = re.search(
        r'https?://(?:www\.|mobile\.)?twitter\.com/([A-Za-z0-9_]+)/status/([0-9]+)',
        url,
    )
    if match is None :
        return False
    return (match.group(1), match.group(2))

# Function to Fetch Tweet/User Info

In [165]:
def tweet_info(url) :
    """Fetch a tweet by URL and return selected user and tweet fields.

    Parameters
    ----------
    url : str
        Link to a tweet, parsed with ``process_url``.

    Returns
    -------
    tuple of (dict, dict)
        ``(user_info, tweet_info)`` holding the selected keys copied from the
        status payload returned by the API.

    Raises
    ------
    ValueError
        If ``url`` cannot be parsed as a tweet link.
    """
    parsed = process_url(url)
    if not parsed :
        # process_url signals failure with False; fail with a clear message
        # instead of the opaque error that indexing False would give.
        raise ValueError("Not a recognisable tweet URL: {!r}".format(url))

    tweet = api.get_status(parsed[1])
    payload = tweet._json

    tweet_keys = ['created_at', 'id', 'text', 'retweet_count', 'favorite_count']
    user_keys = ['id', 'name', 'screen_name', 'url', 'description', 'followers_count', 'friends_count', 'profile_image_url_https']

    tweet_details = {key: payload[key] for key in tweet_keys}
    user_details = {key: payload['user'][key] for key in user_keys}

    return (user_details, tweet_details)

# Functions to Preprocess Timeline Tweets

In [108]:
# Shared stemmer instance used by apply_stemming.
porter = PorterStemmer()
# English stopwords kept in a set so that the per-word membership test in
# remove_stopwords is O(1) instead of a linear scan of a list.
sw = set(stopwords.words('english'))
# "not" flips sentiment, so it must survive stopword removal.
sw.discard('not')

def remove_tags(text):
    """Return ``text`` with every ``<...>`` markup tag deleted."""
    return re.sub(r'<[^>]+>', '', text)

def remove_single_chars(text) :
    """Drop one-character tokens and re-join the rest with single spaces."""
    kept = []
    for token in text.split() :
        if len(token) > 1 :
            kept.append(token)
    return " ".join(kept)

def remove_stopwords(text) :
    """Return ``text`` without the words listed in the module-level ``sw``.

    The input is split on whitespace and the surviving words are joined with
    single spaces.
    """
    survivors = filter(lambda token: token not in sw, text.split())
    return " ".join(survivors)

def apply_stemming(text) :
    """Stem every space-separated token of ``text`` with the shared ``porter``.

    The text is split on a single space, so empty tokens produced by
    consecutive spaces are passed to the stemmer as well and the spacing is
    preserved in the result.
    """
    return " ".join(porter.stem(token) for token in text.split(" "))

def preprocess_text(sen) :
    """Normalise one tweet into the cleaned, stemmed form used downstream.

    Steps, in order: strip markup tags, lower-case, delete @mentions, delete
    links, turn every non-letter into a space, remove stopwords, remove
    one-character tokens, stem.

    NOTE(review): this presumably has to mirror the preprocessing used when the
    sentiment model was trained -- confirm before changing any step.
    """
    cleaned = remove_tags(sen).lower()

    # (pattern, replacement) pairs applied one after another.
    substitutions = (
        ('@[A-Za-z]+[A-Za-z0-9-_]+', ''),   # @mentions
        (r"http\S+", ""),                   # links
        ('[^a-zA-Z]', ' '),                 # anything that is not a letter
    )
    for pattern, replacement in substitutions :
        cleaned = re.sub(pattern, replacement, cleaned)

    return apply_stemming(remove_single_chars(remove_stopwords(cleaned)))

# Function to Calculate User Rating

In [109]:
def calculate_rating(user_screen_name, negative_threshold=0.3, positive_threshold=0.7) :
    """Rate a user as overall positive (1) or negative (0) from their timeline.

    Each timeline tweet is preprocessed, tokenised, padded to 48 tokens and
    scored by the sentiment model. A tweet whose second output value is above
    ``positive_threshold`` counts +1, one below ``negative_threshold`` counts
    -1, anything in between is ignored.

    Parameters
    ----------
    user_screen_name : str
        Handle of the account to rate.
    negative_threshold : float, optional
        Scores strictly below this count as negative (default 0.3).
    positive_threshold : float, optional
        Scores strictly above this count as positive (default 0.7).

    Returns
    -------
    int
        0 if negative tweets outnumber positive ones, otherwise 1. A user with
        no usable tweets is rated 1, the same result as a tied score.
    """
    user = Twitter_User(user_screen_name)
    tweets = user.get_tweets().TEXT.values.tolist()

    # Nothing to score: skip the model instead of predicting on an empty batch.
    if not tweets :
        return 1

    # preprocess all tweets
    preprocessed = [preprocess_text(sent) for sent in tweets]

    # tokenize and pad all tweets
    X = tokenizer.texts_to_sequences(preprocessed)
    X = pad_sequences(X, 48)

    # predict sentiment; index 1 of each output row is the score that gets thresholded
    positive_scores = [row[1] for row in model.predict(X)]

    # calculate rating : confident positives minus confident negatives
    upvotes = sum(1 for value in positive_scores if value > positive_threshold)
    downvotes = sum(1 for value in positive_scores if value < negative_threshold)
    score = upvotes - downvotes

    return 0 if score < 0 else 1

# Function to Predict Sentiment of Original Tweet

In [110]:
def sentiment(tweet) :
    """Score a single tweet text with the sentiment model.

    The text is tokenised, padded to 48 tokens and passed to ``model``; the
    value at index 1 of the first output row is returned.
    """
    sequences = tokenizer.texts_to_sequences([tweet])
    padded = pad_sequences(sequences, 48)
    scores = model.predict(padded)
    return scores[0][1]

# Calculate Sentence Score

In [111]:
# Word-score lexicon: rows of the positive file followed by rows of the
# negative file, turned into one lookup table. dict() needs every row to have
# exactly two values (presumably word, score -- verify against the CSVs), and
# on duplicate keys the later (negative-file) row wins.
pos = pd.read_csv("../../db/10-positive-word-score.csv")
neg = pd.read_csv("../../db/10-negative-word-score.csv")

lst = pos.values.tolist()
lst.extend(neg.values.tolist())

dictionary = dict(lst)

In [112]:
def calculate_sent_score(text) :
    """Classify ``text`` as 1 or 0 from the summed lexicon scores of its words.

    Words are obtained by splitting on a single space; words missing from the
    module-level ``dictionary`` contribute nothing. Returns 0 when the total is
    negative and 1 otherwise (including when no word is known).
    """
    total = sum(dictionary[word] for word in text.split(" ") if word in dictionary)
    if total < 0 :
        return 0
    return 1

# Fetching User & Tweet Details

# Main Function

In [185]:
def main(url) :
    """Run the whole pipeline for one tweet URL and return the classifier label.

    Three features are computed -- the sentiment score of the preprocessed
    tweet, the author's timeline rating and the lexicon-based sentence score --
    and passed as a single row to ``classifier``.

    Parameters
    ----------
    url : str
        Link to the tweet to classify.

    Returns
    -------
    The first element of ``classifier.predict`` for the feature row.
    """
    user, tweet = tweet_info(url)
    userid = user['screen_name']
    newtext = preprocess_text(tweet['text'])

    # All text-based features use the preprocessed tweet. The earlier version
    # referenced names (pp_tweet, ip) that are never defined in the notebook
    # and therefore only worked on leftover kernel state.
    prediction = sentiment(newtext)
    rating = calculate_rating(userid)
    score = calculate_sent_score(newtext)

    df = pd.DataFrame([[prediction, rating, score]])
    return classifier.predict(df)[0]

In [187]:
# Example run on a single tweet URL; this goes through the Twitter API, so it
# needs network access and an authenticated `api` object.
main("https://twitter.com/WorstInvestor2/status/1395045485195505667?s=20")

1