Importing libraries

In [None]:
import numpy as np
import re

import pickle
!pip install tweepy==3.10.0



In [None]:
# Load the trained classifier and its fitted TF-IDF vectorizer from disk.
# Context managers make sure both file handles are closed after loading.
# NOTE(security): unpickling can execute arbitrary code, so only load
# artifacts that you produced yourself or otherwise trust.
with open('/content/trained_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
with open('/content/tfidf_vectorizer.sav', 'rb') as vectorizer_file:
    loaded_tfidf = pickle.load(vectorizer_file)

In [None]:
# Contraction patterns and their expansions, applied in order (case-insensitive).
# Word-specific forms come first so the generic suffix rules ("n't", "'s", ...)
# do not consume them.  Compound forms such as "won't've" need no entry of
# their own: "won't" expands first and the later "'ve" rule finishes the job.
_CONTRACTIONS = (
    (r"won't", " will not"),
    (r"can't", " can not"),
    (r"don't", " do not"),
    (r"ma'am", " madam"),
    (r"let's", " let us"),
    (r"ain't", " am not"),
    (r"sha'n't", " shall not"),
    (r"shan't", " shall not"),
    (r"o'clock", " of the clock"),
    (r"y'all", " you all"),
    (r"n't", " not"),
    (r"'re", " are"),
    (r"'s", " is"),
    (r"'d", " would"),
    (r"'ll", " will"),
    (r"'t", " not"),
    (r"'ve", " have"),
    (r"'m", " am"),
)


def preprocess_data(data):
    """Normalize one raw tweet into a lowercase, space-separated token string.

    Steps, in order:
      1. strip URLs (anything starting with ``http`` or ``www.``),
      2. expand English contractions,
      3. drop @mentions, ``#`` signs and the standalone retweet marker ``RT``,
      4. drop HTML tags,
      5. replace every character outside ``[a-zA-Z0-9]`` with a space
         (this also removes emojis and punctuation),
      6. lowercase, split on whitespace and discard tokens of one or two
         characters unless they are numeric.

    Args:
        data: Raw tweet text (``str``).

    Returns:
        The cleaned text as a single string with tokens joined by one space.
    """
    # Removal of URLs, including truncated ones such as "https://t.co…"
    text = re.sub(r'http\S+|www\.\S+', ' ', data)

    # Decontraction (expanding shortened forms of words)
    for pattern, expansion in _CONTRACTIONS:
        text = re.sub(pattern, expansion, text, flags=re.IGNORECASE)

    # Remove mentions.  The look-behind accepts a mention at the very start of
    # the text or after punctuation, but leaves e-mail-like "a@b" untouched.
    text = re.sub(r'(?<!\w)@\w+', ' ', text)

    # Keep hashtag words, drop only the '#' sign
    text = text.replace('#', '')

    # Remove the retweet marker only when it is a standalone word; a bare
    # substring match would corrupt words such as "EARTHQUAKE".
    text = re.sub(r'\bRT\b', ' ', text)

    # Removal of HTML tags
    text = re.sub(r'<.*?>', ' ', text)

    # Everything that is not an ASCII letter or digit becomes a space
    # (covers emojis, punctuation and any leftover symbols).
    text = re.sub(r'[^a-zA-Z0-9]', ' ', text)

    # Lowercase, tokenize, and drop tokens of length <= 2 unless numeric
    tokens = text.lower().split()
    return ' '.join(tok for tok in tokens if len(tok) > 2 or tok.isnumeric())

In [None]:
# Chat/SMS abbreviations and their expansions.  Built once at definition time
# instead of on every call.
_ABBREVIATIONS = {
    "lol": "laughing out loud",
    "omg": "oh my god",
    "btw": "by the way",
    "idk": "I don't know",
    "omw": "on my way",
    "brb": "be right back",
    "imo": "in my opinion",
    "tbh": "to be honest",
    "fyi": "for your information",
    "wtf": "what the heck",
    "gtg": "got to go",
    "afk": "away from keyboard",
    "irl": "in real life",
    "bff": "best friends forever",
    "gr8": "great",
    "np": "no problem",
    "thx": "thanks",
    "yw": "you're welcome",
    # Add more abbreviations and their full forms as needed
}


def replace_abbreviations(text):
    """Expand known chat abbreviations in ``text``.

    The text is split on whitespace; each word whose lowercase form is a key
    of ``_ABBREVIATIONS`` is replaced by its expansion, every other word is
    kept exactly as written.  Matching is case-insensitive, so "LOL" and
    "lol" are both expanded.

    Args:
        text: Input string.

    Returns:
        The words re-joined with single spaces (runs of whitespace collapse).
    """
    return " ".join(_ABBREVIATIONS.get(word.lower(), word) for word in text.split())

In [None]:
# Stop-word removal setup (stop words are very common words such as
# "a", "is", "an", "the", "in").
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the tokenizer model and the stop-word corpus.
nltk.download('punkt')
nltk.download('stopwords')

# Kept as a set so membership checks in remove_stopwords are fast.
stop_words = set(stopwords.words('english'))
def remove_stopwords(text):
    """Tokenize ``text``, lowercase every token and drop those in ``stop_words``.

    Returns the surviving tokens joined by single spaces.
    """
    kept = []
    for token in word_tokenize(text):
        lowered = token.lower()
        # Keep only tokens that are not English stop words.
        if lowered not in stop_words:
            kept.append(lowered)
    return ' '.join(kept)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
import nltk
from nltk.stem import WordNetLemmatizer

# WordNet data is required by the lemmatizer below.
nltk.download('wordnet')

# Shared lemmatizer instance; lemmatization reduces a word to its base
# (dictionary) form.
lemmatizer = WordNetLemmatizer()

def lemmatize(text):
  """Lemmatize every whitespace-separated word of ``text``.

  The previous implementation iterated over the *characters* of ``text``,
  lemmatized the whole string once per character, discarded the result and
  returned ``text`` unchanged.  This version lemmatizes each word and returns
  the words joined by single spaces.

  NOTE(review): if the pickled vectorizer/model were fitted on text produced
  by the old no-op version, confirm (or re-fit) so training and inference
  preprocessing stay in sync.
  """
  return ' '.join(lemmatizer.lemmatize(word) for word in text.split())

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
def tweets_predictions(df):
  """Clean the tweets in ``df`` and print each one with its predicted label.

  Args:
      df: DataFrame with a ``tweet_text`` column of raw tweet strings.

  Side effects:
      Adds (or overwrites) a ``clean_tweet`` column on ``df`` and prints one
      "Tweets:" / "Predicted Label:" pair per row.

  Returns:
      None.
  """
  # Nothing to classify: avoid calling the vectorizer/model with zero rows,
  # but still leave a (empty) ``clean_tweet`` column for callers that read it.
  if df.empty:
    df['clean_tweet'] = df['tweet_text']
    print("No tweets to classify.")
    return

  # Run the cleaning pipeline in the same order the steps are defined.
  cleaned = df['tweet_text'].apply(preprocess_data)
  for step in (replace_abbreviations, remove_stopwords, lemmatize):
    cleaned = cleaned.apply(step)
  df['clean_tweet'] = cleaned

  # Perform feature extraction on the new dataset
  X_new_tfidf = loaded_tfidf.transform(df['clean_tweet'])
  # Make predictions on the new dataset along with category
  test_pred = loaded_model.predict(X_new_tfidf)

  for tweets, label in zip(df['clean_tweet'], test_pred):
    print("Tweets:", tweets)
    print("Predicted Label:", label)
    print()

In [None]:
#importing libraries
import os
import time
import warnings

import pandas as pd
import tweepy

warnings.filterwarnings('ignore')

# Credentials are read from the environment so that no secret is stored in the
# notebook.  Set these four variables before running this cell.
# NOTE(security): the keys that used to be hardcoded here were in plain text;
# treat them as leaked and regenerate them in the Twitter developer portal.
consumer_key = os.environ["TWITTER_CONSUMER_KEY"]
consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"]
access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]

#Accessing twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# set_access_token is a method; assigning a tuple to it (as before) silently
# replaced the method and left the handler without an access token.
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Show full tweet text when displaying DataFrames/Series.  ``None`` means
# "no limit"; the old ``-1`` spelling is deprecated in pandas.
pd.set_option('display.max_colwidth', None)

# Extracting tweets that match a keyword

# Get the search keyword from the user via the built-in input() prompt
text_query = input("Enter Keyword: ")
count = 1000          # maximum number of tweets to pull from the search cursor
MIN_RETWEETS = 100    # keep only tweets retweeted more than this many times
TWEET_COLUMNS = ['Date', 'location', 'tweet_text', 'retweets_count']

try:

    #creation of query method using parameters
    tweets = tweepy.Cursor(api.search, q=text_query, tweet_mode="extended", lang='en').items(count)

    #getting the information from twitter object
    tweet_list = [[tweet.created_at, tweet.user.location, tweet.full_text, tweet.retweet_count]
                  for tweet in tweets if tweet.retweet_count > MIN_RETWEETS]
    #creating a data frame from the list
    df = pd.DataFrame(tweet_list, columns=TWEET_COLUMNS)

except tweepy.TweepError as e:
    # Catch API failures only (not KeyboardInterrupt/SystemExit) and leave an
    # empty frame behind so later cells do not fail on an undefined ``df``.
    # The former 60-second sleep was dropped: nothing was retried after it.
    print('failed_on_status:', str(e))
    df = pd.DataFrame(columns=TWEET_COLUMNS)


Enter Keyword: earthquake


In [None]:
# Preview the first rows of the fetched tweets.
df.head()

Unnamed: 0,Date,location,tweet_text,retweets_count
0,2023-07-05 11:03:34,"Andhra Pradesh, India",RT @PrabhasRaju: Get ready for the thunder! ⚡️ SALAAR teaser dropping in 2 days. Expect nothing less than a cinematic earthquake! 🎬 This is…,1637
1,2023-07-05 10:58:11,Athens,"RT @Naila_Ayad: This is not Turkey after Earthquake, this is PALESTINE (West Bank) after the Isr*eli raid into Jenin. https://t.co/Z51SaNmS…",576
2,2023-07-05 10:57:52,,RT @PrabhasRaju: Get ready for the thunder! ⚡️ SALAAR teaser dropping in 2 days. Expect nothing less than a cinematic earthquake! 🎬 This is…,1637
3,2023-07-05 10:56:37,"Minneapolis, MN",RT @Tony11Sim: WHISTLEBLOWER\n\n Exposes that there is an EARTHQUAKE a MACHINE in ANTARCTICA. \n\nWhat are your thoughts on this? https://t.co…,132
4,2023-07-05 10:56:06,,RT @PrabhasRaju: Get ready for the thunder! ⚡️ SALAAR teaser dropping in 2 days. Expect nothing less than a cinematic earthquake! 🎬 This is…,1637


In [None]:
# Clean the fetched tweets and print each one with its predicted label.
tweets_predictions(df)

Tweets: get ready thunder salaar teaser dropping 2 days expect nothing less cinematic earthquake
Predicted Label: Positive

Tweets: turkey earthquake palestine west bank isr eli raid jenin
Predicted Label: Neutral

Tweets: get ready thunder salaar teaser dropping 2 days expect nothing less cinematic earthquake
Predicted Label: Positive

Tweets: whistleblower exposes eahquake machine antarctica thoughts
Predicted Label: Neutral

Tweets: get ready thunder salaar teaser dropping 2 days expect nothing less cinematic earthquake
Predicted Label: Positive

Tweets: travis scott raged hard concert italy neighbors western area city thought earthquak
Predicted Label: Negative

Tweets: 8 0 jaemin food mineral water earthquake
Predicted Label: Neutral

Tweets: travis scott raged hard concert italy neighbors western area city thought earthquak
Predicted Label: Negative

Tweets: get ready thunder salaar teaser dropping 2 days expect nothing less cinematic earthquake
Predicted Label: Positive

Tweets: