# NLP

In [1]:
# Initial imports
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from pathlib import Path
import re
nltk.download('vader_lexicon')
nltk.download('stopwords')
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/tamobee/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/tamobee/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Display all the text in columns
pd.set_option('display.max_colwidth',None)

In [3]:
# Read in the tweets csv as a pandas DataFrame
file_name='clean_elon_tweets_vF.csv'
file_path = Path(f"../Resources/{file_name}")
tweets_data = pd.read_csv(file_path,parse_dates=True, infer_datetime_format=True)

# convert NaNs into blanks 
tweets_data = tweets_data.replace(np.nan, '', regex=True)

tweets_data.tail(5)

Unnamed: 0,date,tweet
2652,2021-01-11,"@archillect Once in a while, something happens that is even better than we imagined @skorusARK The factory is the product @AustinTeslaClub @TheBabylonBee Probably commenting on @signalapp will do more @PPathole I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. @TheBabylonBee A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech @alexandersibert @signalapp Already donated to Signal a year ago. Will donate more. @benmezrich I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. @Ragazaga5 @SpacePadreIsle Maybe a Tesla AI day later this year @SpacePadreIsle The Prancing Pony? @BoredElonMusk 🤣🤣 @song_wooseok @GerberKawasaki @benmezrich 🤣🤣 @erigganewmoney 😢 @mayemusk It’s true @glenn22x @woketopiansa Yes, but not for long @woketopiansa Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. @Erdayastronaut @SpaceX Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time @Erdayastronaut @SpaceX I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. @Christo49699922 @sean18743005 @Erdayastronaut @SpaceX That too @sean18743005 @Erdayastronaut @SpaceX Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) @Erdayastronaut @SpaceX That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory @techreview And increase fun! Hopefully. @benmezrich Me neither @TonyChoMrMagic @TeslaEnergy One of the best Tesla Solar Roof installations @91demigod No, but maybe one day @PPathole @engineers_feed Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. @engineers_feed His sick beats https://t.co/8PbsgffzRp @flcnhvy Probably less than a few dozen primary decision-makers. Technically, only three."
2653,2021-01-12,"@SpaceX @Space_Station One day, we will get a camera up there that also shows the star field in the background @MachinePix The robot future is coming @wonderofscience @physicsJ 300km every millisecond, but there are times I wish it were faster https://t.co/TKkruvgBth https://t.co/ocy2gFRYvN @iamdevloper https://t.co/BkKeiIicYe Mus protec 👶 yoda https://t.co/1dvhG0nTkU @roscosmos Он был великолепен @RationalEtienne @TheBabylonBee This is an important distinction @ValaAfshar Much more than this &amp; increasing rapidly @CodingMark @flcnhvy It is"
2654,2021-01-13,"@NASASpaceflight Sea shanty tiktok takes it to a new level with actual pirate ships @ConnectDotsToo Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. @Tesmanian_com As promised Legalize comedy @lvladimirovBG You can steal our name/logos &amp; we probably won’t sue you @arstechnica @SciGuySpace We’re just trying to get people to Mars. Help would be appreciated. @lexfridman Suicide is more than double homicide https://t.co/MvWGPs9uQE @lexfridman Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 https://t.co/LrqdIrbIyd @TheOldManPar @DJSnM @MachinePix Hate to say it, but might be true"
2655,2021-01-14,"@skorusARK Prototypes are easy, volume production is hard, positive cash flow is excruciating @Tesla Physics @Erdayastronaut Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! @justpaulinelol @Erdayastronaut @SpaceX Wow, a lot has happened in 10 years! @OfficialJlipper Fair enough haha https://t.co/ho7yGXAS3a"
2656,2021-01-15,"@SuperclusterHQ @w00ki33 Fallout New Texas @Breedlove22 @benmezrich Only Chuck Norris can divide by zero @Cerberu21014829 @Breedlove22 @benmezrich Good point @Breedlove22 @benmezrich The thing we call money is just an information system for labor allocation. What actually matters is making goods &amp; providing services. We should look at currencies from an information theory standpoint. Whichever has least error &amp; latency will win. Monty Python is amazing https://t.co/UJq94IWT88 @RationalEtienne @tobyliiiiiiiiii @Erdayastronaut They sure can twist the knife in that show! @tobyliiiiiiiiii @Erdayastronaut Probably wise @Erdayastronaut We’re making major improvements to ease of engine swap. Needs to be a few hours at most. @PPathole @johnkrausphotos @SpaceX Two of the engines need slight repairs, so will be switched out @johnkrausphotos @SpaceX Nice shot @realOmarAbdalah We don’t have high school internships, but please apply when you’re in college!"


In [4]:
# Functions for cleaning the raw tweet data
def remove_pattern(input_txt, pattern):
    r = re.findall(pattern, input_txt)
    for i in r:
        input_txt = re.sub(i, '', input_txt)        
    return input_txt

def clean_tweets(tweets):
    #remove twitter Return handles (RT @xxx:)
    tweets = np.vectorize(remove_pattern)(tweets, "RT @[\w]*:")
    
    #remove twitter handles (@xxx)
    tweets = np.vectorize(remove_pattern)(tweets, "@[\w]*")
    
    #remove URL links (httpxxx)
    tweets = np.vectorize(remove_pattern)(tweets, "https?://[A-Za-z0-9./]*")
    
    #remove special characters, numbers, punctuations (except for #)
    tweets = np.core.defchararray.replace(tweets, "[^a-zA-Z]", " ")
    
    return tweets

In [5]:
# Create a column for cleaned tweet to original DataFrame
tweets_data['cleaned_tweet'] = clean_tweets(tweets_data['tweet'])
tweets_data.tail(5)

Unnamed: 0,date,tweet,cleaned_tweet
2652,2021-01-11,"@archillect Once in a while, something happens that is even better than we imagined @skorusARK The factory is the product @AustinTeslaClub @TheBabylonBee Probably commenting on @signalapp will do more @PPathole I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. @TheBabylonBee A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech @alexandersibert @signalapp Already donated to Signal a year ago. Will donate more. @benmezrich I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. @Ragazaga5 @SpacePadreIsle Maybe a Tesla AI day later this year @SpacePadreIsle The Prancing Pony? @BoredElonMusk 🤣🤣 @song_wooseok @GerberKawasaki @benmezrich 🤣🤣 @erigganewmoney 😢 @mayemusk It’s true @glenn22x @woketopiansa Yes, but not for long @woketopiansa Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. @Erdayastronaut @SpaceX Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time @Erdayastronaut @SpaceX I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. @Christo49699922 @sean18743005 @Erdayastronaut @SpaceX That too @sean18743005 @Erdayastronaut @SpaceX Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) @Erdayastronaut @SpaceX That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory @techreview And increase fun! Hopefully. @benmezrich Me neither @TonyChoMrMagic @TeslaEnergy One of the best Tesla Solar Roof installations @91demigod No, but maybe one day @PPathole @engineers_feed Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. @engineers_feed His sick beats https://t.co/8PbsgffzRp @flcnhvy Probably less than a few dozen primary decision-makers. Technically, only three.","Once in a while, something happens that is even better than we imagined The factory is the product Probably commenting on will do more I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech Already donated to Signal a year ago. Will donate more. I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. Maybe a Tesla AI day later this year The Prancing Pony? 🤣🤣 🤣🤣 😢 It’s true Yes, but not for long Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. That too Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory And increase fun! Hopefully. Me neither One of the best Tesla Solar Roof installations No, but maybe one day Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. His sick beats Probably less than a few dozen primary decision-makers. Technically, only three."
2653,2021-01-12,"@SpaceX @Space_Station One day, we will get a camera up there that also shows the star field in the background @MachinePix The robot future is coming @wonderofscience @physicsJ 300km every millisecond, but there are times I wish it were faster https://t.co/TKkruvgBth https://t.co/ocy2gFRYvN @iamdevloper https://t.co/BkKeiIicYe Mus protec 👶 yoda https://t.co/1dvhG0nTkU @roscosmos Он был великолепен @RationalEtienne @TheBabylonBee This is an important distinction @ValaAfshar Much more than this &amp; increasing rapidly @CodingMark @flcnhvy It is","One day, we will get a camera up there that also shows the star field in the background The robot future is coming 300km every millisecond, but there are times I wish it were faster Mus protec 👶 yoda Он был великолепен This is an important distinction Much more than this &amp; increasing rapidly It is"
2654,2021-01-13,"@NASASpaceflight Sea shanty tiktok takes it to a new level with actual pirate ships @ConnectDotsToo Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. @Tesmanian_com As promised Legalize comedy @lvladimirovBG You can steal our name/logos &amp; we probably won’t sue you @arstechnica @SciGuySpace We’re just trying to get people to Mars. Help would be appreciated. @lexfridman Suicide is more than double homicide https://t.co/MvWGPs9uQE @lexfridman Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 https://t.co/LrqdIrbIyd @TheOldManPar @DJSnM @MachinePix Hate to say it, but might be true","Sea shanty tiktok takes it to a new level with actual pirate ships Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. As promised Legalize comedy You can steal our name/logos &amp; we probably won’t sue you We’re just trying to get people to Mars. Help would be appreciated. Suicide is more than double homicide Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 Hate to say it, but might be true"
2655,2021-01-14,"@skorusARK Prototypes are easy, volume production is hard, positive cash flow is excruciating @Tesla Physics @Erdayastronaut Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! @justpaulinelol @Erdayastronaut @SpaceX Wow, a lot has happened in 10 years! @OfficialJlipper Fair enough haha https://t.co/ho7yGXAS3a","Prototypes are easy, volume production is hard, positive cash flow is excruciating Physics Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! Wow, a lot has happened in 10 years! Fair enough haha"
2656,2021-01-15,"@SuperclusterHQ @w00ki33 Fallout New Texas @Breedlove22 @benmezrich Only Chuck Norris can divide by zero @Cerberu21014829 @Breedlove22 @benmezrich Good point @Breedlove22 @benmezrich The thing we call money is just an information system for labor allocation. What actually matters is making goods &amp; providing services. We should look at currencies from an information theory standpoint. Whichever has least error &amp; latency will win. Monty Python is amazing https://t.co/UJq94IWT88 @RationalEtienne @tobyliiiiiiiiii @Erdayastronaut They sure can twist the knife in that show! @tobyliiiiiiiiii @Erdayastronaut Probably wise @Erdayastronaut We’re making major improvements to ease of engine swap. Needs to be a few hours at most. @PPathole @johnkrausphotos @SpaceX Two of the engines need slight repairs, so will be switched out @johnkrausphotos @SpaceX Nice shot @realOmarAbdalah We don’t have high school internships, but please apply when you’re in college!","Fallout New Texas Only Chuck Norris can divide by zero Good point The thing we call money is just an information system for labor allocation. What actually matters is making goods &amp; providing services. We should look at currencies from an information theory standpoint. Whichever has least error &amp; latency will win. Monty Python is amazing They sure can twist the knife in that show! Probably wise We’re making major improvements to ease of engine swap. Needs to be a few hours at most. Two of the engines need slight repairs, so will be switched out Nice shot We don’t have high school internships, but please apply when you’re in college!"


In [6]:
# Declare variables for scores
scores = []
compound_list = []
positive_list = []
negative_list = []
neutral_list = []

# Create the sentiment scores DataFrame for Cleaned Tweets
for i in range(tweets_data['cleaned_tweet'].shape[0]):
    try:
        compound = analyzer.polarity_scores(tweets_data['cleaned_tweet'][i])["compound"]
        pos = analyzer.polarity_scores(tweets_data['cleaned_tweet'][i])["pos"]
        neu = analyzer.polarity_scores(tweets_data['cleaned_tweet'][i])["neu"]
        neg = analyzer.polarity_scores(tweets_data['cleaned_tweet'][i])["neg"]
    
        scores.append({
            "Compound": compound, 
            "Positive": pos, 
            "Negative": neg, 
            "Neutral": neu
        })
    except AttributeError:
        pass

sentiment_scores = pd.DataFrame(scores)
sentiment_scores.head(5)

Unnamed: 0,Compound,Positive,Negative,Neutral
0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0


In [7]:
# Describe the Tweet sentiment
sentiment_scores.describe()

Unnamed: 0,Compound,Positive,Negative,Neutral
count,2657.0,2657.0,2657.0,2657.0
mean,0.29801,0.105032,0.02958,0.431817
std,0.448051,0.148589,0.056043,0.395979
min,-0.9594,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.617
75%,0.7845,0.182,0.047,0.789
max,0.9997,1.0,0.756,1.0


In [8]:
# Join Tweets DataFrame and sentiment scores DataFrame
#scores_df = pd.DataFrame.from_dict(scores)
tweets_data = tweets_data.join(sentiment_scores)
tweets_data.set_index('date',inplace=True)
tweets_data.head(5)

Unnamed: 0_level_0,tweet,cleaned_tweet,Compound,Positive,Negative,Neutral
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-06-29,,,0.0,0.0,0.0,0.0
2010-06-30,,,0.0,0.0,0.0,0.0
2010-07-01,,,0.0,0.0,0.0,0.0
2010-07-02,,,0.0,0.0,0.0,0.0
2010-07-06,,,0.0,0.0,0.0,0.0


In [9]:
# Add the Sentiment column to the Tweets DataFrame
# dummy encoding 
i = 0

Sentiment = []

while(i<len(tweets_data)):
    if ((tweets_data.iloc[i]['Compound'] > 0.5)):
        Sentiment.append('positive')
        i = i+1
    elif ((tweets_data.iloc[i]['Compound'] > -0.5) & (tweets_data.iloc[i]['Compound'] <= 0.5)):
        Sentiment.append('neutral')
        i = i+1
    elif ((tweets_data.iloc[i]['Compound'] <= -0.5)):
        Sentiment.append('negative')
        i = i+1

tweets_data['Sentiment'] = Sentiment
tweets_data

Unnamed: 0_level_0,tweet,cleaned_tweet,Compound,Positive,Negative,Neutral,Sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-06-29,,,0.0000,0.000,0.000,0.000,neutral
2010-06-30,,,0.0000,0.000,0.000,0.000,neutral
2010-07-01,,,0.0000,0.000,0.000,0.000,neutral
2010-07-02,,,0.0000,0.000,0.000,0.000,neutral
2010-07-06,,,0.0000,0.000,0.000,0.000,neutral
...,...,...,...,...,...,...,...
2021-01-11,"@archillect Once in a while, something happens that is even better than we imagined @skorusARK The factory is the product @AustinTeslaClub @TheBabylonBee Probably commenting on @signalapp will do more @PPathole I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. @TheBabylonBee A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech @alexandersibert @signalapp Already donated to Signal a year ago. Will donate more. @benmezrich I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. @Ragazaga5 @SpacePadreIsle Maybe a Tesla AI day later this year @SpacePadreIsle The Prancing Pony? @BoredElonMusk 🤣🤣 @song_wooseok @GerberKawasaki @benmezrich 🤣🤣 @erigganewmoney 😢 @mayemusk It’s true @glenn22x @woketopiansa Yes, but not for long @woketopiansa Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. @Erdayastronaut @SpaceX Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time @Erdayastronaut @SpaceX I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. @Christo49699922 @sean18743005 @Erdayastronaut @SpaceX That too @sean18743005 @Erdayastronaut @SpaceX Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) @Erdayastronaut @SpaceX That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory @techreview And increase fun! Hopefully. @benmezrich Me neither @TonyChoMrMagic @TeslaEnergy One of the best Tesla Solar Roof installations @91demigod No, but maybe one day @PPathole @engineers_feed Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. @engineers_feed His sick beats https://t.co/8PbsgffzRp @flcnhvy Probably less than a few dozen primary decision-makers. Technically, only three.","Once in a while, something happens that is even better than we imagined The factory is the product Probably commenting on will do more I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech Already donated to Signal a year ago. Will donate more. I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. Maybe a Tesla AI day later this year The Prancing Pony? 🤣🤣 🤣🤣 😢 It’s true Yes, but not for long Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. That too Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory And increase fun! Hopefully. Me neither One of the best Tesla Solar Roof installations No, but maybe one day Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. His sick beats Probably less than a few dozen primary decision-makers. Technically, only three.",0.9966,0.206,0.048,0.746,positive
2021-01-12,"@SpaceX @Space_Station One day, we will get a camera up there that also shows the star field in the background @MachinePix The robot future is coming @wonderofscience @physicsJ 300km every millisecond, but there are times I wish it were faster https://t.co/TKkruvgBth https://t.co/ocy2gFRYvN @iamdevloper https://t.co/BkKeiIicYe Mus protec 👶 yoda https://t.co/1dvhG0nTkU @roscosmos Он был великолепен @RationalEtienne @TheBabylonBee This is an important distinction @ValaAfshar Much more than this &amp; increasing rapidly @CodingMark @flcnhvy It is","One day, we will get a camera up there that also shows the star field in the background The robot future is coming 300km every millisecond, but there are times I wish it were faster Mus protec 👶 yoda Он был великолепен This is an important distinction Much more than this &amp; increasing rapidly It is",0.6956,0.101,0.000,0.899,positive
2021-01-13,"@NASASpaceflight Sea shanty tiktok takes it to a new level with actual pirate ships @ConnectDotsToo Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. @Tesmanian_com As promised Legalize comedy @lvladimirovBG You can steal our name/logos &amp; we probably won’t sue you @arstechnica @SciGuySpace We’re just trying to get people to Mars. Help would be appreciated. @lexfridman Suicide is more than double homicide https://t.co/MvWGPs9uQE @lexfridman Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 https://t.co/LrqdIrbIyd @TheOldManPar @DJSnM @MachinePix Hate to say it, but might be true","Sea shanty tiktok takes it to a new level with actual pirate ships Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. As promised Legalize comedy You can steal our name/logos &amp; we probably won’t sue you We’re just trying to get people to Mars. Help would be appreciated. Suicide is more than double homicide Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 Hate to say it, but might be true",0.5226,0.141,0.090,0.769,positive
2021-01-14,"@skorusARK Prototypes are easy, volume production is hard, positive cash flow is excruciating @Tesla Physics @Erdayastronaut Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! @justpaulinelol @Erdayastronaut @SpaceX Wow, a lot has happened in 10 years! @OfficialJlipper Fair enough haha https://t.co/ho7yGXAS3a","Prototypes are easy, volume production is hard, positive cash flow is excruciating Physics Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! Wow, a lot has happened in 10 years! Fair enough haha",0.9357,0.336,0.120,0.545,positive


# CONFIRM WE CAN DELETE

In [10]:
# Save the Vader scores DataFrame as a csv file
# file_name="vader_scores.csv"
# output_file = Path(f"../Resources/{file_name}")
# sentiment_scores.to_csv(f"{output_file}")

# Export csv files with cleaned Tweets and sentiment scores
# file_name="cleaned_tweets_vader_scores.csv"
# output_file = Path(f"../Resources/{file_name}")
# tweets_data.to_csv(f"{output_file}")

# Tokenizer

In [11]:
# Imports
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import string
import re

lemmatizer = WordNetLemmatizer()
sw_addon = stopwords.words('english')
sw_addon.append("u")
sw_addon.append("it'")
sw_addon.append("char")
sw_addon.append("’")
sw_addon.append("…")
sw_addon.append("”")
sw_addon.append('“')
sw_addon.append('”')
sw_addon.append('amp')
sw_addon.append('would')
sw_addon.append("n't")
sw_addon.append('wo')

sw_addon = set(sw_addon)


In [12]:
# Tokenize tweets
def tokenizer(text):
    """Tokenizes text."""   
    # Create a list of the words
    words = word_tokenize(text)
    # Convert the words to lowercase
    words = list(filter(lambda w: w.lower(), words))   
    # Remove the punctuation
    words = list(filter(lambda t: t not in punctuation, words))   
    # Remove the stopwords
    words = list(filter(lambda t: t.lower() not in sw_addon, words))   
    # Lemmatize Words into root words
    tokens = [lemmatizer.lemmatize(word) for word in words]
    
    return tokens

In [13]:
# Add the tokens column to the Tweets DataFrame
tweets_data["tokens"] = tweets_data.cleaned_tweet.apply(tokenizer)
tweets_data.tail(5)

Unnamed: 0_level_0,tweet,cleaned_tweet,Compound,Positive,Negative,Neutral,Sentiment,tokens
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-11,"@archillect Once in a while, something happens that is even better than we imagined @skorusARK The factory is the product @AustinTeslaClub @TheBabylonBee Probably commenting on @signalapp will do more @PPathole I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. @TheBabylonBee A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech @alexandersibert @signalapp Already donated to Signal a year ago. Will donate more. @benmezrich I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. @Ragazaga5 @SpacePadreIsle Maybe a Tesla AI day later this year @SpacePadreIsle The Prancing Pony? @BoredElonMusk 🤣🤣 @song_wooseok @GerberKawasaki @benmezrich 🤣🤣 @erigganewmoney 😢 @mayemusk It’s true @glenn22x @woketopiansa Yes, but not for long @woketopiansa Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. @Erdayastronaut @SpaceX Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time @Erdayastronaut @SpaceX I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. @Christo49699922 @sean18743005 @Erdayastronaut @SpaceX That too @sean18743005 @Erdayastronaut @SpaceX Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) @Erdayastronaut @SpaceX That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory @techreview And increase fun! Hopefully. @benmezrich Me neither @TonyChoMrMagic @TeslaEnergy One of the best Tesla Solar Roof installations @91demigod No, but maybe one day @PPathole @engineers_feed Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. @engineers_feed His sick beats https://t.co/8PbsgffzRp @flcnhvy Probably less than a few dozen primary decision-makers. Technically, only three.","Once in a while, something happens that is even better than we imagined The factory is the product Probably commenting on will do more I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech Already donated to Signal a year ago. Will donate more. I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. Maybe a Tesla AI day later this year The Prancing Pony? 🤣🤣 🤣🤣 😢 It’s true Yes, but not for long Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. That too Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory And increase fun! Hopefully. Me neither One of the best Tesla Solar Roof installations No, but maybe one day Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. His sick beats Probably less than a few dozen primary decision-makers. Technically, only three.",0.9966,0.206,0.048,0.746,positive,"[something, happens, even, better, imagined, factory, product, Probably, commenting, bought, replacement, glass, junkyard, 20, great, place, get, spare, part, lot, people, going, super, unhappy, West, Coast, high, tech, de, facto, arbiter, free, speech, Already, donated, Signal, year, ago, donate, primarily, trying, advance, two, cause, sustainable, energy, extending, life/consciousness, beyond, Earth, also, existential, threat, AI, aspire, mitigate, leave, time, write, book, Maybe, Tesla, AI, day, later, year, Prancing, Pony, 🤣🤣, 🤣🤣, 😢, true, Yes, long, Yeah, seemed, optimistic, 14-year-old, son, Saxon, said, feel, like, 2021, good, year, agree, Let, u, make, also, thinking, SpaceX, Boca, hold, fun, party, give, tour, community, ...]"
2021-01-12,"@SpaceX @Space_Station One day, we will get a camera up there that also shows the star field in the background @MachinePix The robot future is coming @wonderofscience @physicsJ 300km every millisecond, but there are times I wish it were faster https://t.co/TKkruvgBth https://t.co/ocy2gFRYvN @iamdevloper https://t.co/BkKeiIicYe Mus protec 👶 yoda https://t.co/1dvhG0nTkU @roscosmos Он был великолепен @RationalEtienne @TheBabylonBee This is an important distinction @ValaAfshar Much more than this &amp; increasing rapidly @CodingMark @flcnhvy It is","One day, we will get a camera up there that also shows the star field in the background The robot future is coming 300km every millisecond, but there are times I wish it were faster Mus protec 👶 yoda Он был великолепен This is an important distinction Much more than this &amp; increasing rapidly It is",0.6956,0.101,0.0,0.899,positive,"[One, day, get, camera, also, show, star, field, background, robot, future, coming, 300km, every, millisecond, time, wish, faster, Mus, protec, 👶, yoda, Он, был, великолепен, important, distinction, Much, increasing, rapidly]"
2021-01-13,"@NASASpaceflight Sea shanty tiktok takes it to a new level with actual pirate ships @ConnectDotsToo Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. @Tesmanian_com As promised Legalize comedy @lvladimirovBG You can steal our name/logos &amp; we probably won’t sue you @arstechnica @SciGuySpace We’re just trying to get people to Mars. Help would be appreciated. @lexfridman Suicide is more than double homicide https://t.co/MvWGPs9uQE @lexfridman Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 https://t.co/LrqdIrbIyd @TheOldManPar @DJSnM @MachinePix Hate to say it, but might be true","Sea shanty tiktok takes it to a new level with actual pirate ships Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. As promised Legalize comedy You can steal our name/logos &amp; we probably won’t sue you We’re just trying to get people to Mars. Help would be appreciated. Suicide is more than double homicide Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 Hate to say it, but might be true",0.5226,0.141,0.09,0.769,positive,"[Sea, shanty, tiktok, take, new, level, actual, pirate, ship, Sure, Today, SpaceX, practicing, Starship, engine, start, Ship, held, massive, pin, engine, fired, Two, start, completed, try, third, promised, Legalize, comedy, steal, name/logos, probably, sue, trying, get, people, Mars, Help, appreciated, Suicide, double, homicide, Absolutely, Hey, Yeah, Queen, gon, na, make, 💕💕, Hate, say, might, true]"
2021-01-14,"@skorusARK Prototypes are easy, volume production is hard, positive cash flow is excruciating @Tesla Physics @Erdayastronaut Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! @justpaulinelol @Erdayastronaut @SpaceX Wow, a lot has happened in 10 years! @OfficialJlipper Fair enough haha https://t.co/ho7yGXAS3a","Prototypes are easy, volume production is hard, positive cash flow is excruciating Physics Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! Wow, a lot has happened in 10 years! Fair enough haha",0.9357,0.336,0.12,0.545,positive,"[Prototypes, easy, volume, production, hard, positive, cash, flow, excruciating, Physics, Detanking, inspection, Good, progress, towards, Hop, go, Mars, goal, three, static, fire, completed, RUDs, Wow, lot, happened, 10, year, Fair, enough, haha]"
2021-01-15,"@SuperclusterHQ @w00ki33 Fallout New Texas @Breedlove22 @benmezrich Only Chuck Norris can divide by zero @Cerberu21014829 @Breedlove22 @benmezrich Good point @Breedlove22 @benmezrich The thing we call money is just an information system for labor allocation. What actually matters is making goods &amp; providing services. We should look at currencies from an information theory standpoint. Whichever has least error &amp; latency will win. Monty Python is amazing https://t.co/UJq94IWT88 @RationalEtienne @tobyliiiiiiiiii @Erdayastronaut They sure can twist the knife in that show! @tobyliiiiiiiiii @Erdayastronaut Probably wise @Erdayastronaut We’re making major improvements to ease of engine swap. Needs to be a few hours at most. @PPathole @johnkrausphotos @SpaceX Two of the engines need slight repairs, so will be switched out @johnkrausphotos @SpaceX Nice shot @realOmarAbdalah We don’t have high school internships, but please apply when you’re in college!","Fallout New Texas Only Chuck Norris can divide by zero Good point The thing we call money is just an information system for labor allocation. What actually matters is making goods &amp; providing services. We should look at currencies from an information theory standpoint. Whichever has least error &amp; latency will win. Monty Python is amazing They sure can twist the knife in that show! Probably wise We’re making major improvements to ease of engine swap. Needs to be a few hours at most. Two of the engines need slight repairs, so will be switched out Nice shot We don’t have high school internships, but please apply when you’re in college!",0.9429,0.182,0.0,0.818,positive,"[Fallout, New, Texas, Chuck, Norris, divide, zero, Good, point, thing, call, money, information, system, labor, allocation, actually, matter, making, good, providing, service, look, currency, information, theory, standpoint, Whichever, least, error, latency, win, Monty, Python, amazing, sure, twist, knife, show, Probably, wise, making, major, improvement, ease, engine, swap, Needs, hour, Two, engine, need, slight, repair, switched, Nice, shot, high, school, internship, please, apply, college]"


In [14]:
# Get dummy variables for Sentiment column
tweets_data['Sentiment'] = pd.get_dummies(tweets_data.Sentiment)
tweets_data

Unnamed: 0_level_0,tweet,cleaned_tweet,Compound,Positive,Negative,Neutral,Sentiment,tokens
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-06-29,,,0.0000,0.000,0.000,0.000,0,[]
2010-06-30,,,0.0000,0.000,0.000,0.000,0,[]
2010-07-01,,,0.0000,0.000,0.000,0.000,0,[]
2010-07-02,,,0.0000,0.000,0.000,0.000,0,[]
2010-07-06,,,0.0000,0.000,0.000,0.000,0,[]
...,...,...,...,...,...,...,...,...
2021-01-11,"@archillect Once in a while, something happens that is even better than we imagined @skorusARK The factory is the product @AustinTeslaClub @TheBabylonBee Probably commenting on @signalapp will do more @PPathole I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. @TheBabylonBee A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech @alexandersibert @signalapp Already donated to Signal a year ago. Will donate more. @benmezrich I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. @Ragazaga5 @SpacePadreIsle Maybe a Tesla AI day later this year @SpacePadreIsle The Prancing Pony? @BoredElonMusk 🤣🤣 @song_wooseok @GerberKawasaki @benmezrich 🤣🤣 @erigganewmoney 😢 @mayemusk It’s true @glenn22x @woketopiansa Yes, but not for long @woketopiansa Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. @Erdayastronaut @SpaceX Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time @Erdayastronaut @SpaceX I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. @Christo49699922 @sean18743005 @Erdayastronaut @SpaceX That too @sean18743005 @Erdayastronaut @SpaceX Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) @Erdayastronaut @SpaceX That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory @techreview And increase fun! Hopefully. @benmezrich Me neither @TonyChoMrMagic @TeslaEnergy One of the best Tesla Solar Roof installations @91demigod No, but maybe one day @PPathole @engineers_feed Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. @engineers_feed His sick beats https://t.co/8PbsgffzRp @flcnhvy Probably less than a few dozen primary decision-makers. Technically, only three.","Once in a while, something happens that is even better than we imagined The factory is the product Probably commenting on will do more I bought the replacement glass from a junkyard for $20. They’re great places to get spare parts. A lot of people are going to be super unhappy with West Coast high tech as the de facto arbiter of free speech Already donated to Signal a year ago. Will donate more. I am primarily trying to advance two causes: sustainable energy &amp; extending life/consciousness beyond Earth. There is also the existential threat of AI, which we should aspire to mitigate. This doesn’t leave time to write books. Maybe a Tesla AI day later this year The Prancing Pony? 🤣🤣 🤣🤣 😢 It’s true Yes, but not for long Yeah, they seemed optimistic too My 14-year-old son, Saxon, said he feels like 2021 will be a good year. I agree. Let us all make it so. Was also thinking that SpaceX Boca should hold fun parties and give tours for the community from time to time I super support public spaces around our production &amp; launch sites, wherever it can be done safely and securely. When covid passes, we will open up our restaurant to the public &amp; figure out ways to allow limited access to the High Bar. That too Catapult high into the air &amp; land on the roof with a base jumping parachute (the only way to travel) That’s actually our restaurant, but SpaceX is building a futuristic bar at the top of the high bay with 360 degree windows &amp; a glass floor looking down on the rocket factory And increase fun! Hopefully. Me neither One of the best Tesla Solar Roof installations No, but maybe one day Yes, that would be the best way to do it. A large solar field or reactor is needed for power, and then some ice mining droids or long maneuvering drills are needed for the H2O. These are probably the trickier bits. His sick beats Probably less than a few dozen primary decision-makers. Technically, only three.",0.9966,0.206,0.048,0.746,0,"[something, happens, even, better, imagined, factory, product, Probably, commenting, bought, replacement, glass, junkyard, 20, great, place, get, spare, part, lot, people, going, super, unhappy, West, Coast, high, tech, de, facto, arbiter, free, speech, Already, donated, Signal, year, ago, donate, primarily, trying, advance, two, cause, sustainable, energy, extending, life/consciousness, beyond, Earth, also, existential, threat, AI, aspire, mitigate, leave, time, write, book, Maybe, Tesla, AI, day, later, year, Prancing, Pony, 🤣🤣, 🤣🤣, 😢, true, Yes, long, Yeah, seemed, optimistic, 14-year-old, son, Saxon, said, feel, like, 2021, good, year, agree, Let, u, make, also, thinking, SpaceX, Boca, hold, fun, party, give, tour, community, ...]"
2021-01-12,"@SpaceX @Space_Station One day, we will get a camera up there that also shows the star field in the background @MachinePix The robot future is coming @wonderofscience @physicsJ 300km every millisecond, but there are times I wish it were faster https://t.co/TKkruvgBth https://t.co/ocy2gFRYvN @iamdevloper https://t.co/BkKeiIicYe Mus protec 👶 yoda https://t.co/1dvhG0nTkU @roscosmos Он был великолепен @RationalEtienne @TheBabylonBee This is an important distinction @ValaAfshar Much more than this &amp; increasing rapidly @CodingMark @flcnhvy It is","One day, we will get a camera up there that also shows the star field in the background The robot future is coming 300km every millisecond, but there are times I wish it were faster Mus protec 👶 yoda Он был великолепен This is an important distinction Much more than this &amp; increasing rapidly It is",0.6956,0.101,0.000,0.899,0,"[One, day, get, camera, also, show, star, field, background, robot, future, coming, 300km, every, millisecond, time, wish, faster, Mus, protec, 👶, yoda, Он, был, великолепен, important, distinction, Much, increasing, rapidly]"
2021-01-13,"@NASASpaceflight Sea shanty tiktok takes it to a new level with actual pirate ships @ConnectDotsToo Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. @Tesmanian_com As promised Legalize comedy @lvladimirovBG You can steal our name/logos &amp; we probably won’t sue you @arstechnica @SciGuySpace We’re just trying to get people to Mars. Help would be appreciated. @lexfridman Suicide is more than double homicide https://t.co/MvWGPs9uQE @lexfridman Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 https://t.co/LrqdIrbIyd @TheOldManPar @DJSnM @MachinePix Hate to say it, but might be true","Sea shanty tiktok takes it to a new level with actual pirate ships Sure Today at SpaceX is about practicing Starship engine starts. Ship is held down by massive pins while engines are fired. Two starts completed, about to try a third. As promised Legalize comedy You can steal our name/logos &amp; we probably won’t sue you We’re just trying to get people to Mars. Help would be appreciated. Suicide is more than double homicide Absolutely Hey you … Yeah you Queen … You’re gonna make it! 💕💕 Hate to say it, but might be true",0.5226,0.141,0.090,0.769,0,"[Sea, shanty, tiktok, take, new, level, actual, pirate, ship, Sure, Today, SpaceX, practicing, Starship, engine, start, Ship, held, massive, pin, engine, fired, Two, start, completed, try, third, promised, Legalize, comedy, steal, name/logos, probably, sue, trying, get, people, Mars, Help, appreciated, Suicide, double, homicide, Absolutely, Hey, Yeah, Queen, gon, na, make, 💕💕, Hate, say, might, true]"
2021-01-14,"@skorusARK Prototypes are easy, volume production is hard, positive cash flow is excruciating @Tesla Physics @Erdayastronaut Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! @justpaulinelol @Erdayastronaut @SpaceX Wow, a lot has happened in 10 years! @OfficialJlipper Fair enough haha https://t.co/ho7yGXAS3a","Prototypes are easy, volume production is hard, positive cash flow is excruciating Physics Detanking &amp; inspections now. Good progress towards our “Hop in &amp; go to Mars!” goal. All three static fires completed &amp; no RUDs! Wow, a lot has happened in 10 years! Fair enough haha",0.9357,0.336,0.120,0.545,0,"[Prototypes, easy, volume, production, hard, positive, cash, flow, excruciating, Physics, Detanking, inspection, Good, progress, towards, Hop, go, Mars, goal, three, static, fire, completed, RUDs, Wow, lot, happened, 10, year, Fair, enough, haha]"


In [15]:
# Save the Tweets DataFrame with the tokens as a csv
file_name="tokens_n_vader_sentiment.csv"
output_file = Path(f"../Resources/{file_name}")
tweets_data.to_csv(f"{output_file}")

## NGrams and Frequency Analysis

In [16]:
# Imports
from collections import Counter
from nltk import ngrams
import inspect

In [17]:
def seriesToList(s):
    """"Converts series into one list"""
    
    lst = []      
    # traverse in the lists   
    for ele in s:  
        lst += ele     
    # return list   
    return lst

tweets = seriesToList(tweets_data["tokens"])

In [18]:
# add to stopwords: amp, would, "n't", 'wo'
# keywords: model 3, coming soon, next week, next year, falcon 9, falcon heavy, model x, 
# next month, space station, sustainable energy, electric car, climate change, software update, dual motor, self-driving, giga berlin, rocket engine, solar power

bigrams = ngrams(tweets, n=2)
tweets_dict = dict(Counter(bigrams).most_common(10))
tweets_bigrams = tweets_dict.items()
list(tweets_bigrams)

[(('Model', '3'), 176),
 (('Falcon', '9'), 67),
 (('next', 'week'), 60),
 (('next', 'year'), 60),
 (('coming', 'soon'), 49),
 (('Falcon', 'Heavy'), 48),
 (('Model', 'X'), 47),
 (('Coming', 'soon'), 41),
 (('Good', 'point'), 41),
 (('year', 'ago'), 40)]

In [19]:
keywords = ['model 3', 'coming soon', 'next week', 'next year', 'falcon 9', 'falcon heavy', 
            'model x', 'next month', 'space station', 'sustainable energy', 'electric car', 'climate change', 
            'software update', 'dual motor', 'self-driving', 'giga berlin', 'rocket', 'engine', 'solar power', 'SpaceX', 
           ]
keywords_df = pd.DataFrame(keywords)
keywords_df

Unnamed: 0,0
0,model 3
1,coming soon
2,next week
3,next year
4,falcon 9
5,falcon heavy
6,model x
7,next month
8,space station
9,sustainable energy


In [20]:
# keywords: Tesla, car, Model, 3, engine, spacex, launch, production, Falcon, Starship, Mars, 
# maybe?: time, rocket, soon, 
keywords1 = ['launch', 'production', 'starship', 'mars', 'time', 'rocket', 'soon', 'tesla', 'spacex']

bigrams = ngrams(tweets, n=1)
tweets_dict = dict(Counter(bigrams).most_common(50))
tweets_bigrams = tweets_dict.items()
list(tweets_bigrams)

[(('Tesla',), 1057),
 (('car',), 541),
 (('Yes',), 479),
 (('Model',), 397),
 (('year',), 397),
 (('like',), 389),
 (('time',), 333),
 (('good',), 315),
 (('make',), 313),
 (('3',), 309),
 (('much',), 292),
 (('need',), 280),
 (('one',), 260),
 (('rocket',), 255),
 (('great',), 254),
 (('work',), 247),
 (('people',), 242),
 (('get',), 238),
 (("'s",), 237),
 (('next',), 231),
 (('high',), 230),
 (('soon',), 227),
 (('SpaceX',), 222),
 (('Yeah',), 221),
 (('engine',), 211),
 (('right',), 208),
 (('week',), 205),
 (('way',), 203),
 (('day',), 201),
 (('launch',), 198),
 (('better',), 196),
 (('production',), 192),
 (('go',), 190),
 (('month',), 189),
 (('team',), 187),
 (('Falcon',), 182),
 (('test',), 179),
 (('Exactly',), 176),
 (('many',), 173),
 (('Good',), 173),
 (('Thanks',), 171),
 (('point',), 168),
 (('Starship',), 168),
 (('Mars',), 166),
 (('even',), 164),
 (('think',), 163),
 (('first',), 161),
 (('actually',), 161),
 (('lot',), 158),
 (('new',), 157)]

In [21]:
def token_count(tokens, N=15):
    """Returns the top N tokens from the frequency count"""
    return Counter(tokens).most_common(N)

In [22]:
token_count(tweets)

[('Tesla', 1057),
 ('car', 541),
 ('Yes', 479),
 ('Model', 397),
 ('year', 397),
 ('like', 389),
 ('time', 333),
 ('good', 315),
 ('make', 313),
 ('3', 309),
 ('much', 292),
 ('need', 280),
 ('one', 260),
 ('rocket', 255),
 ('great', 254)]