In [1]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Notebook-wide display settings: show every column, never truncate cell text,
# and render floats with 3 decimals.
# The full option names are spelled out; pandas only resolved the previous
# 'display.max.column' through regex partial matching, which can stop working
# as soon as another option with a similar name exists.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.precision', 3)

In [3]:
# Read the raw tweet dump and rename its columns to short lowercase names
# in a single chained expression (no in-place mutation).
column_names = {"Movie_hashtag": "tag", "Tweet": "tweet"}
tweets = pd.read_csv(r"movie_tweets20.csv").rename(columns=column_names)
tweets.head()

Unnamed: 0,tag,tweet
0,#toystory,*Fun facts:\n#TheLittleMermaid is the first movie I remember watching at the theater \n#TheLionKing was my first trip to the movies without adult supervision \n#ToyStory was the first movie my sister and I watched at the theater together\n2/2
1,#toystory,"🎬 The first feature-length computer-animated movie was ""Toy Story"" (1995), introducing a new era in animated cinema.\n\n#ToyStory #ComputerAnimatedMovie #AnimatedCinema #FeatureLength #Cinema #365Tidbits #TidbitOfTheDay #FunFacts #Trivia #ForeverCurious #AI"
2,#toystory,"Toy Story is a near-perfect film. The only weakness is the dated animation, which feels nostalgic rather than outdated, and it fits the toy aesthetic. It’s funny, clever, appropriately low stakes, well paced, and heartwarming enough to please all ages - 5/5 Stars #toystory #pixar"
3,#toystory,Happy 5th anniversary to #ToyStory4. Top Easter egg in the antique shop from a Pixar film. #pixar #InsideOut2 #Celebrities #cinema #DisneyPlus #DisneyWorld #toystory #Disneyland #disneyparks #moviereviews @Ryan_Treasures @missnotyou @brutalpuncher1 @JJRavenation52
4,#toystory,#buzzlightyearofstarcommand #TimAllen #disney #pixar #animation #toystory #2000s #movie #moviereview #movies #film #filmreview #films #cinemaloco #wayneknight #scifi #sciencefiction #scifimovies #FilmTwitter #FilmX \n\n@Disney\n@DisneyStudios\n@Pixar\n@toystory\n\n- Mike Sandwich


In [4]:
# Show the (rows, columns) dimensions of the loaded frame.
tweets.shape

(37151, 2)

In [5]:
# Cardinality of each column: distinct hashtags vs. distinct tweet texts.
unique_tag_count = tweets['tag'].nunique()
unique_tweet_count = tweets['tweet'].nunique()
print(f"Number of unique tags: {unique_tag_count}")
print(f"Number of unique tweets: {unique_tweet_count}")

Number of unique tags: 1409
Number of unique tweets: 35589


In [6]:
# Keep only the first occurrence of every tweet text, then report how many
# rows survive.
tweets = tweets.drop_duplicates(subset=["tweet"], keep="first")
remaining_rows = len(tweets)
print(f"Number of rows after removing duplicates: {remaining_rows}")

Number of rows after removing duplicates: 35589


In [7]:
# Per-tag tweet counts (most frequent first) and the mean count across tags.
tweets_per_tag = tweets['tag'].value_counts()
mean_tweets_per_tag = tweets.groupby('tag').size().mean()
print(f"Number of tweets per tag: {tweets_per_tag}")
print(f"\nAverage number of tweets per tag: {mean_tweets_per_tag:.2f}")

Number of tweets per tag: tag
#fieldofdreams                 39
#themanwhoknewtoolittle        39
#rebelwithoutacause            39
#nothingpersonal               39
#breakingthewaves              39
                               ..
#secretslies                    1
#tetsuoiibodyhammer             1
#lottoland                      1
#denisecallsup                  1
#nakedgun3313thefinalinsult     1
Name: count, Length: 1407, dtype: int64

Average number of tweets per tag: 25.29


In [8]:
# Inspect the rows whose tag ends with the literal suffix "the".
# NOTE(review): these look like tags built from "Title, The"-style names
# (e.g. #blobthe) -- confirm against how the tags were generated.
tweets.loc[tweets['tag'].str.endswith('the')]

Unnamed: 0,tag,tweet
2370,#rockthe,Video editing is a form of magic coordinated by the cameraman and the editor. \nRight but the magic who did is the editor who convert anything in a raw form to a film . And its called an amazing Magic .\n\n#videoeditor #rockthe #VideoViral
2371,#rockthe,@akshaykumar akki sir best wishesh for your movie #rockthe bollywood
7616,#clientthe,@MomCentral #clientThe person who always be there is my friend no matter when where so blessed to have her in my life so this movie is us
9419,#blobthe,"So pleased that The Blob (aka #BlobThe) ended with ""The End"" followed by a ""?"" More films need to conclude in this manner!"
12580,#fanthe,@SRKFC1 @iamsrk @FanTheFilm @ferdbouk Fan will be amazing so excited to see #Fanthe Film
25055,#vanthe,Jayam Ravi in boomi movie\nAs excellent #vanthe mathiram
26864,#machinethe,@themustafaB Excellntttttttttttt movie mustafa love the climax and @Advani_Kiara i have no word to describe you..love u both #machinethe
27616,#mightythe,@_kiriiku To buy food stuff\n#MightyThe Movie
27617,#mightythe,@_kiriiku Me boss it will go a loooong way helping me get my cooking utensils for sales #MightyThe Movie
28695,#fearthe,"Any big PF and Centers looking for a team, DM film to this account! Looking forward to seeing what you all can do! #GoNoles #FearThe Spear"


In [9]:
# Remove every row whose tag ends in "the", then verify that no such row is
# left (the cell displays True when the filter worked).
has_the_suffix = tweets['tag'].str.endswith('the')
tweets = tweets[~has_the_suffix]
tweets[tweets["tag"].str.endswith("the")].empty

True

In [10]:
def process_tweets(tw):
    """Normalise a raw tweet into lowercase, letters-only text.

    Steps, in order:
      1. drop hashtags (``#word``) together with any whitespace before them;
      2. drop mentions (``@word``) together with any whitespace before them;
      3. turn line breaks into spaces so words on adjacent lines stay separate;
      4. delete every character that is not an ASCII letter or whitespace
         (punctuation, digits, emoji, underscores, ...);
      5. collapse whitespace runs to a single space and trim both ends;
      6. lowercase the result.

    Parameters
    ----------
    tw : str
        Raw tweet text. Any non-string value (e.g. a NaN from a missing CSV
        cell) is treated as an empty tweet.

    Returns
    -------
    str
        The cleaned tweet; may be the empty string.
    """
    if not isinstance(tw, str):
        # Missing values reach here as float NaN; re.sub would raise TypeError.
        return ""

    tw = re.sub(r"\s*#\w+\b", "", tw)
    tw = re.sub(r"\s*@\w+\b", "", tw)
    # Replace (not delete) newlines: deleting them glued "good\nmovie" into
    # "goodmovie".
    tw = re.sub(r"\n", " ", tw)
    # One pass is enough: anything outside [a-zA-Z\s] also covers the
    # punctuation that a separate [^\w\s] pass would have removed.
    tw = re.sub(r"[^a-zA-Z\s]", "", tw)
    # Removing tokens leaves gaps behind; normalise them.
    tw = re.sub(r"\s+", " ", tw).strip()
    return tw.lower()

# Clean the tweet text and strip the leading '#' from every tag.
# `.assign` builds a new frame instead of writing columns into the filtered
# frame, which avoids pandas' SettingWithCopyWarning.
# `.str.lstrip("#")` only ever removes '#' characters, so re-running this cell
# is harmless; slicing with tag[1:] would chop one more letter off each run.
tweets = tweets.assign(
    tweet=tweets["tweet"].apply(process_tweets),
    tag=tweets["tag"].str.lstrip("#"),
)
tweets.head()

Unnamed: 0,tag,tweet
0,toystory,fun facts is the first movie i remember watching at the theater was my first trip to the movies without adult supervision was the first movie my sister and i watched at the theater together
1,toystory,the first featurelength computeranimated movie was toy story introducing a new era in animated cinema
2,toystory,toy story is a nearperfect film the only weakness is the dated animation which feels nostalgic rather than outdated and it fits the toy aesthetic its funny clever appropriately low stakes well paced and heartwarming enough to please all ages stars
3,toystory,happy th anniversary to top easter egg in the antique shop from a pixar film
4,toystory,mike sandwich


In [13]:
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob

# Load the spaCy pipeline named "en_core_web_sm".
nlp = spacy.load("en_core_web_sm")
# Register the "spacytextblob" component on that pipeline. Being the last
# expression of the cell, the value returned by add_pipe is what gets displayed.
# NOTE(review): presumably this component is what provides `doc._.polarity`
# used by analyze_sentiment -- confirm against the spacytextblob docs.
nlp.add_pipe("spacytextblob")

<spacytextblob.spacytextblob.SpacyTextBlob at 0x20bb5f1ce30>

In [14]:
def analyze_sentiment(tweet):
    """Return the polarity of a tweet, measured on its lemmatized form.

    The text is passed through the module-level ``nlp`` pipeline once to get
    each token's lemma; the lemmas are joined with single spaces and that
    string is passed through the pipeline a second time. The ``_.polarity``
    attribute of the second document is returned.
    """
    # First pass: tokenize and collect lemmas.
    lemmas = (tok.lemma_ for tok in nlp(tweet))
    rejoined = " ".join(lemmas)

    # Second pass: score the lemmatized text.
    return nlp(rejoined)._.polarity


# Score every cleaned tweet and store the result in a new 'score' column.
polarity = tweets['tweet'].apply(analyze_sentiment)
tweets['score'] = polarity
tweets.head()

Unnamed: 0,tag,tweet,score
0,toystory,fun facts is the first movie i remember watching at the theater was my first trip to the movies without adult supervision was the first movie my sister and i watched at the theater together,0.23
1,toystory,the first featurelength computeranimated movie was toy story introducing a new era in animated cinema,0.193
2,toystory,toy story is a nearperfect film the only weakness is the dated animation which feels nostalgic rather than outdated and it fits the toy aesthetic its funny clever appropriately low stakes well paced and heartwarming enough to please all ages stars,-0.01
3,toystory,happy th anniversary to top easter egg in the antique shop from a pixar film,0.65
4,toystory,mike sandwich,0.0


In [None]:
# tweets.to_csv(r'tweets_polarity.csv', index=False)

In [15]:
# For each tag, count how many of its tweets received a score of exactly 0.
tweets.loc[tweets['score'] == 0, 'tag'].value_counts()

tag
north                            29
inthelineoffire                  25
asgoodasitgets                   22
junior                           20
indianajonesandthelastcrusade    19
                                 ..
nemesis2nebula                    1
truthorconsequencesnm             1
intimaterelations                 1
butterflykiss                     1
thewarathome                      1
Name: count, Length: 1324, dtype: int64

In [16]:
# Mean score per 'tag', flattened into a two-column frame
# (tag, average_score).
average_sentiment = (
    tweets.groupby('tag')['score']
    .mean()
    .rename('average_score')
    .reset_index()
)

# Show the per-tag averages
average_sentiment

Unnamed: 0,tag,average_score
0,101dalmatians,0.201
1,12angrymen,0.199
2,187,0.154
3,1900,0.050
4,20000leaguesunderthesea,0.234
...,...,...
1393,youngfrankenstein,0.322
1394,youngguns,0.222
1395,younggunsii,0.246
1396,yousocrazy,0.152


In [27]:
# average_sentiment.to_csv(r'average_sentiment.csv', index=False)