In [1]:
import numpy as np 
import pandas as pd 

In [2]:
df = pd.read_csv('youtubedata.csv')
df 

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,description,view_count,like_count,favorite_count,comment_count,time_in_seconds,comment
0,0,0,Toy Story (1995),Trailer for Toy Story (1995) captured from the...,112928,125,0,0,91.0,
1,1,1,Grumpier Old Men (1995),"The more things change, the more they stay the...",212610,218,0,13,112.0,Buena película de comedia romántica
2,2,2,Heat (1995),"Director: Michael Mann.\nCast: Al Pacino, Robe...",1390267,7539,0,638,148.0,Pacino and De Niro make this movie together
3,3,3,GoldenEye (1995),"Release Date: November 13, 1995\n\nPierce Bros...",378404,2175,0,200,172.0,Cool IT men
4,4,4,"American President, The (1995)",Subscribe to MovieTrailersByVD: http://bit.ly/...,16305,25,0,2,170.0,Why does this have the Little Women (1994) sco...
...,...,...,...,...,...,...,...,...,...,...
17807,17807,3708,Memoirs of an Invisible Man (1992),"Like most Chevy Chase movies from the '90s, Me...",75124,130,0,17,116.0,"And finally, I get to see the full trailer of ..."
17808,17808,3709,God told Me To (1976),"""Shot guerilla-style all over New York City by...",5562,109,0,5,90.0,The perfect film for Easter ! 😉
17809,17809,3710,"Brood, The (1979)",Trailer of the film The Brood without any defe...,55317,87,0,18,63.0,0:39 Am I the only one totally impressed by th...
17810,17810,3711,Underworld (2003),Director: Len Wiseman\r\nCast: Kate Beckinsale...,349495,285,0,15,148.0,What’s the song in the trailer


##### Data Cleaning

In [3]:
df.describe()

Unnamed: 0.1,Unnamed: 0,view_count,like_count,favorite_count,comment_count,time_in_seconds
count,3713.0,3713.0,3713.0,3713.0,3713.0,3713.0
mean,1856.0,406127.7,1109.281444,0.0,108.236736,146.765419
std,1071.995103,2327292.0,3918.823368,0.0,300.448995,126.667461
min,0.0,241.0,0.0,0.0,0.0,61.0
25%,928.0,33013.0,97.0,0.0,9.0,103.0
50%,1856.0,101620.0,258.0,0.0,30.0,130.0
75%,2784.0,332630.0,786.0,0.0,93.0,154.0
max,3712.0,125455700.0,97644.0,0.0,7430.0,3037.0


In [4]:
df.isnull().sum()

Unnamed: 0           0
title                0
description         91
view_count           0
like_count           0
favorite_count       0
comment_count        0
time_in_seconds      0
comment            142
dtype: int64

In [5]:
df.fillna('N/A', inplace=True)

##### A list of the top-10 videos based on the total views

In [6]:
# Sorting the data on the basis of view counts
df_sorted = df.sort_values(by='view_count', ascending=False)

In [7]:
type(df_sorted)

pandas.core.frame.DataFrame

In [8]:
# Getting and printing the top 10 videos on the basis of view counts
top_10_videos = df_sorted.head(10)

print("Top 10 videos based on the total views: ")
top_10_titles_and_views = top_10_videos[['title', 'view_count']].reset_index(drop=True)
print(top_10_titles_and_views)



Top 10 videos based on the total views: 
                                              title  view_count
0                                 Unfaithful (2002)   125455689
1  Pauline at the Beach (Pauline à la Plage) (1983)    37865868
2                                   High Art (1998)    19685025
3                           Romeo and Juliet (1968)    16586461
4                                  Like Mike (2002)    13322041
5                               Finding Nemo (2003)    12766765
6                                  RoboCop 2 (1990)    12503094
7                                  Secretary (2002)    12019459
8                                        xXx (2002)     8249161
9                        Requiem for a Dream (2000)     8191873


##### A list of the bottom-10 videos based on the total views

In [9]:
df_sorted = df.sort_values(by='view_count', ascending=True)

In [10]:
# Getting and printing the bottom 10 videos on the basis of view counts
bottom_10_videos = df_sorted.head(10)

print("Bottom 10 videos based on the total views: ")
bottom_10_titles_and_views = bottom_10_videos[['title', 'view_count']].reset_index(drop=True)
print(bottom_10_titles_and_views)

Bottom 10 videos based on the total views: 
                                          title  view_count
0                         Love Walked In (1998)         241
1  Sacco and Vanzetti (Sacco e Vanzetti) (1971)         459
2                                   Eden (1997)         627
3                       Leopard Son, The (1996)         634
4                             My Kingdom (2001)         709
5                                  Trans (1998)         715
6                                  Rosie (1998)         782
7                               Low Life (1994)         860
8                Philadelphia Story, The (1940)         868
9                       Bringing Up Baby (1938)         912


##### The most liked video

In [11]:
# Find the row with the maximum number of likes
most_liked_video = df[df['like_count'] == df['like_count'].max()]

print("The most liked video is: ", most_liked_video['title'].to_string(index=False, header=False), "with", most_liked_video['like_count'].to_string(index=False, header=False), "likes.")

The most liked video is:  RoboCop 2 (1990) with 97644 likes.


##### The least liked video

In [12]:
# Find the row with the maximum number of likes
least_liked_video = df[df['like_count'] == df['like_count'].min()]

print("Least Liked videos based on the like count: ")
least_likes_titles_and_views = least_liked_video[['title', 'like_count']].reset_index(drop=True)
print(least_likes_titles_and_views)

Least Liked videos based on the like count: 
                                            title  like_count
0  Bread and Chocolate (Pane e cioccolata) (1973)           0
1                           Love Walked In (1998)           0


##### The video with the highest duration

In [13]:
# Find the row with the highest duration
highest_duration_video = df[df['time_in_seconds'] == df['time_in_seconds'].max()]

In [14]:
highest_duration_video

Unnamed: 0.1,Unnamed: 0,title,description,view_count,like_count,favorite_count,comment_count,time_in_seconds,comment
2565,2565,Siegfried & Roy: The Magic Box (1999),The Story of Siegfried and Roy.,124780,577,0,71,3037.0,R.I.P Siegfried and Roy


##### Applying sentiment analysis on the downloaded comments for each of the videos using VADER.

In [15]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [16]:
# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

In [17]:
# Drop rows with no comments or "N/A" comments
df = df[df['comment'].notna() & (df['comment'] != "N/A") & (df['comment'] != "")]

In [18]:
sentiments = [analyzer.polarity_scores(each)['compound'] for each in df['comment']]

In [19]:
sentiments

[0.0,
 0.0,
 0.3182,
 0.0,
 0.7579,
 0.0,
 0.4588,
 0.0,
 0.0,
 0.4201,
 -0.7184,
 0.7964,
 0.6369,
 0.875,
 -0.5859,
 0.0,
 0.6369,
 0.4824,
 0.6369,
 0.6486,
 -0.7005,
 0.9666,
 0.4404,
 0.743,
 0.0,
 -0.296,
 0.0,
 -0.0388,
 0.7506,
 0.4939,
 0.0,
 0.0,
 0.0,
 0.5994,
 0.1536,
 -0.9313,
 -0.34,
 0.7964,
 0.7073,
 0.4019,
 0.7096,
 0.8258,
 0.0,
 0.6892,
 0.9638,
 0.0,
 0.5859,
 0.173,
 0.0,
 0.0,
 0.3818,
 -0.3612,
 0.0,
 -0.5719,
 0.0,
 0.0,
 0.5994,
 0.0,
 0.2944,
 0.0,
 0.4019,
 0.6369,
 0.4588,
 0.6369,
 0.8126,
 0.8126,
 0.9049,
 0.0,
 -0.0516,
 -0.5096,
 0.0,
 0.0,
 -0.765,
 -0.4767,
 0.0,
 0.636,
 0.1531,
 0.0,
 0.6597,
 0.0,
 -0.4404,
 0.0,
 0.6166,
 0.6606,
 0.0,
 0.4404,
 0.4588,
 0.0,
 0.4588,
 0.0,
 -0.2975,
 -0.34,
 -0.7081,
 0.0,
 0.0,
 -0.2023,
 0.7506,
 0.0,
 0.0,
 0.0,
 0.5093,
 0.6833,
 -0.5106,
 0.0,
 0.6879,
 0.4215,
 0.6369,
 0.0,
 0.0,
 -0.5093,
 0.4215,
 0.0,
 0.0,
 0.8507,
 0.0,
 0.802,
 -0.965,
 0.3109,
 0.4588,
 0.0,
 0.0,
 0.0,
 0.032,
 0.34,
 0.0,
 0.5423

In [20]:
# Add a new column named 'sentiment' to the DataFrame
df['sentiment'] = sentiments
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiment'] = sentiments


Unnamed: 0.1,Unnamed: 0,title,description,view_count,like_count,favorite_count,comment_count,time_in_seconds,comment,sentiment
1,1,Grumpier Old Men (1995),"The more things change, the more they stay the...",212610,218,0,13,112.0,Buena película de comedia romántica,0.0000
2,2,Heat (1995),"Director: Michael Mann.\nCast: Al Pacino, Robe...",1390267,7539,0,638,148.0,Pacino and De Niro make this movie together,0.0000
3,3,GoldenEye (1995),"Release Date: November 13, 1995\n\nPierce Bros...",378404,2175,0,200,172.0,Cool IT men,0.3182
4,4,"American President, The (1995)",Subscribe to MovieTrailersByVD: http://bit.ly/...,16305,25,0,2,170.0,Why does this have the Little Women (1994) sco...,0.0000
5,5,Nixon (1995),"One of the best film trailers of all time, thi...",290681,896,0,237,273.0,Anthony Hopkins can play any role; that&#39;s ...,0.7579
...,...,...,...,...,...,...,...,...,...,...
3707,3707,Once Upon a Time in China III (Wong Fei-hung t...,Original trailer to the 1993 martial arts epic...,74632,58,0,15,179.0,"<a href=""https://www.youtube.com/watch?v=S_Gh2...",0.0000
3708,3708,Memoirs of an Invisible Man (1992),"Like most Chevy Chase movies from the '90s, Me...",75124,130,0,17,116.0,"And finally, I get to see the full trailer of ...",-0.4404
3709,3709,God told Me To (1976),"""Shot guerilla-style all over New York City by...",5562,109,0,5,90.0,The perfect film for Easter ! 😉,0.6114
3710,3710,"Brood, The (1979)",Trailer of the film The Brood without any defe...,55317,87,0,18,63.0,0:39 Am I the only one totally impressed by th...,0.7960
