In [1]:
#!pip install pycountry
#!pip install torch
#!pip install transformers
#!pip install xformers

In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import pycountry
from time import sleep

import torch
from transformers import pipeline
from tqdm.notebook import tqdm
from transformers import AutoTokenizer, RobertaForSequenceClassification

import warnings
warnings.filterwarnings("ignore")

Dataset is from Kaggle: https://www.kaggle.com/datasets/dgoenrique/amazon-prime-movies-and-tv-shows

In [3]:
amazon_df = pd.read_csv("titles.csv")

In [4]:
amazon_df.head()

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score
0,tm87233,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],,tt0038650,8.6,467766.0,27.611,8.261
1,tm143047,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,,69,"['comedy', 'war']",['US'],,tt0023969,7.8,60933.0,9.013,7.357
2,tm83884,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,,92,"['drama', 'romance', 'comedy']",['US'],,tt0032599,7.8,60244.0,14.759,7.433
3,ts20945,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.6
4,tm5012,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,,133,"['western', 'drama', 'romance', 'action']",['US'],,tt0040724,7.8,32210.0,12.4,7.4


In [5]:
amazon_df.shape

(10873, 15)

In [6]:
amazon_df.isnull().sum()

id                         0
title                      0
type                       0
description              144
release_year               0
age_certification       7185
runtime                    0
genres                     0
production_countries       0
seasons                 9322
imdb_id                  701
imdb_score              1108
imdb_votes              1120
tmdb_popularity          571
tmdb_score              2126
dtype: int64

In [7]:
# get null percentage of each column
null_rate_dict = {}
for i in amazon_df.columns:
    null_rate = amazon_df[i].isna().sum() / len(amazon_df) * 100
    null_rate_dict[i] = null_rate

In [8]:
# sort the null percentage from highest to lowest
sorted_null_rate = sorted(null_rate_dict.items(), key=lambda x:x[1], reverse = True)
print(sorted_null_rate)

[('seasons', 85.73530764278487), ('age_certification', 66.08111836659616), ('tmdb_score', 19.55302124528649), ('imdb_votes', 10.300744964591189), ('imdb_score', 10.19037983997057), ('imdb_id', 6.4471626965878785), ('tmdb_popularity', 5.251540513197829), ('description', 1.3243814954474387), ('id', 0.0), ('title', 0.0), ('type', 0.0), ('release_year', 0.0), ('runtime', 0.0), ('genres', 0.0), ('production_countries', 0.0)]


In [9]:
# only print out columns with null percentage
for i in sorted_null_rate:
    if i[1] > 0:
        print(f"{i[0]} null percentage: {round(i[1], 2)}%")

seasons null percentage: 85.74%
age_certification null percentage: 66.08%
tmdb_score null percentage: 19.55%
imdb_votes null percentage: 10.3%
imdb_score null percentage: 10.19%
imdb_id null percentage: 6.45%
tmdb_popularity null percentage: 5.25%
description null percentage: 1.32%


In [10]:
amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10873 entries, 0 to 10872
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    10873 non-null  object 
 1   title                 10873 non-null  object 
 2   type                  10873 non-null  object 
 3   description           10729 non-null  object 
 4   release_year          10873 non-null  int64  
 5   age_certification     3688 non-null   object 
 6   runtime               10873 non-null  int64  
 7   genres                10873 non-null  object 
 8   production_countries  10873 non-null  object 
 9   seasons               1551 non-null   float64
 10  imdb_id               10172 non-null  object 
 11  imdb_score            9765 non-null   float64
 12  imdb_votes            9753 non-null   float64
 13  tmdb_popularity       10302 non-null  float64
 14  tmdb_score            8747 non-null   float64
dtypes: float64(5), int6

In [11]:
round(amazon_df.describe(), 2)

Unnamed: 0,release_year,runtime,seasons,imdb_score,imdb_votes,tmdb_popularity,tmdb_score
count,10873.0,10873.0,1551.0,9765.0,9753.0,10302.0,8747.0
mean,2004.08,85.87,2.66,5.97,8973.23,7.61,5.98
std,24.88,34.16,3.72,1.36,48977.67,45.85,1.51
min,1912.0,0.0,1.0,1.1,5.0,0.0,0.5
25%,2002.0,65.0,1.0,5.1,119.0,1.33,5.07
50%,2015.0,89.0,1.0,6.1,488.0,2.66,6.0
75%,2019.0,102.0,3.0,7.0,2493.0,6.18,6.98
max,2023.0,940.0,53.0,9.9,2081757.0,3187.53,10.0


In [12]:
# all the null seasons are movie type
amazon_df[amazon_df['seasons'].isna()]['type'].unique()

array(['MOVIE'], dtype=object)

In [13]:
# fill the null values of seasons to zero
amazon_df['seasons'] = amazon_df['seasons'].fillna(0)

In [14]:
# get the first genre from genres as our primary genre
amazon_df['primary_genre'] = amazon_df['genres'].apply(lambda x: x.strip("[ ").strip("]").split(",")[0])
amazon_df['primary_genre'] = amazon_df['primary_genre'].str.replace("'", "")
amazon_df.head()

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,primary_genre
0,tm87233,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],0.0,tt0038650,8.6,467766.0,27.611,8.261,drama
1,tm143047,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,,69,"['comedy', 'war']",['US'],0.0,tt0023969,7.8,60933.0,9.013,7.357,comedy
2,tm83884,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,,92,"['drama', 'romance', 'comedy']",['US'],0.0,tt0032599,7.8,60244.0,14.759,7.433,drama
3,ts20945,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.6,comedy
4,tm5012,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,,133,"['western', 'drama', 'romance', 'action']",['US'],0.0,tt0040724,7.8,32210.0,12.4,7.4,western


In [15]:
amazon_df['production_countries'].unique()

array(["['US']", "['GB']", "['SU']", "['DE', 'GB']", "['DE', 'US']",
       "['MX']", "['CA', 'US']", "['US', 'CA']", "['IN']", '[]', "['SE']",
       "['IT', 'US']", "['GB', 'US', 'PA', 'ES']", "['MA', 'GB']",
       "['IT', 'FR']", "['HK']", "['FR', 'DE', 'IT']",
       "['US', 'FR', 'DE']", "['DE']", "['GB', 'US']", "['CA']",
       "['IT', 'US', 'ES']", "['JP']", "['IT', 'GB', 'US']",
       "['CA', 'GB']", "['IT', 'ES']", "['DE', 'IT']", "['IT']",
       "['US', 'NL']", "['US', 'GB']", "['PR', 'US']", "['US', 'MX']",
       "['FR']", "['US', 'DE']", "['CA', 'FR']", "['ES']", "['US', 'PR']",
       "['FR', 'IT']", "['ES', 'IT']", "['IE', 'US']", "['PH', 'US']",
       "['CH', 'DE']", "['RO']", "['US', 'FR']", "['YU', 'XC']",
       "['US', 'SU']", "['JP', 'US']", "['FR', 'US']",
       "['ES', 'GB', 'US']", "['DE', 'LI', 'IT', 'ES']", "['KR', 'US']",
       "['MX', 'IT']", "['GB', 'FR']", "['DE', 'IT', 'FR']",
       "['KR', 'JP']", "['YU', 'US']", "['US', 'IT']", "['IT', 'CA']",
 

In [16]:
# get the primary country code from the list as production country
amazon_df['production_country'] = amazon_df['production_countries'].apply(lambda x: x.strip("[ ").strip("]").split(",")[0])
amazon_df['production_country'] = amazon_df['production_country'].str.replace("'", "")
amazon_df.head()

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,primary_genre,production_country
0,tm87233,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],0.0,tt0038650,8.6,467766.0,27.611,8.261,drama,US
1,tm143047,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,,69,"['comedy', 'war']",['US'],0.0,tt0023969,7.8,60933.0,9.013,7.357,comedy,US
2,tm83884,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,,92,"['drama', 'romance', 'comedy']",['US'],0.0,tt0032599,7.8,60244.0,14.759,7.433,drama,US
3,ts20945,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.6,comedy,US
4,tm5012,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,,133,"['western', 'drama', 'romance', 'action']",['US'],0.0,tt0040724,7.8,32210.0,12.4,7.4,western,US


In [17]:
# print unique values of country code from the production country list
abbreviated_countries = amazon_df['production_country']
abbreviated_countries.unique()

array(['US', 'GB', 'SU', 'DE', 'MX', 'CA', 'IN', '', 'SE', 'IT', 'MA',
       'HK', 'FR', 'JP', 'PR', 'ES', 'IE', 'PH', 'CH', 'RO', 'YU', 'KR',
       'TW', 'MC', 'IL', 'EG', 'RU', 'AU', 'NZ', 'AT', 'ZA', 'LI', 'BR',
       'DK', 'IR', 'PT', 'BE', 'FI', 'AR', 'NL', 'ID', 'VE', 'XC', 'NO',
       'BG', 'CN', 'TH', 'CL', 'LU', 'DO', 'CO', 'AE', 'PL', 'SK', 'JM',
       'IS', 'RS', 'CZ', 'SG', 'GR', 'HU', 'LT', 'UA', 'KZ', 'NG', 'UY',
       'MT', 'AF', 'TR', 'VN', 'KE', 'BO', 'GE', 'TT', 'BY', 'MY', 'PK',
       'PE', 'XK', 'TC', 'LB', 'NP', 'MN', 'NI', 'SB', 'CR', 'PA', 'UZ',
       'NA', 'KH', 'QA', 'CM', 'EE', 'CU', 'LV', 'SY', 'BM', 'IO', 'PF',
       'HN', 'TN', 'EC', 'BD', 'MK'], dtype=object)

In [18]:
full_countries = []
# for each production country code, get the full country name and store in a list
for abbreviation in abbreviated_countries:
    try:
        country = pycountry.countries.get(alpha_2 = abbreviation).name
        full_countries.append(country)
    except:
        full_countries.append(abbreviation)

In [19]:
len(full_countries)

10873

In [20]:
# create a new column with the full country name
amazon_df['full_country_name'] = full_countries
amazon_df

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,primary_genre,production_country,full_country_name
0,tm87233,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],0,tt0038650,8.6,467766.0,27.611,8.261,drama,US,United States
1,tm143047,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,,69,"['comedy', 'war']",['US'],0,tt0023969,7.8,60933.0,9.013,7.357,comedy,US,United States
2,tm83884,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,,92,"['drama', 'romance', 'comedy']",['US'],0,tt0032599,7.8,60244.0,14.759,7.433,drama,US,United States
3,ts20945,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.600,comedy,US,United States
4,tm5012,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,,133,"['western', 'drama', 'romance', 'action']",['US'],0,tt0040724,7.8,32210.0,12.400,7.400,western,US,United States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10868,tm1292884,Hunt,MOVIE,"Arjun is the ACP of Hyderabad, who gets involv...",2023,,129,"['action', 'thriller', 'crime']",['IN'],0,tt21946058,5.8,1269.0,4.403,4.300,action,IN,India
10869,tm1284753,Ennalum Ente Aliya,MOVIE,It tells the story of a teen girl from a Musli...,2023,,113,"['drama', 'comedy']",['IN'],0,tt23805304,5.6,812.0,3.406,8.000,drama,IN,India
10870,tm1303380,Anubhav Singh Bassi: Bas Kar Bassi,MOVIE,"Fresh out of national law university, Bassi ar...",2023,,83,['comedy'],[],0,tt26548127,8.3,10.0,1.960,,comedy,,
10871,tm1307408,Pinkfong! Summer in Wonderville,MOVIE,It’s a hot summer day in Wonderville! Let’s jo...,2023,,47,[],[],0,,,,1.336,,,,


# Sentiment Analysis on Description

In [75]:
example = amazon_df['description'][0]
example

"A holiday favourite for generations...  George Bailey has spent his entire life giving to the people of Bedford Falls.  All that prevents rich skinflint Mr. Potter from taking over the entire town is George's modest building and loan company.  But on Christmas Eve the business's $8,000 is lost and George's troubles begin."

Hugging Face, RoBERTa: https://huggingface.co/docs/transformers/main/en/model_doc/roberta#transformers.RobertaForSequenceClassification

roberta-base-go_emotions: https://huggingface.co/SamLowe/roberta-base-go_emotions

In [76]:
# roberta-base-go_emotions transformer model for one example
tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")
model = RobertaForSequenceClassification.from_pretrained("SamLowe/roberta-base-go_emotions")

inputs = tokenizer(example, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# print the first emotion label with the highest probablity
predicted_class_id = logits.argmax().item()
print(model.config.id2label[predicted_class_id])

sadness


In [52]:
# need to make sure that descriptions are in string data type 
amazon_df['description'] = amazon_df['description'].astype('str')
amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10873 entries, 0 to 10872
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    10873 non-null  object 
 1   title                 10873 non-null  object 
 2   type                  10873 non-null  object 
 3   description           10873 non-null  object 
 4   release_year          10873 non-null  int64  
 5   age_certification     3688 non-null   object 
 6   runtime               10873 non-null  int64  
 7   genres                10873 non-null  object 
 8   production_countries  10873 non-null  object 
 9   seasons               10873 non-null  object 
 10  imdb_id               10172 non-null  object 
 11  imdb_score            9765 non-null   float64
 12  imdb_votes            9753 non-null   float64
 13  tmdb_popularity       10302 non-null  float64
 14  tmdb_score            8747 non-null   float64
 15  primary_genre      

In [78]:
emotion_sentiments = {}

# tqdm library is used to make progress bar for the below operation
for i, row in tqdm(amazon_df.iterrows(), total = len(amazon_df)):
    try:
        # make emotion sentiment prediction for each description
        text = row['description']
        row_id = row['id']
        inputs = tokenizer(text, return_tensors="pt")

        with torch.no_grad():
            logits = model(**inputs).logits

        predicted_class_id = logits.argmax().item()
        emotion_sentiments[row_id] = model.config.id2label[predicted_class_id]
    except:
        print(row_id, "Error Occured")

  0%|          | 0/10873 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (723 > 512). Running this sequence through the model will result in indexing errors


ts69050 Error Occured


In [80]:
print(len(emotion_sentiments))

10872


In [81]:
emotion_sentiments.keys()

dict_keys(['tm87233', 'tm143047', 'tm83884', 'ts20945', 'tm5012', 'ts37076', 'tm82253', 'tm88469', 'tm82560', 'tm160494', 'tm146745', 'tm19248', 'tm97735', 'tm116781', 'tm83723', 'tm112424', 'tm120863', 'tm100333', 'tm17025', 'tm19424', 'tm74259', 'tm155610', 'tm63937', 'tm18385', 'tm89268', 'tm2838', 'tm5096', 'tm113731', 'tm127199', 'tm154', 'tm111987', 'tm74984', 'tm64852', 'tm264908', 'tm85417', 'tm78387', 'tm16141', 'tm67012', 'tm274240', 'tm137796', 'tm1227', 'tm131025', 'tm110888', 'tm119274', 'tm118318', 'tm53452', 'tm82637', 'tm116744', 'tm164354', 'tm6408', 'tm997', 'tm112135', 'tm7905', 'tm82402', 'tm88502', 'tm116534', 'tm131498', 'tm5162', 'tm209459', 'tm165135', 'tm84154', 'tm138416', 'tm56594', 'tm75528', 'tm264738', 'tm22806', 'tm84810', 'tm113646', 'tm165975', 'tm64369', 'tm2209', 'tm53874', 'tm163136', 'tm71213', 'tm69859', 'tm106873', 'tm140761', 'tm309343', 'tm8190', 'tm5661', 'tm101000', 'tm71517', 'tm67142', 'tm79526', 'tm192453', 'tm88001', 'tm9887', 'tm166542', 

In [83]:
emotion_sentiments.values()

dict_values(['sadness', 'neutral', 'neutral', 'neutral', 'neutral', 'amusement', 'optimism', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'disappointment', 'neutral', 'disappointment', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'confusion', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'love', 'neutral', 'curiosity', 'sadness', 'joy', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'realization', 'neutral', 'surprise', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'joy', 'neutral', 'neutral', 'disappointment', 'neutral', 'neutral', 'neutral', 'sadness', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'sadness', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'realization', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neu

In [85]:
# create emotion sentiments dataframe
emotion_df = pd.DataFrame({
    'id': emotion_sentiments.keys(),
    'description_emotion': emotion_sentiments.values()
})
emotion_df.head()

Unnamed: 0,id,description_emotion
0,tm87233,sadness
1,tm143047,neutral
2,tm83884,neutral
3,ts20945,neutral
4,tm5012,neutral


In [89]:
merged_df = emotion_df.merge(amazon_df, on = 'id', how = 'left')
merged_df

Unnamed: 0,id,description_emotion,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,primary_genre,production_country,full_country_name
0,tm87233,sadness,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],0,tt0038650,8.6,467766.0,27.611,8.261,drama,US,United States
1,tm143047,neutral,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,,69,"['comedy', 'war']",['US'],0,tt0023969,7.8,60933.0,9.013,7.357,comedy,US,United States
2,tm83884,neutral,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,,92,"['drama', 'romance', 'comedy']",['US'],0,tt0032599,7.8,60244.0,14.759,7.433,drama,US,United States
3,ts20945,neutral,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.600,comedy,US,United States
4,tm5012,neutral,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,,133,"['western', 'drama', 'romance', 'action']",['US'],0,tt0040724,7.8,32210.0,12.400,7.400,western,US,United States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10867,tm1292884,neutral,Hunt,MOVIE,"Arjun is the ACP of Hyderabad, who gets involv...",2023,,129,"['action', 'thriller', 'crime']",['IN'],0,tt21946058,5.8,1269.0,4.403,4.300,action,IN,India
10868,tm1284753,neutral,Ennalum Ente Aliya,MOVIE,It tells the story of a teen girl from a Musli...,2023,,113,"['drama', 'comedy']",['IN'],0,tt23805304,5.6,812.0,3.406,8.000,drama,IN,India
10869,tm1303380,neutral,Anubhav Singh Bassi: Bas Kar Bassi,MOVIE,"Fresh out of national law university, Bassi ar...",2023,,83,['comedy'],[],0,tt26548127,8.3,10.0,1.960,,comedy,,
10870,tm1307408,excitement,Pinkfong! Summer in Wonderville,MOVIE,It’s a hot summer day in Wonderville! Let’s jo...,2023,,47,[],[],0,,,,1.336,,,,


In [90]:
merged_df.isnull().sum()

id                         0
description_emotion        0
title                      0
type                       0
description                0
release_year               0
age_certification       7184
runtime                    0
genres                     0
production_countries       0
seasons                    0
imdb_id                  701
imdb_score              1108
imdb_votes              1120
tmdb_popularity          571
tmdb_score              2125
primary_genre              0
production_country         0
full_country_name          0
dtype: int64

In [91]:
# replace No Data to null values
merged_df['age_certification'].replace(np.nan, 'No Data', inplace = True)
merged_df['production_country'].replace('', 'No Data', inplace = True)
merged_df['primary_genre'].replace('', 'No Data', inplace = True)
merged_df['full_country_name'].replace('', 'No Data', inplace = True)
merged_df

Unnamed: 0,id,description_emotion,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,primary_genre,production_country,full_country_name
0,tm87233,sadness,It's a Wonderful Life,MOVIE,A holiday favourite for generations... George...,1946,PG,130,"['drama', 'family', 'fantasy', 'romance', 'com...",['US'],0,tt0038650,8.6,467766.0,27.611,8.261,drama,US,United States
1,tm143047,neutral,Duck Soup,MOVIE,Rufus T. Firefly is named president/dictator o...,1933,No Data,69,"['comedy', 'war']",['US'],0,tt0023969,7.8,60933.0,9.013,7.357,comedy,US,United States
2,tm83884,neutral,His Girl Friday,MOVIE,"Hildy, the journalist former wife of newspaper...",1940,No Data,92,"['drama', 'romance', 'comedy']",['US'],0,tt0032599,7.8,60244.0,14.759,7.433,drama,US,United States
3,ts20945,neutral,The Three Stooges,SHOW,The Three Stooges were an American vaudeville ...,1934,TV-PG,19,"['comedy', 'family']",['US'],26.0,tt0850645,8.5,1149.0,15.424,7.600,comedy,US,United States
4,tm5012,neutral,Red River,MOVIE,Headstrong Thomas Dunson starts a thriving Tex...,1948,No Data,133,"['western', 'drama', 'romance', 'action']",['US'],0,tt0040724,7.8,32210.0,12.400,7.400,western,US,United States
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10867,tm1292884,neutral,Hunt,MOVIE,"Arjun is the ACP of Hyderabad, who gets involv...",2023,No Data,129,"['action', 'thriller', 'crime']",['IN'],0,tt21946058,5.8,1269.0,4.403,4.300,action,IN,India
10868,tm1284753,neutral,Ennalum Ente Aliya,MOVIE,It tells the story of a teen girl from a Musli...,2023,No Data,113,"['drama', 'comedy']",['IN'],0,tt23805304,5.6,812.0,3.406,8.000,drama,IN,India
10869,tm1303380,neutral,Anubhav Singh Bassi: Bas Kar Bassi,MOVIE,"Fresh out of national law university, Bassi ar...",2023,No Data,83,['comedy'],[],0,tt26548127,8.3,10.0,1.960,,comedy,No Data,No Data
10870,tm1307408,excitement,Pinkfong! Summer in Wonderville,MOVIE,It’s a hot summer day in Wonderville! Let’s jo...,2023,No Data,47,[],[],0,,,,1.336,,No Data,No Data,No Data


In [92]:
merged_df.to_excel("Amazon_Prime_Video_with_Sentiments.xlsx", index = False)