In [1]:
# import libraries
import pandas as pd
from imdb import IMDb
import warnings
warnings.filterwarnings('ignore')

# Stretch the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# load the final aggregated movie table (the version enriched with additional information)
movie_data = pd.read_csv(
    filepath_or_buffer="../data/with_additional_data/military_hollywood_with_additional_data.csv"
)

In [4]:
# keep only the titles that carry an IMDB ID: the value must be present
# and must not be the "Never Made" placeholder
has_imdb_id = movie_data["IMDB_ID"].notna() & (movie_data["IMDB_ID"] != "Never Made")
movie_data = movie_data.loc[has_imdb_id].copy().reset_index(drop=True)

In [5]:
# (rows, columns) of the movie table at this point in the notebook
movie_data.shape

(783, 15)

In [6]:
# Fetch the IMDb user reviews of every title in `movie_data`.
# `reviews_all` receives one list of review dicts per title whose reviews could
# be fetched; `failed_ids` records the IMDB IDs of the titles that were skipped.
reviews_all = []
failed_ids = []
ia = IMDb()
for m_id in movie_data.IMDB_ID.astype(str):
    # the lookup uses the ID without its first two characters (e.g. "tt0133231" -> "0133231")
    movie_id = m_id[2:]
    try:
        movie = ia.get_movie(movie_id, ['reviews'])
        # Tag every review with the full IMDB ID of its title. The key is spelled
        # 'idmb_id' on purpose: later cells read the column under that exact name.
        tagged_reviews = [dict(item, idmb_id=m_id) for item in movie['reviews']]
    except Exception:
        # Best effort: a title whose lookup fails, or that has no 'reviews' entry,
        # is skipped. Catching Exception (not a bare except) keeps
        # KeyboardInterrupt / SystemExit able to stop this long-running loop.
        failed_ids.append(m_id)
        continue
    reviews_all.append(tagged_reviews)

In [5]:
# number of titles for which a list of reviews was collected
len(reviews_all)

681

In [6]:
# one DataFrame per title, built from that title's list of review dicts
list_df_reviews = [pd.DataFrame(title_reviews) for title_reviews in reviews_all]

In [7]:
# stack the per-title frames into one table with a fresh 0..n-1 index
df_reviews = pd.concat(list_df_reviews, ignore_index=True)

In [8]:
# (rows, columns) of the combined review table
df_reviews.shape

(11823, 8)

In [12]:
# preview the first rows of the combined review table
df_reviews.head()

Unnamed: 0,content,helpful,title,author,date,rating,not_helpful,idmb_id
0,I accidently came across this movie while surf...,0,,ur0516638,24 June 2000,,0,tt0133231
1,"This is a tearjerker, and knowing that this is...",0,,ur14569527,14 April 2007,1.0,0,tt0133231
2,This is one of those movies that will probably...,0,,ur2105110,20 December 2002,1.0,0,tt0133231
3,This is one of my favorite movies in the past ...,0,,ur1294947,4 August 2001,1.0,0,tt0133231
4,"The movie is pretty good, and relatively accur...",0,,ur97163633,1 January 2019,,0,tt0133231


In [11]:
# persist the scraped reviews as CSV in the working directory
# (the row index is written too, which is the pandas default)
df_reviews.to_csv('df_reviews.csv', index=True)

In [2]:
# reload the saved reviews, keeping only the text, the date and the title's ID
df_reviews = pd.read_csv(
    "df_reviews.csv",
    usecols=['content', 'date', 'idmb_id'],
)
df_reviews.head(5)

Unnamed: 0,content,date,idmb_id
0,I accidently came across this movie while surf...,24 June 2000,tt0133231
1,"This is a tearjerker, and knowing that this is...",14 April 2007,tt0133231
2,This is one of those movies that will probably...,20 December 2002,tt0133231
3,This is one of my favorite movies in the past ...,4 August 2001,tt0133231
4,"The movie is pretty good, and relatively accur...",1 January 2019,tt0133231


In [3]:
# read movie_data and join it with review data
# read the final aggregated movie data with additional information
movie_data = pd.read_csv("../data/with_additional_data/military_hollywood_with_additional_data.csv")
# Recode the 'OTH' media type as 'TV'. The result is assigned back to the column:
# calling replace(..., inplace=True) on a selected column is chained in-place
# mutation, which does not update the frame when pandas Copy-on-Write is active.
movie_data['Media Type'] = movie_data['Media Type'].replace({'OTH': 'TV'})
# Keep only the columns used downstream. 'directors' is not in this list, so it
# is left out here and no separate drop is needed.
movie_data = movie_data[['Title', 'IMDB_ID', 'Status', 'Media Type', 'Remarks', 'Year', 'genre', 'plot', 'release_date']]

In [4]:
# preview the first rows of the trimmed movie table
movie_data.head()

Unnamed: 0,Title,IMDB_ID,Status,Media Type,Remarks,Year,genre,plot,release_date
0,"""1968""",Never Made,OTH,FILM,THE FILM STARTED OUT VERY NEGATIVE FOR THE ARM...,,,,
1,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
2,1ST FORCE,Never Made,OTH,FILM,INITIALLY DOD AND USMC WERE INCLINED TO SUPPOR...,,,,
3,24,tt0502209,APP,TV,APPROVED FILMING FOR ONE DAY WITH TWO MARINE C...,2004.0,"Action, Crime, Drama, Thriller",Jack and Tony clash as they wait for the time ...,11 May 2004
4,3RD DEGREE,tt0098469,APP,TV,PERSONNEL APPEARED ON THIS GAME SHOW AT THE EX...,1989.0,"Crime, Drama, Thriller",Scott Weston is a private investigator who is ...,28 May 1989


In [5]:
# outer join: keeps every review and every title, matched on the IMDB ID
merged = df_reviews.merge(
    movie_data,
    how='outer',
    left_on='idmb_id',
    right_on='IMDB_ID',
)

# titles that have a real IMDB ID but for which no review text was matched
lacks_content = merged['content'].isna()
has_real_id = (merged['IMDB_ID'] != 'Never Made') & merged['IMDB_ID'].notna()
no_reviews = merged.loc[lacks_content & has_real_id].reset_index(drop=True)
no_reviews

Unnamed: 0,content,date,idmb_id,Title,IMDB_ID,Status,Media Type,Remarks,Year,genre,plot,release_date
0,,,,ABOVE THE CLOUDS,tt0024811,APP,FILM,"NAVY PROVIDED USE OF BATTLESHIPS, SUBMARINES, ...",1933.0,"Action, Adventure, Drama, Romance",Another story of the newsreel cameramen and th...,13 Dec 1933
1,,,,AIR WOLF,tt0507183,DEN,TV,AIR FORCE ADVISED PRODUCTION THAT ASSISTANCE W...,1987.0,"Action, Adventure, Sci-Fi, Thriller",While returning from the Soviet Union with cla...,30 Jan 1987
2,,,,ALL-STAR SALUTE TO OUR TROOPS,tt1193546,APP,TV,THE DEPARTMENT OF DEFENSE APPROVED THE PROJECT...,1991.0,"Comedy, Music",Gala homecoming celebration for the troops ret...,03 Apr 1991
3,,,,"AMERICA, YOU'RE TOO YOUNG TO DIE",tt12220558,APP,TV,LIMITED ASSISTANCE WAS APPROVED. IT WAS NO COS...,1986.0,"Short, Documentary",An historical and patriotic film focusing on t...,
4,,,,ANGEL‘S FLIGHT,tt13891322,APP,FILM,APPROVED BY THE DEPARTMENT AFTER BEING REVIEWE...,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
97,,,,TOUR OF DUTY,tt0732287,LIM,TV,AIR FORCE PROVIDED USE OF F-4 TOCK FOOTAGE. EP...,1988.0,"Action, Drama, War",,
98,,,,TRIBECA STORIES,tt0250783,APP,TV,APPROVED NAVY SHIP AND MARINE HONOR GUARD FOR ...,1992.0,,,
99,,,,UNAUTHORIZED BIO OF JANE FONDA,tt14114510,LIM,TV,THE DEPARTMENT AGREED AUTHORIZED THE PRODUCTIO...,1988.0,Documentary,,01 Nov 1988
100,,,,"UNKNOWN SOLDIER, THE",tt9020314,APP,TV,DOCUMENTARY WAS FACTUAL AND WELL PUT TOGETHER....,1985.0,Documentary,,11 Nov 1985


In [11]:
# NOTE: disabled experiment, kept for reference only. It fetches the IMDb reviews
# page of each title in `no_reviews` and looks for "review-container" elements.
# NOTE(review): the final `pd.concat(...)` call passes three scalars rather than a
# list of pandas objects, so it would presumably need fixing before re-enabling.
# from urllib.request import urlopen
# from bs4 import BeautifulSoup
# 
# find_missing_reviews_if_exist = []
# for _, row in no_reviews.iterrows():
#     print(row['IMDB_ID'])
#     url = urlopen('http://www.imdb.com/title/'+row['IMDB_ID']+'/reviews?ref_=tt_ov_rt').read()
#     soup = BeautifulSoup(url,"html.parser")
#     for item in soup.find_all(class_="review-container"):
#         try:
#             review_title = item.find(class_="title").text
#             print(review_title)
#             review = item.find(class_="text").text
#         except:
#             continue
#         find_missing_reviews_if_exist.append(pd.concat(row['IMDB_ID'], review_title, review))

In [12]:
# NOTE: disabled experiment, kept for reference only. It parses the reviews page
# of one hard-coded title (tt0133231).
# NOTE(review): the final print uses `row`, which this snippet never defines.
# url = urlopen('https://www.imdb.com/title/tt0133231/reviews?ref_=tt_ql_3').read()
# soup = BeautifulSoup(url,"html.parser")
# for item in soup.find_all(class_="review-container"):
#     try:
#         review_title = item.find(class_="title").text
#         review = item.find(class_="text").text
#     except:
#         continue
#     print(row['IMDB_ID'], review_title, review)

In [7]:
# rows of the merged table that carry review text, re-indexed from 0
has_content = merged['content'].notna()
yes_reviews = merged.loc[has_content].reset_index(drop=True)
yes_reviews

Unnamed: 0,content,date,idmb_id,Title,IMDB_ID,Status,Media Type,Remarks,Year,genre,plot,release_date
0,I accidently came across this movie while surf...,24 June 2000,tt0133231,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
1,"This is a tearjerker, and knowing that this is...",14 April 2007,tt0133231,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
2,This is one of those movies that will probably...,20 December 2002,tt0133231,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
3,This is one of my favorite movies in the past ...,4 August 2001,tt0133231,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
4,"The movie is pretty good, and relatively accur...",1 January 2019,tt0133231,"1,000 MEN AND A BABY",tt0133231,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997.0,Drama,A baby in a foreign land is adopted by the men...,07 Dec 1997
...,...,...,...,...,...,...,...,...,...,...,...,...
11818,A great movie starring Barbara Eden and her re...,8 June 2001,tt0098696,YOUR MOMMA WEARS COMBAT BOOTS,tt0098696,APP,TV,ARMY AND AIR FORCE ASSISTANCE WAS GRANTED. THE...,1989.0,Comedy,An overprotective single mother tries to stop ...,27 Mar 1989
11819,"While I was in the Army, I received a call fro...",11 July 2020,tt0098696,YOUR MOMMA WEARS COMBAT BOOTS,tt0098696,APP,TV,ARMY AND AIR FORCE ASSISTANCE WAS GRANTED. THE...,1989.0,Comedy,An overprotective single mother tries to stop ...,27 Mar 1989
11820,YOUR MOTHER WEARS COMBAT BOOTS *May Contain Mi...,6 September 2016,tt0098696,YOUR MOMMA WEARS COMBAT BOOTS,tt0098696,APP,TV,ARMY AND AIR FORCE ASSISTANCE WAS GRANTED. THE...,1989.0,Comedy,An overprotective single mother tries to stop ...,27 Mar 1989
11821,One of Jeannie's main traits is that her color...,3 October 2012,tt0098696,YOUR MOMMA WEARS COMBAT BOOTS,tt0098696,APP,TV,ARMY AND AIR FORCE ASSISTANCE WAS GRANTED. THE...,1989.0,Comedy,An overprotective single mother tries to stop ...,27 Mar 1989
