In [1]:
# Importing Libraries
import pandas as pd
import numpy as np
import requests

# Reading the basic dataset

The _military-hollywood-full_imdbidAdded.csv_ contains the rows of movies that US DOD has supported. The data has 7 columns.

|Column name | Description |
| :-:        | -:-:    |
|Title|The title of the film or TV show requesting assistance.|
|Subtitle|An alternate name for the film, or the episode title of the TV show.|
|Status| The military's response to the assistance request.|
|Media Type| The type of media requesting assistance, a Film or TV show.|
|Year| The year that the film or TV episode was released.|
|Remarks| A description of the request from the military's perspective.|      
 

**Note:** This is not an exhaustive list, it only contains the movies list that DoD has choose to release.

In [2]:
# reading the basic dataset
dod_movies = pd.read_csv("./../primary_data/military-hollywood-full_imdbidAdded.csv")
dod_movies

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Year,Remarks
0,"""1968""",Never Made,,OTH,FILM,,THE FILM STARTED OUT VERY NEGATIVE FOR THE ARM...
1,"1,000 MEN AND A BABY",tt0133231,,APP,TV,1997.0,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...
2,1ST FORCE,Never Made,,OTH,FILM,,INITIALLY DOD AND USMC WERE INCLINED TO SUPPOR...
3,24,tt0502209,22,APP,TV,2004.0,APPROVED FILMING FOR ONE DAY WITH TWO MARINE C...
4,3RD DEGREE,tt0098469,,APP,TV,1989.0,PERSONNEL APPEARED ON THIS GAME SHOW AT THE EX...
...,...,...,...,...,...,...,...
852,"WONDER YEARS, THE",tt0094582,ANGEL,LIM,TV,1988.0,THE UNITED STATES AIR FORCE GRANTED STOCK FOOT...
853,X-15,tt0055627,,APP,FILM,1961.0,AIRFORCE AND NASA PROVIDED FULL COOPERATION ON...
854,"YEAR IN THE LIFE, A",tt0092488,ACTS OF FAITH,DEN,TV,1987.0,THE PROJECT WAS DENIED ASSISTANCE.
855,"YOUNG LIONS, THE",tt0052415,,APP,FILM,1958.0,PENTAGON AND STATE DEPARTMENT WENT THROUGH LON...


### Remarks on the basic dataset.

For the movies that were not produced, the IMDB ID is Never made and for the movies we were not able to find online the IMDB ID is left empty.

The Year column is currently filled using the data from the offical document released by the US [Department of Defence](https://drive.google.com/file/d/1NeDVYu_gvEhtdQVtSFPRIapHDxJx6842/view). Later, we shall update it from the IMDB data.

The subtitle is NaN for most cases and contains previous name in case of films and episode name in case of TV series.

In the status column, APP means the assistance was approved by US DoD, DEN means it was denied. LIM means only limited assistance was provided. OTH mean either the film did not request the assistance or has withdrew the request. RSCH means only research assistance was provided.

# Additional Data

Now, we source and append the following additional data to the above data frame: Year, Release Date, Directors, Plot, Awards, Runtime, IMDB Ratings and generes. This data is obtained from [OMDB API](http://www.omdbapi.com/).

The steps involved are
1. Provide the API key and parameter to get full plot.
2. Check if there is a response or not, as some movies are not existing.
3. For the movies that have a response, we collected the required data and return it.
4. Then we shall replace the old year column with new year column and append the remaining columns.

In [3]:
def get_movie_additional_data(imdb_id, curr_year):
    """
    Function to obtain the additional data of the movie such as Year, Release Date, Directors, Plot, Awards, Runtime, IMDB Ratings and generes.

    :param imdb_id(str): The IMDB id of the movie
    :param curr_year(str): The year of release of the movie according to the basic dataset.
    :return addl_data(List): The additional data as a list with elements in the following order [year, genre, runtime, director, plot, award, imdb_rating, release_date]
    """
    # obtaining additional data from OMDB API

    params = {'plot': 'full'}
    apiKey = '7d4700e0'  #  OMDB api key here

    try:
        data_URL = 'http://www.omdbapi.com/?i='+imdb_id+'&apikey='+apiKey
        response = requests.get(data_URL, params=params).json()
    except:
        response = {}

    year = response.get("Year", curr_year)
    if "–" in str(year):
        # In case of tv series, where the episode is not identified, we get the year as an range.
        # In that case, we shall the use the Year provided in the basic dataset.
        year = curr_year
    genre = response.get("Genre")
    runtime = response.get("Runtime")
    director = response.get("Director")
    plot = response.get("Plot")
    award = response.get("Awards")
    imdb_rating = response.get("imdbRating")
    release_date = response.get("Released")

    return [year, genre, runtime, director, plot, award, imdb_rating, release_date]

In [4]:
additional_movie_data = dod_movies.apply(lambda row: get_movie_additional_data(row.IMDB_ID, row.Year), axis='columns', result_type='expand')
additional_movie_data.columns = ["Year_omdb", "genre", "runtime", "director", "plot", "award", "imdb_rating", "release_date"]
dod_movies_with_addl_data = pd.concat([dod_movies, additional_movie_data], axis='columns')
dod_movies_with_addl_data.drop(columns='Year', inplace=True)
dod_movies_with_addl_data.rename(columns={"Year_omdb":"Year"}, inplace=True)
dod_movies_with_addl_data

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Remarks,Year,genre,runtime,director,plot,award,imdb_rating,release_date
0,"""1968""",Never Made,,OTH,FILM,THE FILM STARTED OUT VERY NEGATIVE FOR THE ARM...,,,,,,,,
1,"1,000 MEN AND A BABY",tt0133231,,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997,Drama,96 min,Marcus Cole,A baby in a foreign land is adopted by the men...,,6.9,07 Dec 1997
2,1ST FORCE,Never Made,,OTH,FILM,INITIALLY DOD AND USMC WERE INCLINED TO SUPPOR...,,,,,,,,
3,24,tt0502209,22,APP,TV,APPROVED FILMING FOR ONE DAY WITH TWO MARINE C...,2004,"Action, Crime, Drama, Thriller",42 min,Frederick King Keller,Jack and Tony clash as they wait for the time ...,,9.0,11 May 2004
4,3RD DEGREE,tt0098469,,APP,TV,PERSONNEL APPEARED ON THIS GAME SHOW AT THE EX...,1989,"Crime, Drama, Thriller",100 min,Roger Spottiswoode,Scott Weston is a private investigator who is ...,,5.7,28 May 1989
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,"WONDER YEARS, THE",tt0094582,ANGEL,LIM,TV,THE UNITED STATES AIR FORCE GRANTED STOCK FOOT...,1988,"Comedy, Drama, Family, Romance",22 min,,An adult Kevin Arnold reminisces on his teenag...,Won 1 Golden Globe. Another 24 wins & 70 nomin...,8.3,31 Jan 1988
853,X-15,tt0055627,,APP,FILM,AIRFORCE AND NASA PROVIDED FULL COOPERATION ON...,1961,"Drama, History",107 min,Richard Donner,At the height of the Cold War during the 1960s...,,5.8,22 Dec 1961
854,"YEAR IN THE LIFE, A",tt0092488,ACTS OF FAITH,DEN,TV,THE PROJECT WAS DENIED ASSISTANCE.,1987,Drama,60 min,,"Joe Gardner, a child of the Depression, is a s...",Won 1 Golden Globe. Another 3 wins & 3 nominat...,8.8,16 Sep 1987
855,"YOUNG LIONS, THE",tt0052415,,APP,FILM,PENTAGON AND STATE DEPARTMENT WENT THROUGH LON...,1958,"Action, Drama, War",167 min,Edward Dmytryk,The destiny of three soldiers during World War...,Nominated for 3 Oscars. Another 1 win & 4 nomi...,7.2,02 Apr 1958


### Remarks on the updated dataset

We see that certain values in the columns are 'N/A' obtained from the OMDB API. Thus we shall replace them with NaN.

In [5]:
dod_movies_with_addl_data.replace(regex={'N/A': np.nan}, inplace=True)

We see that dates(year and release data) values in the columns are strings and integers. Thus we shall convert them into datetime.

In [6]:
dod_movies_with_addl_data['release_date'] = pd.to_datetime(dod_movies_with_addl_data['release_date'], errors='ignore', format='%Y%m%d')
dod_movies_with_addl_data['Year'] = pd.to_datetime(dod_movies_with_addl_data['Year'], errors='ignore', format='%Y')

In [7]:
dod_movies_with_addl_data

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Remarks,Year,genre,runtime,director,plot,award,imdb_rating,release_date
0,"""1968""",Never Made,,OTH,FILM,THE FILM STARTED OUT VERY NEGATIVE FOR THE ARM...,,,,,,,,
1,"1,000 MEN AND A BABY",tt0133231,,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997,Drama,96 min,Marcus Cole,A baby in a foreign land is adopted by the men...,,6.9,07 Dec 1997
2,1ST FORCE,Never Made,,OTH,FILM,INITIALLY DOD AND USMC WERE INCLINED TO SUPPOR...,,,,,,,,
3,24,tt0502209,22,APP,TV,APPROVED FILMING FOR ONE DAY WITH TWO MARINE C...,2004,"Action, Crime, Drama, Thriller",42 min,Frederick King Keller,Jack and Tony clash as they wait for the time ...,,9.0,11 May 2004
4,3RD DEGREE,tt0098469,,APP,TV,PERSONNEL APPEARED ON THIS GAME SHOW AT THE EX...,1989,"Crime, Drama, Thriller",100 min,Roger Spottiswoode,Scott Weston is a private investigator who is ...,,5.7,28 May 1989
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,"WONDER YEARS, THE",tt0094582,ANGEL,LIM,TV,THE UNITED STATES AIR FORCE GRANTED STOCK FOOT...,1988,"Comedy, Drama, Family, Romance",22 min,,An adult Kevin Arnold reminisces on his teenag...,Won 1 Golden Globe. Another 24 wins & 70 nomin...,8.3,31 Jan 1988
853,X-15,tt0055627,,APP,FILM,AIRFORCE AND NASA PROVIDED FULL COOPERATION ON...,1961,"Drama, History",107 min,Richard Donner,At the height of the Cold War during the 1960s...,,5.8,22 Dec 1961
854,"YEAR IN THE LIFE, A",tt0092488,ACTS OF FAITH,DEN,TV,THE PROJECT WAS DENIED ASSISTANCE.,1987,Drama,60 min,,"Joe Gardner, a child of the Depression, is a s...",Won 1 Golden Globe. Another 3 wins & 3 nominat...,8.8,16 Sep 1987
855,"YOUNG LIONS, THE",tt0052415,,APP,FILM,PENTAGON AND STATE DEPARTMENT WENT THROUGH LON...,1958,"Action, Drama, War",167 min,Edward Dmytryk,The destiny of three soldiers during World War...,Nominated for 3 Oscars. Another 1 win & 4 nomi...,7.2,02 Apr 1958


### Remarks on the directors for TV shows

For some tvseries, which do not have the episode information in the US DoD document, we have used the tvseries IMDB ID instead of episode IMDB ID. Thus, the OMDB API did not return a director name as there can be multiple directors to the series. Nevertheless, some series have only director. Such information can be found at [source](https://datasets.imdbws.com/), specifically [crew_dataset](title.crew.tsv.gz) and [name_dataset](name.basics.tsv.gz).

Below, the directors name where there is no ambiguity is filled. We defined the ambiguous directors for the TV shows if there is more than one director ids are presented. Hence, we will only replace the directors' names where there is only one director id available.

In [8]:
# Source: https://datasets.imdbws.com/

# read the datasets
names = pd.read_csv('./../primary_data/imdb_data/name.basics.tsv.gz', compression='gzip', header=0, sep='\t')
crews = pd.read_csv('./../primary_data/imdb_data/title.crew.tsv.gz', compression='gzip', header=0, sep='\t')

In [16]:
# join the dod_movies_with_addl_data created above with crews dataset for directors informations 
dod_movies_with_addl_data_with_directors = pd.merge(dod_movies_with_addl_data, 
                                                    crews[['tconst', 'directors']], 
                                                    how ='left', 
                                                    left_on='IMDB_ID', 
                                                    right_on='tconst').drop('tconst', axis=1)

# In directors column from crews dataset, the NaN values are represented with '\N', so we should replace it with NaN 
directors_dict_clean = {'\\N': np.NaN}
dod_movies_with_addl_data_with_directors.directors.replace(directors_dict_clean, inplace=True)


# create a dictionary with director names and their ids
dict_director = names[['nconst', 'primaryName']].set_index('nconst').to_dict()['primaryName']
dod_movies_with_addl_data_with_directors

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Remarks,Year,genre,runtime,director,plot,award,imdb_rating,release_date,directors
0,"""1968""",Never Made,,OTH,FILM,THE FILM STARTED OUT VERY NEGATIVE FOR THE ARM...,,,,,,,,,
1,"1,000 MEN AND A BABY",tt0133231,,APP,TV,VERY POSITIVE DEPICTION OF NAVY IN THIS KOREAN...,1997,Drama,96 min,Marcus Cole,A baby in a foreign land is adopted by the men...,,6.9,07 Dec 1997,nm0170680
2,1ST FORCE,Never Made,,OTH,FILM,INITIALLY DOD AND USMC WERE INCLINED TO SUPPOR...,,,,,,,,,
3,24,tt0502209,22,APP,TV,APPROVED FILMING FOR ONE DAY WITH TWO MARINE C...,2004,"Action, Crime, Drama, Thriller",42 min,Frederick King Keller,Jack and Tony clash as they wait for the time ...,,9.0,11 May 2004,nm0445631
4,3RD DEGREE,tt0098469,,APP,TV,PERSONNEL APPEARED ON THIS GAME SHOW AT THE EX...,1989,"Crime, Drama, Thriller",100 min,Roger Spottiswoode,Scott Weston is a private investigator who is ...,,5.7,28 May 1989,nm0006854
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
852,"WONDER YEARS, THE",tt0094582,ANGEL,LIM,TV,THE UNITED STATES AIR FORCE GRANTED STOCK FOOT...,1988,"Comedy, Drama, Family, Romance",22 min,,An adult Kevin Arnold reminisces on his teenag...,Won 1 Golden Globe. Another 24 wins & 70 nomin...,8.3,31 Jan 1988,"nm0085209,nm0601963,nm0066439,nm0227781,nm0440..."
853,X-15,tt0055627,,APP,FILM,AIRFORCE AND NASA PROVIDED FULL COOPERATION ON...,1961,"Drama, History",107 min,Richard Donner,At the height of the Cold War during the 1960s...,,5.8,22 Dec 1961,nm0001149
854,"YEAR IN THE LIFE, A",tt0092488,ACTS OF FAITH,DEN,TV,THE PROJECT WAS DENIED ASSISTANCE.,1987,Drama,60 min,,"Joe Gardner, a child of the Depression, is a s...",Won 1 Golden Globe. Another 3 wins & 3 nominat...,8.8,16 Sep 1987,"nm0372133,nm0722493,nm0393661,nm0881306,nm0064..."
855,"YOUNG LIONS, THE",tt0052415,,APP,FILM,PENTAGON AND STATE DEPARTMENT WENT THROUGH LON...,1958,"Action, Drama, War",167 min,Edward Dmytryk,The destiny of three soldiers during World War...,Nominated for 3 Oscars. Another 1 win & 4 nomi...,7.2,02 Apr 1958,nm0229424


In [23]:
# check only if director is NaN but directors includes one director id for corresponding movie
dod_movies_with_addl_data_with_directors.loc[(dod_movies_with_addl_data_with_directors.director.isna()) & (dod_movies_with_addl_data_with_directors.directors.notna())]

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Remarks,Year,genre,runtime,director,plot,award,imdb_rating,release_date,directors
145,CHINA BEACH,tt0094433,,DEN,TV,DOD CONCLUDED THAT THE SERIES DID NOT PORTRAY ...,1988,"Drama, History, War",120 min,,"Dateline: November 1967, within klicks of Dana...",Won 1 Golden Globe. Another 25 wins & 46 nomin...,8.2,26 Apr 1988,"nm0001460,nm0949722,nm0500609,nm0294350,nm0314..."
156,CODE 3,tt0050005,"PILOT FOR NEW TV SERIES BASED ON ""COPS"" CONCEP...",APP,TV,CONCURRED WITH USARPAC/ARMY RECOMMENDATION TO ...,1957,"Crime, Drama",30 min,,Code 3 is an American crime drama that aired i...,,7.5,02 Apr 1957,"nm0004573,nm0905729,nm0324013,nm0190096,nm0524..."
217,DIRT WATER DYNASTY,tt0095020,,LIM,TV,THE DEPARTMENT APPROVED USE STOCK FOOTAGE TO T...,1988,Drama,100 min,,"""Dirtwater Dynasty"" is the story of embittered...",2 nominations.,8.3,10 Apr 1988,"nm0420924,nm0694411"
254,FAMILY OF SPIES,tt0099543,,LIM,TV,THE REQUEST FOR EQUIPMENT AND FILMING ON SHIPS...,1990,Drama,175 min,,Park any subtlety at the threshold. The spits ...,Nominated for 2 Golden Globes. Another 1 win &...,6.5,04 Feb 1990,nm0350455
256,FATAL VISION,tt0087244,,OTH,TV,THE FILM WAS QUITE ACCURATE AND FACTUAL. ALL T...,1984,"Crime, Drama, History, Thriller",181 min,,"In 1970, military doctor Jeffrey McDonald repo...",Won 1 Primetime Emmy. Another 2 wins & 8 nomin...,7.8,18 Nov 1984,nm0338719
267,FINDER OF LOST LOVES,tt0086712,,LIM,TV,THE DEPARTMENT FELT THE SCRIPT WOULD BE ADVANT...,1985,Drama,60 min,,Cary Maxwell is a private investigator and own...,,6.7,22 Sep 1984,"nm0113617,nm0082676,nm0681604,nm0676336,nm0494..."
313,GEORGE WASHINGTON,tt0086720,,APP,TV,THE DOD APPROVED USE OF FORT BELVOIR FOR FILMI...,1984,"Biography, Drama, History, War",480 min,,The early life and career of the American Gene...,,7.9,08 Apr 1984,nm0474539
408,JAG,tt0112022,,DEN,TV,QUESTIONABLE WHETHER OR NOT PRODUCTION COMPANY...,1995,"Action, Crime, Drama, Mystery, Thriller",60 min,,"Commander Harmon Rabb, Jr. and Lieutenant Colo...",Won 3 Primetime Emmys. Another 9 wins & 17 nom...,6.6,23 Sep 1995,"nm0426687,nm0004895,nm0181445,nm0621137,nm0844..."
462,"KENNEDYS OF MASSACHUSETTS, THE",tt0098838,,APP,TV,"THE PRODUCTION WAS GRANTED ITS REQUEST, PROVID...",1990,Drama,278 min,,This sweeping mini-series profiling the Kenned...,Won 1 Golden Globe. Another 2 wins & 9 nominat...,7.0,18 Feb 1990,nm0425593
486,M*A*S*H*,tt0068098,,OTH,TV,NOW CLASSIC TALE OF DOCTORS IN KOREAN WAR FIEL...,1972,"Comedy, Drama, War",25 min,,The 4077th Mobile Army Surgical Hospital is st...,Won 8 Golden Globes. Another 53 wins & 153 nom...,8.4,17 Sep 1972,"nm0239291,nm0000257,nm0582401,nm0721728,nm0002..."


We see that for the IMDB_IDs _tt0099543, tt0087244, tt0086720, tt0098838, tt0115309, tt6349394, tt0088594, tt0102879, tt0108941, tt0096725, and tt0096447,_ we have exactly one corresponding director ids which we can match it with director name without any ambiguity.

In [33]:
imdb_ids_for_one_director_id = ['tt0099543', 'tt0087244', 'tt0086720', 'tt0098838', 'tt0115309', 'tt6349394', 'tt0088594', 'tt0102879', 'tt0108941', 'tt0096725', 'tt0096447']
dod_movies_with_addl_data_with_directors.loc[(dod_movies_with_addl_data_with_directors.director.isna()) & (dod_movies_with_addl_data_with_directors.directors.notna()) & (dod_movies_with_addl_data_with_directors.IMDB_ID.isin(imdb_ids_for_one_director_id))]

Unnamed: 0,Title,IMDB_ID,Subtitle,Status,Media Type,Remarks,Year,genre,runtime,director,plot,award,imdb_rating,release_date,directors
254,FAMILY OF SPIES,tt0099543,,LIM,TV,THE REQUEST FOR EQUIPMENT AND FILMING ON SHIPS...,1990,Drama,175 min,,Park any subtlety at the threshold. The spits ...,Nominated for 2 Golden Globes. Another 1 win &...,6.5,04 Feb 1990,nm0350455
256,FATAL VISION,tt0087244,,OTH,TV,THE FILM WAS QUITE ACCURATE AND FACTUAL. ALL T...,1984,"Crime, Drama, History, Thriller",181 min,,"In 1970, military doctor Jeffrey McDonald repo...",Won 1 Primetime Emmy. Another 2 wins & 8 nomin...,7.8,18 Nov 1984,nm0338719
313,GEORGE WASHINGTON,tt0086720,,APP,TV,THE DOD APPROVED USE OF FORT BELVOIR FOR FILMI...,1984,"Biography, Drama, History, War",480 min,,The early life and career of the American Gene...,,7.9,08 Apr 1984,nm0474539
462,"KENNEDYS OF MASSACHUSETTS, THE",tt0098838,,APP,TV,"THE PRODUCTION WAS GRANTED ITS REQUEST, PROVID...",1990,Drama,278 min,,This sweeping mini-series profiling the Kenned...,Won 1 Golden Globe. Another 2 wins & 9 nominat...,7.0,18 Feb 1990,nm0425593
567,PANDORA'S CLOCK,tt0115309,,DEN,TV,PRODUCTION COMPANY REQUESTED USE OF AIR FORCE ...,1996,"Action, Drama, Thriller",176 min,,Quantum Airlines flight 66 has just taken off ...,2 nominations.,6.2,10 Nov 1996,nm0485637
638,"REAGAN YEARS, THE",tt6349394,,APP,TV,APPROVED PROVIDING THE PRODUCTION FORWARDS A L...,1992,,,,,,,,nm0244620
649,ROBERT KENNEDY AND HIS TIMES (PART 1 AND 2),tt0088594,,APP,TV,"INITIALLY, FILMMAKERS DID NOT APPROACH MILITAR...",1985,Drama,309 min,,A personal portrait of one of the most controv...,,7.6,27 Jan 1985,nm0159007
669,SEPARATE BUT EQUAL,tt0102879,,APP,TV,THE ARMY ALLOWED USE OF WALTER REED ARMY HOSPI...,1991,"Drama, History",190 min,,This film follows the true story of the NAACP ...,Nominated for 3 Golden Globes. Another 3 wins ...,7.4,07 Apr 1991,nm0828211
684,"STAND, THE",tt0108941,,DEN,TV,REQUESTED USE OF MILITARY EQUIPMENT AND INSTAL...,1994,"Adventure, Drama, Fantasy, Horror, Sci-Fi",361 min,,When a government-run lab accidentally lets lo...,Won 2 Primetime Emmys. Another 1 win & 7 nomin...,7.2,08 May 1994,nm0308376
827,WAR AND REMEMBRANCE (MINI SERIES),tt0096725,SEQUEL TO THE WINDS OF WAR,APP,TV,APPROVED IN THE BEST INTEREST OF THE DEPARTMEN...,1988,"Drama, War","1,620 min",,"The saga of the Henry family, begun in ""The Wi...",Won 3 Golden Globes. Another 9 wins & 19 nomin...,8.3,13 Nov 1988,nm0193303


In [30]:
one_director_id = dod_movies_with_addl_data_with_directors.loc[(dod_movies_with_addl_data_with_directors.director.isna()) & (dod_movies_with_addl_data_with_directors.directors.notna()) & (dod_movies_with_addl_data_with_directors.IMDB_ID.isin(imdb_ids_for_one_director_id))].directors.to_list()
one_director_id

['nm0350455',
 'nm0338719',
 'nm0474539',
 'nm0425593',
 'nm0485637',
 'nm0244620',
 'nm0159007',
 'nm0828211',
 'nm0308376',
 'nm0193303',
 'nm0680097']

In [31]:
for index, data in enumerate(one_director_id):
    for key, value in dict_director.items():
        if key in data:
            one_director_id[index]=data.replace(key, dict_director[key])

In [32]:
one_director_id

['Stephen Gyllenhaal',
 'David Greene',
 'Buzz Kulik',
 'Lamont Johnson',
 'Eric Laneuville',
 'Phil Dusenberry',
 'Marvin J. Chomsky',
 'George Stevens Jr.',
 'Mick Garris',
 'Dan Curtis',
 'Lee Philips']

In [41]:
interested_indexes = dod_movies_with_addl_data_with_directors.loc[(dod_movies_with_addl_data_with_directors.director.isna()) & (dod_movies_with_addl_data_with_directors.directors.notna()) & (dod_movies_with_addl_data_with_directors.IMDB_ID.isin(imdb_ids_for_one_director_id))].index

In [42]:
# assign the director names found above to the corresponding director id
for i in range(len(one_director_id)):
    dod_movies_with_addl_data_with_directors.loc[interested_indexes[i],'director'] = one_director_id[i]

# Saving the Data

Since we would require this data frame in the analysis further we shall store it on the harddrive. We will store both a csv file and a binary pickle file. The CSV file will be used as human readable format while pickle file will be useful to quick reading.

In [46]:
dod_movies_with_addl_data_with_directors.to_csv("./../with_additional_data/military_hollywood_with_additional_data.csv", index=False)
dod_movies_with_addl_data_with_directors.to_pickle("./../with_additional_data/military_hollywood_with_additional_data.pkl")