## Reading the IMDb top-1000 CSV data using pandas

In [27]:
import pandas as pd
import pickle

In [28]:
# Load the movie CSV into a DataFrame. The path is relative, so it resolves
# against the directory the notebook kernel is running in.
df = pd.read_csv('../Dataset/imdb_top_1000.csv')

In [29]:
# Summarise column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Poster_Link    1000 non-null   object 
 1   Series_Title   1000 non-null   object 
 2   Released_Year  1000 non-null   object 
 3   Certificate    899 non-null    object 
 4   Runtime        1000 non-null   object 
 5   Genre          1000 non-null   object 
 6   IMDB_Rating    1000 non-null   float64
 7   Overview       1000 non-null   object 
 8   Meta_score     843 non-null    float64
 9   Director       1000 non-null   object 
 10  Star1          1000 non-null   object 
 11  Star2          1000 non-null   object 
 12  Star3          1000 non-null   object 
 13  Star4          1000 non-null   object 
 14  No_of_Votes    1000 non-null   int64  
 15  Gross          831 non-null    object 
dtypes: float64(2), int64(1), object(13)
memory usage: 125.1+ KB


### Basic data cleanup

In [30]:
# Turn the comma-separated genre text into a list per movie. The separator
# pattern swallows the whitespace around each comma so every entry is a clean
# token ('Drama' rather than ' Drama').
df['Genre'] = df['Genre'].str.strip().str.split(r'\s*,\s*')

In [31]:
# Shorten two column names; rebinding keeps the step free of in-place mutation.
df = df.rename(columns={'Series_Title': 'Title', 'IMDB_Rating': 'Rating'})

In [32]:
# Combine the four star columns into one ', '-separated string per movie.
df['Actors'] = df['Star1'].str.cat([df['Star2'], df['Star3'], df['Star4']], sep=', ')

In [33]:
# Split on the same ', ' delimiter used to build the string, so the second to
# fourth names do not keep a leading space.
df['Actors'] = df['Actors'].str.split(', ')

In [34]:
# Preview the first five rows to check the renamed and derived columns.
df.head()

Unnamed: 0,Poster_Link,Title,Released_Year,Certificate,Runtime,Genre,Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross,Actors
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,[Drama],9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469,"[Tim Robbins, Morgan Freeman, Bob Gunton, W..."
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"[Crime, Drama]",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411,"[Marlon Brando, Al Pacino, James Caan, Dian..."
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"[Action, Crime, Drama]",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444,"[Christian Bale, Heath Ledger, Aaron Eckhart..."
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"[Crime, Drama]",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000,"[Al Pacino, Robert De Niro, Robert Duvall, ..."
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"[Crime, Drama]",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000,"[Henry Fonda, Lee J. Cobb, Martin Balsam, J..."


In [35]:
# Columns kept for the cleaned movie table, in output order.
cols = [
    'Title',
    'Released_Year',
    'Runtime',
    'Rating',
    'Genre',
    'Director',
    'Actors',
    'Overview',
]

In [36]:
# Keep every row but only the columns listed in `cols`.
df = df.loc[:, cols]

In [37]:
# Write the trimmed table next to the notebook.
# NOTE(review): no `index=` argument is given, so the row index is written as
# an extra leading column; confirm whether consumers of this file expect it.
df.to_csv('clean_movie_data.csv')

## Convert the pandas DataFrame to a list of record dictionaries

In [38]:
# One dict per row, keyed by column name. At this point `movie_db` is a list;
# a later cell rebinds the same name to a title-keyed dict.
movie_db = df.to_dict(orient='records')

## Building the title-keyed `movie_db` dictionary (pickling is optional and currently commented out)

In [39]:
# Re-key the row records by movie title: {title: {field: value, ...}}.
# A later row with the same title replaces the earlier one.
detail_fields = (
    'Released_Year',
    'Runtime',
    'Rating',
    'Genre',
    'Director',
    'Actors',
    'Overview',
)
movie_nested_dict = {}

for row in movie_db:
    movie_nested_dict[row['Title']] = {field: row[field] for field in detail_fields}

# with open('movie_nested_dict.pickle', 'wb') as file:
#     pickle.dump(movie_nested_dict, file, protocol=pickle.HIGHEST_PROTOCOL)

In [41]:
# %whos

In [50]:
# Rebind `movie_db` from the list of row records to the title-keyed dict that
# the search/add functions read and write. This is the same object as
# `movie_nested_dict`, not a copy.
movie_db = movie_nested_dict


# with open('../movie_nested_dict.pickle', 'rb') as file:
#     movie_db = pickle.load(file)

In [43]:
def search_genre(genre):
    """Print the titles in ``movie_db`` tagged with ``genre``.

    The comparison ignores case and any leading whitespace on the stored
    genre. Records whose genre list is ``[None]`` are skipped. A summary line
    is always printed, followed by a numbered list of titles or
    "No Results Found".
    """
    matches = [
        name
        for name, info in movie_db.items()
        if info['Genre'] != [None]
        for tag in info['Genre']
        if tag.lstrip().upper() == genre.upper()
    ]
    print(f"There are {len(matches)} movies for {genre} Genre.")

    if not matches:
        print("No Results Found")
        return

    print("Here are some movies you might enjoy: \n ")
    for position, name in enumerate(matches, start=1):
        print(f"{position}. {name}")
            

In [44]:
# for k,v in movie_db.items():
#     if v['Genre'] != [None]:
#         for g in v['Genre']:
#             if g.lstrip().upper() == 'horror'.upper():
#                 print(k)

In [45]:
def search_title(title):
    """Print details for every movie whose title contains ``title``.

    Matching is a case-insensitive substring test, so an empty search string
    matches every movie.

    Args:
        title: Text to look for inside the keys of ``movie_db``.

    Returns:
        list: The matching titles in ``movie_db`` order (empty when nothing
        matched), so callers can reuse the result instead of parsing output.
    """
    # Upper-case the search text once instead of on every loop iteration.
    needle = str(title).upper()
    matches = [name for name in movie_db if needle in str(name).upper()]

    for name in matches:
        record = movie_db[name]
        print('#'*30)
        print(f"Movie Title: {name}")
        print(f"Genre: {record.get('Genre')}")
        print(f"Year: {record.get('Released_Year')}")
        print(f"Director: {record.get('Director')}")
        print(f"Actors: {record.get('Actors')}")

    # Give the same feedback as the other search functions when nothing matched.
    if not matches:
        print("No Results Found")

    return matches

In [46]:
def search_rating(rating):
    """Print the titles in ``movie_db`` whose rating equals ``rating``.

    Stored ratings are compared numerically, so a rating held as text
    (for example ``'8.5'``) matches the number ``8.5``. Stored values that are
    missing or not numeric never match a numeric search. If ``rating`` itself
    is not numeric, plain equality is used instead.

    Args:
        rating: Rating to look for, as a number or numeric text.
    """
    def _as_number(value):
        """Return ``value`` as a float, or None when it is missing or not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    wanted = _as_number(rating)
    title = []
    for k, v in movie_db.items():
        stored = v.get('Rating')
        if wanted is None:
            # Non-numeric search value: fall back to a plain equality test.
            hit = stored == rating
        else:
            number = _as_number(stored)
            # Small tolerance so float representation noise cannot hide a match.
            hit = number is not None and abs(number - wanted) < 1e-9
        if hit:
            title.append(k)
    print(f"There are {len(title)} movies with {rating} rating.")

    if len(title) > 0:
        print("Here are some movies you might enjoy: \n ")
        for i, t in enumerate(title):
            print(f"{i+1}. {t}")
    else:
        print("No Results Found")

In [47]:
def search_year(year):
    """Print the titles in ``movie_db`` whose ``Released_Year`` equals ``year``.

    The comparison is a plain equality test against the stored value. A
    summary line is always printed, followed by a numbered list of titles or
    "No Results Found".
    """
    found = [name for name, info in movie_db.items() if info['Released_Year'] == year]
    print(f"There are {len(found)} movies released in {year}.")

    if not found:
        print("No Results Found")
        return

    print("Here are some movies you might enjoy: \n ")
    for position, name in enumerate(found, start=1):
        print(f"{position}. {name}")

In [48]:
def add_movie(title, rating, director, year, actors, genre):
    """Add a movie to ``movie_db`` (or replace the entry with the same title).

    Values are normalised so the new record has the same shape as the records
    built from the dataset and can be found by the search functions:

    * blank or whitespace-only text becomes ``None``;
    * ``rating`` is stored as a float when it parses as a number, otherwise
      the text is kept as entered;
    * ``year`` is stored as text;
    * ``genre`` and ``actors`` accept comma-separated text, list-literal text
      such as ``"['A', 'B']"``, or an existing sequence, and are stored as
      lists of trimmed names;
    * a missing genre is stored as ``[None]``, the marker ``search_genre``
      checks for.

    Args:
        title: Key under which the record is stored.
        rating: Rating as a number or text; may be blank.
        director: Director name; may be blank.
        year: Release year as text or a number; may be blank.
        actors: Actor names (see the accepted forms above); may be blank.
        genre: Genre names (see the accepted forms above); may be blank.
    """
    def _blank_to_none(value):
        """Trim text and map None / empty / whitespace-only input to None."""
        if value is None:
            return None
        if isinstance(value, str):
            return value.strip() or None
        return value

    def _as_list(value):
        """Convert comma-separated text or a sequence into a list of clean names."""
        value = _blank_to_none(value)
        if value is None:
            return None
        if isinstance(value, (list, tuple, set)):
            parts = list(value)
        else:
            # Accept both "a, b" and the list-literal form "['a', 'b']".
            parts = str(value).strip('[]').split(',')
        cleaned = [str(part).strip().strip('\'"').strip() for part in parts]
        cleaned = [part for part in cleaned if part]
        return cleaned or None

    rating = _blank_to_none(rating)
    if rating is not None:
        try:
            rating = float(rating)
        except (TypeError, ValueError):
            # Keep what the user typed rather than silently discarding it.
            pass

    year = _blank_to_none(year)
    if year is not None:
        year = str(year)

    genres = _as_list(genre)

    record = {
        'Released_Year': year,
        # Runtime and Overview are not collected here; the keys are present so
        # the record has the same fields as the dataset-derived records.
        'Runtime': None,
        'Rating': rating,
        'Genre': genres if genres is not None else [None],
        'Director': _blank_to_none(director),
        'Actors': _as_list(actors),
        'Overview': None,
    }
    movie_db[title] = record
    print('Movie added')

In [49]:
# Interactive menu: ask for one action, then call the matching search/add function.
if __name__=="__main__":

    raw_selection = input('''
        Select a choice (1 - 5):
        1. Search by Genre
        2. Search by Movie Title
        3. Search by Rating
        4. Search by Year
        5. Add a Movie

        Enter: ''')

    # int() raises ValueError on blank or non-numeric input; treat that as an
    # invalid choice instead of letting the cell fail with a traceback.
    try:
        selection = int(raw_selection)
    except ValueError:
        selection = None

    # calls the specific function based on user selection

    if selection == 1:
        # Searching by Genre
        sub_selection = input('Enter Genre: ')
        search_genre(sub_selection)

    elif selection == 2:
        # Searching by Text in Title (with a loop)
        while True:
            sub_selection = input("""
            Enter Movie Title to Search. 
            Else type 'EXIT' to terminate

            Title Search: """)

            # Stop before searching, so the sentinel itself is never looked up.
            if sub_selection == 'EXIT':
                break

            movies = search_title(sub_selection)

    elif selection == 3:
        # Searching by Rating
        rating = input('Enter a Rating (ex. 8.0, 8.3): ')
        try:
            rating = float(rating)
        except ValueError:
            # Only the conversion is guarded, so errors raised inside
            # search_rating are not mislabelled as bad input.
            print('Rating needs to be numeric')
        else:
            search_rating(rating)

    elif selection == 4:
        # Searching by Year
        year = input('Enter a Movie Year (ex: 1993, 2002): ')
        search_year(year)

    elif selection == 5:
        # Add a new Movie
        mov_title = input('Enter a Movie Title: ')
        mov_genre = input('Enter a Genre: ')
        mov_rating = input('Enter a Movie Rating: ')
        mov_director = input('Enter a Movie Director: ')
        mov_year = input('Enter Movie Year: ')
        mov_actors = input("Enter Actors e.g. ['A. Smith', 'Tom Cruise', 'Jennifer W']: ")

        # Keep asking until a non-empty title is given; 'EXIT' is non-empty,
        # so it ends the loop and is handled by the check below.
        while len(mov_title) == 0:
            print('Title Cannot be Empty. Please try again. To stop type "EXIT"')
            mov_title = input('Enter a Movie Title: ')

        if mov_title != 'EXIT':
            add_movie(mov_title, mov_rating, mov_director, mov_year, mov_actors, mov_genre)

    else:
        print('Invalid selection. Please enter a number from 1 to 5.')



        Select a choice (1 - 5):
        1. Search by Genre
        2. Search by Movie Title
        3. Search by Rating
        4. Search by Year
        5. Add a Movie

        Enter:  1
Enter Genre:  crazy


There are 0 movies for crazy Genre.
No Results Found
