In [2]:
from csv import reader

def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a list and, optionally, the list's dimensions.

    Every element of dataset[start:end] is printed, each followed by
    blank lines so that long rows are easy to tell apart.

    Keyword arguments:
    dataset -- list whose elements we want to see
    start -- index of the first element to print (included)
    end -- index at which printing stops (excluded)
    rows_and_columns -- optional flag. If True, also print the number of
                        rows (len(dataset)) and the number of columns
                        (length of the first row). An empty dataset is
                        reported as having 0 columns.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # emits blank lines between consecutive rows

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of failing with an IndexError on dataset[0].
        n_columns = len(dataset[0]) if len(dataset) > 0 else 0
        print('Number of columns:', n_columns)
        
     


def duplicate_and_unique_movies(dataset, index_):
    """Report how many rows repeat a value already seen in one column.

    The dataset is a nested list. Rows are scanned in order; the first
    time a value appears at position 'index_' it is remembered, and every
    later occurrence of the same value is counted as a duplicate.
    The function prints the number of duplicate entries and up to 15
    examples of duplicated values.

    Keyword arguments:
    dataset -- two dimensional list which we want to explore
    index_ -- column index whose value is checked for duplication
              (values must be hashable, e.g. the strings read from a CSV)
    """
    duplicate = []
    # A set gives constant-time membership checks; a list here would make
    # the whole scan quadratic in the number of rows.
    seen = set()

    for movie in dataset:
        name = movie[index_]
        if name in seen:
            duplicate.append(name)
        else:
            seen.add(name)

    print('Number of duplicate Movies:', len(duplicate))
    print('\n')
    print('Examples of duplicate Movies:', duplicate[:15])
    



def movies_lang(dataset, index_, lang_):
    """Extract the movies of a particular language.

    Keeps every row of 'dataset' whose value at position 'index_' equals
    'lang_', prints the first few matching rows via explore_data(), and
    returns the filtered list.

    Keyword arguments:
    dataset -- list containing the details of the movies
    index_ -- column index that holds the language of each movie
    lang_ -- desired language for which we want to filter the movies

    Returns:
    movies_ -- list with details of the movies in the selected language
    """
    # Filter the rows passed in as 'dataset' (not the global 'movies' list),
    # so that the caller's cleaned/de-duplicated data is what gets filtered.
    movies_ = [movie for movie in dataset if movie[index_] == lang_]

    # NOTE(review): this heading always says "English", whatever 'lang_' is;
    # it is kept unchanged so the existing notebook output stays the same.
    print("Examples of Movies in English Language:")
    explore_data(movies_, 0, 3, True)
    return movies_
    


def rate_bucket(dataset, rate_low, rate_high, index_=-4):
    """Extract the movies within the specified ratings.

    Keeps every row whose rating, converted to float, lies between
    'rate_low' and 'rate_high' (both bounds included), prints the first
    few matching rows via explore_data(), and returns the filtered list.

    Keyword arguments:
    dataset -- list containing the details of the movies
    rate_low -- lower bound of the rating (included)
    rate_high -- upper bound of the rating (included)
    index_ -- column index that holds the rating; defaults to -4, the
              position this function has always read the rating from

    Returns:
    rated_movies -- list of the details of the movies with required ratings
    """
    rated_movies = [
        movie
        for movie in dataset
        # Chained comparison is the logical (not bitwise) range check.
        if rate_low <= float(movie[index_]) <= rate_high
    ]

    print("Examples of Movies in required rating bucket:")
    explore_data(rated_movies, 0, 3, True)
    return rated_movies


# Read the data file and store it as a list 'movies'
# NOTE(review): this is an absolute, machine-specific path; adjust it to the
# location of the CSV file when running the notebook elsewhere.
path=r"C:\Users\praya\Documents\Python Scripts\file (2).csv"

# The 'with' block closes the file handle as soon as every row has been read.
# newline='' lets the csv module itself handle line endings, including those
# embedded inside quoted fields.
with open(path, encoding="utf8", newline='') as opened_file:
    read_file = reader(opened_file)
    movies = list(read_file)

# The first row is header. Extract and store it in 'movies_header'.
movies_header = movies[0]
print("Movies Header:\n", movies_header)

# Subset the movies dataset such that the header is removed from the list and store it back in movies
movies = movies[1:]





Movies Header:
 ['budget', 'genres', 'id', 'original_language', 'overview', 'popularity', 'production_countries', 'release_date', 'revenue', 'runtime', 'status', 'vote_average', 'vote_count', 'title_movies', 'Director']


In [3]:
# Delete wrong data
# Explore the row #4553. Apart from the id, language, description, status and
# title, every other field of this record is empty or zero. Hence drop this row.

BAD_ROW_INDEX = 4553
BAD_ROW_ID = '380097'  # value in the 'id' column (index 2) of the incomplete record

print("Entry at index 4553:")
explore_data(movies, BAD_ROW_INDEX, BAD_ROW_INDEX + 1)

# Delete only when the row at that index really is the incomplete record, so
# that re-running this cell cannot silently remove a valid movie.
if len(movies) > BAD_ROW_INDEX and movies[BAD_ROW_INDEX][2] == BAD_ROW_ID:
    del movies[BAD_ROW_INDEX]

Entry at index 4553:
['0', '[]', '380097', 'en', '1971 post civil rights San Francisco seemed like the perfect place for a black Korean War veteran and his family to realize their dream of economic independence and his own chance to be his a "boss". Charlie Walker would soon find out how naive he was. In a city full of impostors and naysayers, he refused to take "No" for an answer. Until a catastrophic disaster opened a door that had never been open to a black man before. This is a story about what happened when he stepped through that door, with both feet!.', '0.0', '[]', '', '0', '0.0', 'Released', '0.0', '0', 'America Is Still the Place', '']




In [4]:
# Show the details of the first 5 movies, followed by the dimensions of the dataset.
print("First 5 Entries:")
explore_data(dataset=movies, start=0, end=5, rows_and_columns=True)

First 5 Entries:
['237000000', "['Action', 'Adventure', 'Fantasy', 'Science Fiction']", '19995', 'en', 'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.', '150.437577', "['United States of America', 'United Kingdom']", '2009-12-10', '2787965087', '162.0', 'Released', '7.2', '11800', 'Avatar', 'James Cameron']


['300000000', "['Adventure', 'Fantasy', 'Action']", '285', 'en', 'Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with Will Turner and Elizabeth Swann. But nothing is quite as it seems.', '139.082615', "['United States of America']", '2007-05-19', '961000000', '169.0', 'Released', '6.9', '4500', "Pirates of the Caribbean: At World's End", 'Gore Verbinski']


['245000000', "['Action', 'Adventure', 'Crime']", '206647', 'en', 'A cryptic message from Bond’s past sends him on a trail to uncover a sinist

In [5]:
# The dataset might hold more than one entry for the same movie.
# Check for repeats on the movie title, which is stored at column index 13.

duplicate_and_unique_movies(dataset=movies, index_=13)


Number of duplicate Movies: 3


Examples of duplicate Movies: ['The Host', 'Out of the Blue', 'Batman']


In [6]:
# The duplicate check above reported 3 repeated movie titles.
# Build a dictionary, 'reviews_max', that maps each movie title (column 13)
# to the largest number of reviews (column 12) seen for that title.

reviews_max = {}

for movie in movies:
    name = movie[13]
    n_reviews = float(movie[12])

    # First sighting of the title, or a larger review count than the stored one.
    best_so_far = reviews_max.get(name)
    if best_so_far is None or best_so_far < n_reviews:
        reviews_max[name] = n_reviews

len(reviews_max)



4799

In [8]:
# Create a list 'movies_clean' without duplicate movies: for every title keep
# only the first row whose number of reviews equals the maximum stored in 'reviews_max'.

movies_clean = []
# A set keeps the "already added?" check constant-time; a list would make
# this loop quadratic in the number of movies.
already_added = set()

for movie in movies:
    name = movie[13]
    n_reviews = float(movie[12])

    if (reviews_max[name] == n_reviews) and (name not in already_added):
        movies_clean.append(movie)
        already_added.add(name)

len(movies_clean)

4799

In [10]:
# Extract all the English movies (language code 'en' at column index 3)
# with movies_lang() and store them in movies_en.

movies_en = movies_lang(dataset=movies_clean, index_=3, lang_='en')






Examples of Movies in English Language:
['237000000', "['Action', 'Adventure', 'Fantasy', 'Science Fiction']", '19995', 'en', 'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.', '150.437577', "['United States of America', 'United Kingdom']", '2009-12-10', '2787965087', '162.0', 'Released', '7.2', '11800', 'Avatar', 'James Cameron']


['300000000', "['Adventure', 'Fantasy', 'Action']", '285', 'en', 'Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with Will Turner and Elizabeth Swann. But nothing is quite as it seems.', '139.082615', "['United States of America']", '2007-05-19', '961000000', '169.0', 'Released', '6.9', '4500', "Pirates of the Caribbean: At World's End", 'Gore Verbinski']


['245000000', "['Action', 'Adventure', 'Crime']", '206647', 'en', 'A cryptic message from Bond’s past sends him on a tr

In [11]:
# Call rate_bucket() to see the English movies rated 8 or higher (up to 10, both bounds included).

high_rated_movies = rate_bucket(dataset=movies_en, rate_low=8, rate_high=10)


Examples of Movies in required rating bucket:
['185000000', "['Drama', 'Action', 'Crime', 'Thriller']", '155', 'en', 'Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker.', '187.322927', "['United Kingdom', 'United States of America']", '2008-07-16', '1004558444', '152.0', 'Released', '8.2', '12002', 'The Dark Knight', 'Christopher Nolan']


['175000000', "['Drama', 'Comedy', 'Animation', 'Family']", '150540', 'en', "Growing up can be a bumpy road, and it's no exception for Riley, who is uprooted from her Midwest life when her father starts a new job in San Francisco. Like all of us, Riley is guided by her emotions - Joy, Fear, Anger, Disgust and S