In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./data/input/IMDB-Movie-Data.csv')

## Challenge 1

We want to create bins of movies according to the number of votes they've received. For that matter, we will create a new column named 'bin' which will tag every movie as follow:

From 0 to 999 ==> 1
From 1000 to 9999 ==> 2
From 10000 to 99999 ==> 3
From 100000 to 999999 ==> 4
More than 1000000 ==> 5

In [3]:
def bins(votes):
    if len(str(votes)) > 6:
        return 5
    elif len(str(votes)) <= 3:
        return 1
    else:
        return len(str(votes)) - 2

From 0 to 999 ==> 1
From 1000 to 9999 ==> 2
From 10000 to 99999 ==> 3
From 100000 to 999999 ==> 4
More than 1000000 ==> 5

In [4]:
df['bin'] = df.apply(lambda x: bins(x['Votes']), axis=1)

## Challenge 2. Using axis concept
We want to know how much is the revenue per minute for every movie.

In [5]:
df_revenue = df.groupby(['Title'])['Revenue (Millions)'].sum().sort_values(ascending = False)
df_revenue.head()

Title
Star Wars: Episode VII - The Force Awakens    936.63
Avatar                                        760.51
Jurassic World                                652.18
The Avengers                                  623.28
The Dark Knight                               533.32
Name: Revenue (Millions), dtype: float64

## Challenge 3. Using the lambda
We want to create a new ranking where we add 1 point if the genre is thriller but subtract 1 point if the genre is comedy.

In [6]:
df_nr = df.copy()

In [7]:
df_nr['Metascore'] = df_nr.apply(lambda x: x['Metascore'] + 1 if 'thriller' in x['Genre'].lower()\
                                 else x['Metascore'] - 1 if 'comedy' in x['Genre'].lower() else x['Metascore'], axis = 1)

In [8]:
df_nr.sort_values(['bin', 'Metascore'], ascending = [False, False], inplace = True, ignore_index=True)

df_nr.head(3)

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore,bin
0,55,The Dark Knight,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0,5
1,145,Django Unchained,"Drama,Western","With the help of a German bounty hunter , a fr...",Quentin Tarantino,"Jamie Foxx, Christoph Waltz, Leonardo DiCaprio...",2012,165,8.4,1039115,162.8,81.0,5
2,125,The Dark Knight Rises,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,79.0,5


## Challenge 4. Now the real stuff
We want to know if the sum of the ASCII value of every character of the movie title divided by the number of votes retrieve a prime number...remember, prime numbers are integers.

In [16]:
df_nr['Votes/ASCII'] = df_nr.apply(lambda x: x['Votes']/sum(ord(y) for y in x['Title']), axis=1)

In [17]:
def is_prime(n):
    if n != int or n < 2:
        return False
    elif n == 1 or n == 2:
        return True
    else:
        for i in range(2,n):
            if (n%i) == 0:
                return False
            return True

In [33]:
df_nr['Prime'] = df_nr.apply(lambda x: is_prime(x['Votes/ASCII']), axis=1 )

In [32]:
df_nr.drop('Prime', axis=1, inplace=True)

In [34]:
df_nr['Prime'].unique()

array([False])