In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import re

In [2]:
data = pd.read_csv('movie_bd_v5.xls')
data.sample(5)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
187,tt0458481,65000000,39407616,Sin City: A Dame to Kill For,Mickey Rourke|Jessica Alba|Josh Brolin|Joseph ...,Frank Miller|Robert Rodriguez,There is no justice without sin.,Some of Sin City's most hard-boiled citizens c...,102,Crime|Thriller,Miramax Films|Troublemaker Studios|AR Films|Al...,8/20/2014,6.2,2014
957,tt0275022,12000000,61141030,Crossroads,Britney Spears|Zoe Saldana|Taryn Manning|Anson...,Tamra Davis,Dreams change. Friends are forever,Three friends get together and bury a box maki...,93,Action|Adventure|Comedy|Drama|Family,Paramount Pictures|MTV Films,2/15/2002,5.1,2002
71,tt3488710,35000000,61181942,The Walk,Joseph Gordon-Levitt|Ben Kingsley|Charlotte Le...,Robert Zemeckis,Dream High.,The story of French high-wire artist Philippe ...,123,Adventure|Drama|Thriller,TriStar Pictures|Sony Pictures Entertainment|I...,9/30/2015,6.8,2015
1887,tt0162983,40000000,36037909,Hanging Up,Meg Ryan|Diane Keaton|Lisa Kudrow|Walter Matth...,Diane Keaton,Every family has a few hang-ups.,A trio of sisters bond over their ambivalence ...,94,Comedy|Drama,Laurence Mark Productions|Columbia Pictures Co...,2/16/2000,5.2,2000
1711,tt0389722,30000000,75505973,30 Days of Night,Josh Hartnett|Melissa George|Ben Foster|Danny ...,David Slade,They're Coming!,This is the story of an isolated Alaskan town ...,113,Horror|Thriller,Columbia Pictures|Dark Horse Entertainment|Gho...,10/17/2007,6.1,2007


In [3]:
data.describe()

Unnamed: 0,budget,revenue,runtime,vote_average,release_year
count,1889.0,1889.0,1889.0,1889.0,1889.0
mean,54310830.0,155365300.0,109.658549,6.140762,2007.860773
std,48587210.0,214669800.0,18.017041,0.764763,4.468841
min,5000000.0,2033165.0,63.0,3.3,2000.0
25%,20000000.0,34560580.0,97.0,5.6,2004.0
50%,38000000.0,83615410.0,107.0,6.1,2008.0
75%,72000000.0,178262600.0,120.0,6.6,2012.0
max,380000000.0,2781506000.0,214.0,8.1,2015.0


# Предобработка

In [4]:
answers = {}  # создадим словарь для ответов

# изменение формата даты с mm/dd/yyyy на dd/mm/yyyy
data['release_date'] = pd.to_datetime(
    data['release_date']).dt.strftime('%d/%m/%Y')

# разбить жанры
data['genres'] = data['genres'].apply(lambda x: x.split('|'))
# разбить кинокомпании
data['production_companies'] = data['production_companies'].apply(
    lambda x: x.split('|'))
# разбить режисеров
data['director'] = data['director'].apply(lambda x: x.split('|'))
# разбить актеров
data['cast'] = data['cast'].apply(lambda x: x.split('|'))

# 1. У какого фильма из списка самый большой бюджет?

In [5]:
# +
answers['1'] = '723 Pirates of the Caribbean: On Stranger Tides (tt1298650)'

In [6]:
data[data.budget == data.budget.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
723,tt1298650,380000000,1021683000,Pirates of the Caribbean: On Stranger Tides,"[Johnny Depp, PenÃ©lope Cruz, Geoffrey Rush, I...",[Rob Marshall],Live Forever Or Die Trying.,Captain Jack Sparrow crosses paths with a woma...,136,"[Adventure, Action, Fantasy]","[Walt Disney Pictures, Jerry Bruckheimer Films...",11/05/2011,6.3,2011


ВАРИАНТ 2

In [7]:
# для поиска только имени
data[data.budget == data.budget.max()].original_title

723    Pirates of the Caribbean: On Stranger Tides
Name: original_title, dtype: object

In [8]:
# с применением сортировки по столбцу 'budget'
data.sort_values(['budget'], ascending=False).head(1)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
723,tt1298650,380000000,1021683000,Pirates of the Caribbean: On Stranger Tides,"[Johnny Depp, PenÃ©lope Cruz, Geoffrey Rush, I...",[Rob Marshall],Live Forever Or Die Trying.,Captain Jack Sparrow crosses paths with a woma...,136,"[Adventure, Action, Fantasy]","[Walt Disney Pictures, Jerry Bruckheimer Films...",11/05/2011,6.3,2011


# 2. Какой из фильмов самый длительный (в минутах)?

In [9]:
answers['2'] = '1157 Gods and Generals (tt0279111)'  # +

In [10]:
data[data.runtime == data.runtime.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
1157,tt0279111,56000000,12923936,Gods and Generals,"[Stephen Lang, Jeff Daniels, Robert Duvall, Ke...",[Ronald F. Maxwell],The nations heart was touched by...,The film centers mostly around the personal an...,214,"[Drama, History, War]","[Turner Pictures, Antietam Filmworks]",21/02/2003,5.8,2003


# 3. Какой из фильмов самый короткий (в минутах)?





In [11]:
answers['3'] = '768 Winnie the Pooh (tt1449283)'  # +

In [12]:
data[data.runtime == data.runtime.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
768,tt1449283,30000000,14460000,Winnie the Pooh,"[Jim Cummings, Travis Oates, Jim Cummings, Bud...","[Stephen Anderson, Don Hall]",Oh Pooh.,"During an ordinary day in Hundred Acre Wood, W...",63,"[Animation, Family]","[Walt Disney Pictures, Walt Disney Animation S...",13/04/2011,6.8,2011


# 4. Какова средняя длительность фильмов?


In [13]:
answers['4'] = '110'  # +

In [14]:
# с использованием функции mean()
round(data.runtime.mean())

110

In [15]:
# с помощью метода describe получить статистические характеристики для фрейма, в данном случае значение mean для 'runtime'
data.describe()

Unnamed: 0,budget,revenue,runtime,vote_average,release_year
count,1889.0,1889.0,1889.0,1889.0,1889.0
mean,54310830.0,155365300.0,109.658549,6.140762,2007.860773
std,48587210.0,214669800.0,18.017041,0.764763,4.468841
min,5000000.0,2033165.0,63.0,3.3,2000.0
25%,20000000.0,34560580.0,97.0,5.6,2004.0
50%,38000000.0,83615410.0,107.0,6.1,2008.0
75%,72000000.0,178262600.0,120.0,6.6,2012.0
max,380000000.0,2781506000.0,214.0,8.1,2015.0


# 5. Каково медианное значение длительности фильмов? 

In [16]:
answers['5'] = '107'  # +

In [17]:
# с помощью функции mean()
data.runtime.median()

107.0

In [18]:
# с помощью метода describe получить статистические характеристики для фрейма, в данном случае значение 50% для 'runtime'
data.describe()

Unnamed: 0,budget,revenue,runtime,vote_average,release_year
count,1889.0,1889.0,1889.0,1889.0,1889.0
mean,54310830.0,155365300.0,109.658549,6.140762,2007.860773
std,48587210.0,214669800.0,18.017041,0.764763,4.468841
min,5000000.0,2033165.0,63.0,3.3,2000.0
25%,20000000.0,34560580.0,97.0,5.6,2004.0
50%,38000000.0,83615410.0,107.0,6.1,2008.0
75%,72000000.0,178262600.0,120.0,6.6,2012.0
max,380000000.0,2781506000.0,214.0,8.1,2015.0


# 6. Какой самый прибыльный фильм?
#### Внимание! Здесь и далее под «прибылью» или «убытками» понимается разность между сборами и бюджетом фильма. (прибыль = сборы - бюджет) в нашем датасете это будет (profit = revenue - budget) 

In [19]:
answers['6'] = '239 Avatar (tt0499549)'  # +

In [20]:
data['profit'] = data['revenue'] - data['budget']
data[data.profit == data.profit.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
239,tt0499549,237000000,2781505847,Avatar,"[Sam Worthington, Zoe Saldana, Sigourney Weave...",[James Cameron],Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...",162,"[Action, Adventure, Fantasy, Science Fiction]","[Ingenious Film Partners, Twentieth Century Fo...",10/12/2009,7.1,2009,2544505847


# 7. Какой фильм самый убыточный? 

In [21]:
answers['7'] = '1245 The Lone Ranger (tt1210819)'  # +

In [22]:
data['profit'] = data['revenue'] - data['budget']
data[data.profit == data.profit.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1245,tt1210819,255000000,89289910,The Lone Ranger,"[Johnny Depp, Armie Hammer, William Fichtner, ...",[Gore Verbinski],Never Take Off the Mask,The Texas Rangers chase down a gang of outlaws...,149,"[Action, Adventure, Western]","[Walt Disney Pictures, Jerry Bruckheimer Films...",03/07/2013,6.0,2013,-165710090


# 8. У скольких фильмов из датасета объем сборов оказался выше бюджета?

In [23]:
answers['8'] = '1478'  # +

In [24]:
len(data[data.revenue > data.budget])

1478

# 9. Какой фильм оказался самым кассовым в 2008 году?

In [25]:
answers['9'] = '599 The Dark Knight (tt0468569)'  # +

In [26]:
# сортируем по значению 'revenue' все фильмы 2008 г.
data[data.release_year == 2008].sort_values(
    ['revenue'], ascending=False).head(1)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
599,tt0468569,185000000,1001921825,The Dark Knight,"[Christian Bale, Michael Caine, Heath Ledger, ...",[Christopher Nolan],Why So Serious?,Batman raises the stakes in his war on crime. ...,152,"[Drama, Action, Crime, Thriller]","[DC Comics, Legendary Pictures, Warner Bros., ...",16/07/2008,8.1,2008,816921825


In [27]:
# среди записей удовлетворяющих условию: год выхода 2008, находим ту, в которой сборы максимальны
data[data.revenue == data[data.release_year == 2008].revenue.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
599,tt0468569,185000000,1001921825,The Dark Knight,"[Christian Bale, Michael Caine, Heath Ledger, ...",[Christopher Nolan],Why So Serious?,Batman raises the stakes in his war on crime. ...,152,"[Drama, Action, Crime, Thriller]","[DC Comics, Legendary Pictures, Warner Bros., ...",16/07/2008,8.1,2008,816921825


# 10. Самый убыточный фильм за период с 2012 по 2014 г. (включительно)?


In [28]:
answers['10'] = '1245 The Lone Ranger (tt1210819)'  # +

In [29]:
data['profit'] = data['revenue'] - data['budget']
data[(data.release_year >= 2012) & (data.release_year <= 2014)
     ].sort_values(['profit'], ascending=True).head(1)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1245,tt1210819,255000000,89289910,The Lone Ranger,"[Johnny Depp, Armie Hammer, William Fichtner, ...",[Gore Verbinski],Never Take Off the Mask,The Texas Rangers chase down a gang of outlaws...,149,"[Action, Adventure, Western]","[Walt Disney Pictures, Jerry Bruckheimer Films...",03/07/2013,6.0,2013,-165710090


In [30]:
data['profit'] = data['revenue'] - data['budget']
data[data.profit == data[(data.release_year >= 2012) & (
    data.release_year <= 2014)].profit.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1245,tt1210819,255000000,89289910,The Lone Ranger,"[Johnny Depp, Armie Hammer, William Fichtner, ...",[Gore Verbinski],Never Take Off the Mask,The Texas Rangers chase down a gang of outlaws...,149,"[Action, Adventure, Western]","[Walt Disney Pictures, Jerry Bruckheimer Films...",03/07/2013,6.0,2013,-165710090


# 11. Какого жанра фильмов больше всего?

In [31]:
answers['11'] = 'Drama'  # +

In [32]:
# функция для подсчета жанров
genres_dict = {}
def genres_count(x):
    for i in x:
        if i in genres_dict:
            genres_dict[i] += 1
        else:
            genres_dict[i] = 1
    return genres_dict


data['genres'].apply(genres_count)
pd.Series(genres_dict).sort_values(ascending=False).index[0]    # head(1)

'Drama'

In [33]:
# разложить столбец 'genres', с использованием collections и метода most_common() найти наиболее часто встречающиеся элементы в порядке убывания
Counter(data.explode('genres')['genres']).most_common(1)

[('Drama', 782)]

In [34]:
# разбить жанры, из списка всех упоминаний подсчитать частоту встречи
pd.Series(data.explode('genres')['genres']).value_counts().index[0]   # head(1)

'Drama'

# 12. Фильмы какого жанра чаще всего становятся прибыльными? 

In [35]:
answers['12'] = 'Drama'  # +

In [36]:
# прибыльные фильмы
data_profit = data[data['profit'] > 0]

Counter(data_profit.explode('genres')['genres']).most_common()

# pd.Series(data_profit.explode('genres')['genres']).value_counts()#.index[0]

# pd.Series(data_profit['genres'].sum()).value_counts()

[('Drama', 560),
 ('Comedy', 551),
 ('Thriller', 446),
 ('Action', 444),
 ('Adventure', 337),
 ('Romance', 242),
 ('Crime', 231),
 ('Family', 226),
 ('Science Fiction', 195),
 ('Fantasy', 188),
 ('Horror', 150),
 ('Animation', 120),
 ('Mystery', 119),
 ('Music', 47),
 ('History', 46),
 ('War', 41),
 ('Western', 12),
 ('Documentary', 7)]

# 13. У какого режиссера самые большие суммарные кассовые сборы?

In [37]:
answers['13'] = 'Peter Jackson'  # +

In [38]:
all_director = data.explode('director')
# посчитать сумму revenue для каждого режисера
all_director.groupby(['director'])['revenue'].sum(
).sort_values(ascending=False).head(1)

director
Peter Jackson    6490593685
Name: revenue, dtype: int64

# 14. Какой режисер снял больше всего фильмов в стиле Action?

In [39]:
answers['14'] = 'Robert Rodriguez'  # +

In [40]:
# создать словарь, где ключем будет режисер, а содержанием - количество фильмов
mas = {}
for index, row in data.explode('director').iterrows():
    if not row.director in mas:
        mas[row.director] = 0
    if 'Action' in row.genres:
        mas[row.director] += 1

print(max(mas, key=mas.get))

Robert Rodriguez


# 15. Фильмы с каким актером принесли самые высокие кассовые сборы в 2012 году? 

In [41]:
answers['15'] = 'Chris Hemsworth'  # +

In [42]:
table = data.explode('cast')
# с использованием регулярных выражений находим фильмы за 2012
table = table[table.release_date.str.contains('^\d+/\d+/2012$', regex=True)]
# сортируем по кассовым сборам
display(table[table.revenue == table.revenue.max()])

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
970,tt0848228,220000000,1519557910,The Avengers,Robert Downey Jr.,[Joss Whedon],Some assembly required.,When an unexpected enemy emerges and threatens...,143,"[Science Fiction, Action, Adventure]",[Marvel Studios],25/04/2012,7.3,2012,1299557910
970,tt0848228,220000000,1519557910,The Avengers,Chris Evans,[Joss Whedon],Some assembly required.,When an unexpected enemy emerges and threatens...,143,"[Science Fiction, Action, Adventure]",[Marvel Studios],25/04/2012,7.3,2012,1299557910
970,tt0848228,220000000,1519557910,The Avengers,Mark Ruffalo,[Joss Whedon],Some assembly required.,When an unexpected enemy emerges and threatens...,143,"[Science Fiction, Action, Adventure]",[Marvel Studios],25/04/2012,7.3,2012,1299557910
970,tt0848228,220000000,1519557910,The Avengers,Chris Hemsworth,[Joss Whedon],Some assembly required.,When an unexpected enemy emerges and threatens...,143,"[Science Fiction, Action, Adventure]",[Marvel Studios],25/04/2012,7.3,2012,1299557910
970,tt0848228,220000000,1519557910,The Avengers,Scarlett Johansson,[Joss Whedon],Some assembly required.,When an unexpected enemy emerges and threatens...,143,"[Science Fiction, Action, Adventure]",[Marvel Studios],25/04/2012,7.3,2012,1299557910


# 16. Какой актер снялся в большем количестве высокобюджетных фильмов?

In [43]:
answers['16'] = 'Matt Damon'  # +

In [44]:
# фильмы, чей бюджет выше среднего
data_top = data[data.budget > data.budget.mean()]

# подсчет наиболее часто упоминаемого актера
pd.Series(data_top['cast'].sum()).value_counts().index[0]  # head(1)

'Matt Damon'

# 17. В фильмах какого жанра больше всего снимался Nicolas Cage? 

In [45]:
answers['17'] = 'Action'  # +

In [46]:
# "взорвать" столбец 'cast'
data_cast = data.explode('cast')
# оставить фильмы, где есть Nicolas Cage
data_cage = data_cast[data_cast.cast == 'Nicolas Cage']
# посчитать жанры
print(pd.Series(data_cage.genres.sum()).value_counts().index[0])

Action


# 18. Самый убыточный фильм от Paramount Pictures

In [47]:
answers['18'] = 'K-19: The Widowmaker'  # +

In [48]:
data_PC = data.explode('production_companies')
data_PP = data_PC[data_PC.production_companies == 'Paramount Pictures']
display(data_PP[data_PP.profit == data_PP.profit.min()])

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
925,tt0267626,100000000,35168966,K-19: The Widowmaker,"[Harrison Ford, Liam Neeson, Peter Sarsgaard, ...",[Kathryn Bigelow],Fate has found its hero.,When Russia's first nuclear submarine malfunct...,138,"[Thriller, Drama, History]",Paramount Pictures,19/07/2002,6.0,2002,-64831034


# 19. Какой год стал самым успешным по суммарным кассовым сборам?

In [49]:
answers['19'] = '2015'  # +

In [50]:
# с помощью перебора создать словарь, в котором ключом является год, значением - кассовый сбор
mas_years = {}
for index, row in data.iterrows():
    if not row.release_year in mas_years:
        mas_years[row.release_year] = 0
    mas_years[row.release_year] += row.revenue

print(max(mas_years, key=mas_years.get))

2015


# 20. Какой самый прибыльный год для студии Warner Bros?

In [51]:
answers['20'] = '2014'  # +

In [52]:
data_PC = data.explode('production_companies')
# оставить в фрейме фильмы, названия которых содержат 'Warner Bros'
data_WB = data_PC[data_PC.production_companies.str.contains('Warner Bros')]
# с помощью перебора создать словарь, в котором ключем будет год, а значением - прибыль
mas_years = {}
for index, row in data_WB.iterrows():
    if not row.release_year in mas_years:
        mas_years[row.release_year] = 0
    mas_years[row.release_year] += row.profit

print(max(mas_years, key=mas_years.get))

2014


# 21. В каком месяце за все годы суммарно вышло больше всего фильмов?

In [53]:
answers['21'] = '09'  # +

In [54]:
# словарь, в котором ключом является месяц, значением - колличество упоминаний этого месяца(колличество фильмов)
mas_month = {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0,
             '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0}
for index, row in data.iterrows():
    mas_month[row.release_date.split('/')[1]] += 1

print(max(mas_month, key=mas_month.get))

09


# 22. Сколько суммарно вышло фильмов летом? (за июнь, июль, август)

In [55]:
answers['22'] = '450'  # +

In [56]:
mas_month = {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0,
             '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0}
for index, row in data.iterrows():
    mas_month[row.release_date.split('/')[1]] += 1

print(mas_month['06'] + mas_month['07'] + mas_month['08'])

450


# 23. Для какого режиссера зима – самое продуктивное время года? 

In [57]:
answers['23'] = 'Peter Jackson'  # +

In [58]:
# создаем словарь, в котором ключом является имя режиссера, а значением - количество фильмов, снятых за зимние месяцы
data_D = data.explode('director')
mas_D = {}
for index, row in data_D.iterrows():
    if not row.director in mas_D:
        mas_D[row.director] = 0
    if row.release_date.split('/')[1] in ['12', '01', '02']:
        mas_D[row.director] += 1

print(max(mas_D, key=mas_D.get))

Peter Jackson


# 24. Какая студия дает самые длинные названия своим фильмам по количеству символов?

In [59]:
answers['24'] = 'Four By Two Productions'  # +

In [60]:
data_24 = data.explode('production_companies')
# подсчитываем количесвто символов в названии фильмов
data_24['name_len'] = data_24['original_title'].apply(lambda x: len(x))
# посчитать среднее значение количества символов всех фильмов для каждой компании, отсортировать по убыванию
data_24.groupby(by=['production_companies'])[['name_len']].mean(
).sort_values(by=['name_len'], ascending=False).head(1)

Unnamed: 0_level_0,name_len
production_companies,Unnamed: 1_level_1
Four By Two Productions,83.0


# 25. Описание фильмов какой студии в среднем самые длинные по количеству слов?

In [61]:
answers['25'] = 'Midnight Picture Show'  # +

In [62]:
data_PC = data.explode('production_companies')
# подсчитываем количесвто слов в описании
data_PC['description_words_len'] = data_PC['overview'].apply(
    lambda x: len(x.split(' ')))
# посчитать среднее значение количества слов всех фильмов для каждой компании, отсортировать по убыванию
data_PC.groupby(by=['production_companies'])[['description_words_len']].mean(
).sort_values(by=['description_words_len'], ascending=False).head(1)

Unnamed: 0_level_0,description_words_len
production_companies,Unnamed: 1_level_1
Midnight Picture Show,175.0


# 26. Какие фильмы входят в 1 процент лучших по рейтингу? 
по vote_average

In [63]:
answers['26'] = 'Inside Out, The Dark Knight, 12 Years a Slave'  # +

In [64]:
totalFilms = data.original_title.count() / 100
data.sort_values(by=['vote_average'], ascending=False).head(int(totalFilms))

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
599,tt0468569,185000000,1001921825,The Dark Knight,"[Christian Bale, Michael Caine, Heath Ledger, ...",[Christopher Nolan],Why So Serious?,Batman raises the stakes in his war on crime. ...,152,"[Drama, Action, Crime, Thriller]","[DC Comics, Legendary Pictures, Warner Bros., ...",16/07/2008,8.1,2008,816921825
118,tt0816692,165000000,621752480,Interstellar,"[Matthew McConaughey, Jessica Chastain, Anne H...",[Christopher Nolan],Mankind was born on Earth. It was never meant ...,Interstellar chronicles the adventures of a gr...,169,"[Adventure, Drama, Science Fiction]","[Paramount Pictures, Legendary Pictures, Warne...",05/11/2014,8.0,2014,456752480
125,tt2084970,14000000,233555708,The Imitation Game,"[Benedict Cumberbatch, Keira Knightley, Matthe...",[Morten Tyldum],The true enigma was the man who cracked the code.,Based on the real life story of legendary cryp...,113,"[History, Drama, Thriller, War]","[Black Bear Pictures, Bristol Automotive]",14/11/2014,8.0,2014,219555708
9,tt2096673,175000000,853708609,Inside Out,"[Amy Poehler, Phyllis Smith, Richard Kind, Bil...",[Pete Docter],Meet the little voices inside your head.,"Growing up can be a bumpy road, and it's no ex...",94,"[Comedy, Animation, Family]","[Walt Disney Pictures, Pixar Animation Studios...",09/06/2015,8.0,2015,678708609
34,tt3170832,6000000,35401758,Room,"[Brie Larson, Jacob Tremblay, Joan Allen, Sean...",[Lenny Abrahamson],Love knows no boundaries,Jack is a young boy of 5 years old who has liv...,117,"[Drama, Thriller]","[Element Pictures, No Trace Camping, A24, Dupe...",16/10/2015,8.0,2015,29401758
1183,tt0993846,100000000,392000694,The Wolf of Wall Street,"[Leonardo DiCaprio, Jonah Hill, Margot Robbie,...",[Martin Scorsese],EARN. SPEND. PARTY.,A New York stockbroker refuses to cooperate in...,180,"[Crime, Drama, Comedy]","[Paramount Pictures, Appian Way, EMJAG Product...",25/12/2013,7.9,2013,292000694
128,tt2267998,61000000,369330363,Gone Girl,"[Ben Affleck, Rosamund Pike, Carrie Coon, Neil...",[David Fincher],You don't know what you've got 'til it's...,With his wife's disappearance having become th...,145,"[Mystery, Thriller, Drama]","[Twentieth Century Fox Film Corporation, Regen...",01/10/2014,7.9,2014,308330363
1191,tt2024544,20000000,187000000,12 Years a Slave,"[Chiwetel Ejiofor, Michael Fassbender, Lupita ...",[Steve McQueen],The extraordinary true story of Solomon Northup,"In the pre-Civil War United States, Solomon No...",134,"[Drama, History]","[Plan B Entertainment, Regency Enterprises, Ri...",18/10/2013,7.9,2013,167000000
119,tt2015381,170000000,773312399,Guardians of the Galaxy,"[Chris Pratt, Zoe Saldana, Dave Bautista, Vin ...",[James Gunn],All heroes start somewhere.,"Light years from Earth, 26 years after being a...",121,"[Action, Science Fiction, Adventure]","[Marvel Studios, Moving Picture Company (MPC),...",30/07/2014,7.9,2014,603312399
1081,tt0167260,94000000,1118888979,The Lord of the Rings: The Return of the King,"[Elijah Wood, Ian McKellen, Viggo Mortensen, L...",[Peter Jackson],The eye of the enemy is moving.,Aragorn is revealed as the heir to the ancient...,201,"[Adventure, Fantasy, Action]","[WingNut Films, New Line Cinema]",01/12/2003,7.9,2003,1024888979


# 27. Какие актеры чаще всего снимаются в одном фильме вместе?


In [65]:
answers['27'] = 'Daniel Radcliffe|Rupert Grint'  # +

In [66]:
# с помощью функции и перебора создаем массив пар актеров, в котором ключом будут пары актеров, а значением - частота упоминания
mas_pairs = {}

def create_pairs(people):
    for i in range(len(people)):
        for j in range(i+1, len(people)):
            pair_key = people[i] + '|' + people[j]
            if not pair_key in mas_pairs:
                mas_pairs[pair_key] = 1
            else:
                mas_pairs[pair_key] += 1


for item, row in data.iterrows():
    create_pairs(row.cast)
# вывести ключ, у котрого значение максимально
print(max(mas_pairs, key=mas_pairs.get))

Daniel Radcliffe|Rupert Grint


# Submission

In [67]:
# в конце можно посмотреть свои ответы к каждому вопросу
answers

{'1': '723 Pirates of the Caribbean: On Stranger Tides (tt1298650)',
 '2': '1157 Gods and Generals (tt0279111)',
 '3': '768 Winnie the Pooh (tt1449283)',
 '4': '110',
 '5': '107',
 '6': '239 Avatar (tt0499549)',
 '7': '1245 The Lone Ranger (tt1210819)',
 '8': '1478',
 '9': '599 The Dark Knight (tt0468569)',
 '10': '1245 The Lone Ranger (tt1210819)',
 '11': 'Drama',
 '12': 'Drama',
 '13': 'Peter Jackson',
 '14': 'Robert Rodriguez',
 '15': 'Chris Hemsworth',
 '16': 'Matt Damon',
 '17': 'Action',
 '18': 'K-19: The Widowmaker',
 '19': '2015',
 '20': '2014',
 '21': '09',
 '22': '450',
 '23': 'Peter Jackson',
 '24': 'Four By Two Productions',
 '25': 'Midnight Picture Show',
 '26': 'Inside Out, The Dark Knight, 12 Years a Slave',
 '27': 'Daniel Radcliffe|Rupert Grint'}

In [68]:
# и убедиться что ни чего не пропустил)
len(answers)

27