In [1]:
import pandas as pd

In [2]:
# Reading csv file
IMDB_movies = pd.read_csv('IMDB_movies_and_ratings.csv')
# Calling columns needed
IMDB_movies = IMDB_movies[['title','year','actors']]
IMDB_movies

Unnamed: 0,title,year,actors
0,Metropolis,1927,"Alfred Abel, Gustav Fröhlich, Rudolf Klein-Rog..."
1,City Lights,1931,"Virginia Cherrill, Florence Lee, Harry Myers, ..."
2,Modern Times,1936,"Charles Chaplin, Paulette Goddard, Henry Bergm..."
3,Snow White and the Seven Dwarfs,1937,"Roy Atwell, Stuart Buchanan, Adriana Caselotti..."
4,Gone with the Wind,1939,"Thomas Mitchell, Barbara O'Neil, Vivien Leigh,..."
...,...,...,...
4176,Child's Play,2019,"Tim Matheson, Ben Andrusco-Daon, Zahra Anderso..."
4177,Midsommar,2019,"Florence Pugh, Jack Reynor, Vilhelm Blomgren, ..."
4178,Unplanned,2019,"Ashley Bratcher, Brooks Ryan, Robia Scott, Jar..."
4179,Cold Blood Legacy,2019,"Jean Reno, Sarah Lind, Joe Anderson, David Gya..."


In [3]:
# Splitting actors column
IMDB_movies[['actor_1','actor_2', 'actor_3','rest']] = IMDB_movies['actors'].str.split(",", n=3, expand=True)
IMDB_movies

Unnamed: 0,title,year,actors,actor_1,actor_2,actor_3,rest
0,Metropolis,1927,"Alfred Abel, Gustav Fröhlich, Rudolf Klein-Rog...",Alfred Abel,Gustav Fröhlich,Rudolf Klein-Rogge,"Fritz Rasp, Theodor Loos, Erwin Biswanger, He..."
1,City Lights,1931,"Virginia Cherrill, Florence Lee, Harry Myers, ...",Virginia Cherrill,Florence Lee,Harry Myers,"Al Ernest Garcia, Hank Mann, Charles Chaplin"
2,Modern Times,1936,"Charles Chaplin, Paulette Goddard, Henry Bergm...",Charles Chaplin,Paulette Goddard,Henry Bergman,"Tiny Sandford, Chester Conklin, Hank Mann, St..."
3,Snow White and the Seven Dwarfs,1937,"Roy Atwell, Stuart Buchanan, Adriana Caselotti...",Roy Atwell,Stuart Buchanan,Adriana Caselotti,"Eddie Collins, Pinto Colvig, Marion Darlingto..."
4,Gone with the Wind,1939,"Thomas Mitchell, Barbara O'Neil, Vivien Leigh,...",Thomas Mitchell,Barbara O'Neil,Vivien Leigh,"Evelyn Keyes, Ann Rutherford, George Reeves, ..."
...,...,...,...,...,...,...,...
4176,Child's Play,2019,"Tim Matheson, Ben Andrusco-Daon, Zahra Anderso...",Tim Matheson,Ben Andrusco-Daon,Zahra Anderson,"Serge Jaswal, Ariana Nica, Phoenix Ly, Johnso..."
4177,Midsommar,2019,"Florence Pugh, Jack Reynor, Vilhelm Blomgren, ...",Florence Pugh,Jack Reynor,Vilhelm Blomgren,"William Jackson Harper, Will Poulter, Ellora ..."
4178,Unplanned,2019,"Ashley Bratcher, Brooks Ryan, Robia Scott, Jar...",Ashley Bratcher,Brooks Ryan,Robia Scott,"Jared Lotz, Emma Elle Roberts, Robin DeMarco,..."
4179,Cold Blood Legacy,2019,"Jean Reno, Sarah Lind, Joe Anderson, David Gya...",Jean Reno,Sarah Lind,Joe Anderson,"David Gyasi, Ihor Ciszkewycz, François Guétar..."


In [4]:
# Dropping unneeded columns
IMDB_actors = IMDB_movies.drop(columns = ['actors', 'rest'])
IMDB_actors

Unnamed: 0,title,year,actor_1,actor_2,actor_3
0,Metropolis,1927,Alfred Abel,Gustav Fröhlich,Rudolf Klein-Rogge
1,City Lights,1931,Virginia Cherrill,Florence Lee,Harry Myers
2,Modern Times,1936,Charles Chaplin,Paulette Goddard,Henry Bergman
3,Snow White and the Seven Dwarfs,1937,Roy Atwell,Stuart Buchanan,Adriana Caselotti
4,Gone with the Wind,1939,Thomas Mitchell,Barbara O'Neil,Vivien Leigh
...,...,...,...,...,...
4176,Child's Play,2019,Tim Matheson,Ben Andrusco-Daon,Zahra Anderson
4177,Midsommar,2019,Florence Pugh,Jack Reynor,Vilhelm Blomgren
4178,Unplanned,2019,Ashley Bratcher,Brooks Ryan,Robia Scott
4179,Cold Blood Legacy,2019,Jean Reno,Sarah Lind,Joe Anderson


In [5]:
# Finding columns with missing values
# is_NaN = IMDB_actors.isnull()
# row_has_NaN = is_NaN.any(axis=1)
# rows_with_NaN = IMDB_actors[row_has_NaN]
# rows_with_NaN

# Actor 1 Rankings

In [6]:
# Calling columns needed for actor_1
actor1_df = IMDB_actors[['title', 'year', 'actor_1']]
actor1_df

Unnamed: 0,title,year,actor_1
0,Metropolis,1927,Alfred Abel
1,City Lights,1931,Virginia Cherrill
2,Modern Times,1936,Charles Chaplin
3,Snow White and the Seven Dwarfs,1937,Roy Atwell
4,Gone with the Wind,1939,Thomas Mitchell
...,...,...,...
4176,Child's Play,2019,Tim Matheson
4177,Midsommar,2019,Florence Pugh
4178,Unplanned,2019,Ashley Bratcher
4179,Cold Blood Legacy,2019,Jean Reno


In [7]:
# Reading rankings csv
actor_rankings = pd.read_csv('actor_rankings.csv')
# Renaming columns for merge
actor_rankings = actor_rankings.rename(columns = {'Actor':'actor_1'})
actor_rankings

Unnamed: 0.1,Unnamed: 0,actor_1,Ranking
0,0,Robert Downey Jr.,A
1,1,Samuel L. Jackson,A
2,2,Scarlett Johansson,A
3,3,Chris Hemsworth,A
4,4,Chris Evans,A
...,...,...,...
595,595,Garrett Hedlund,C
596,596,Russell Brand,C
597,597,Jessica Biel,C
598,598,Edward Asner,C


In [8]:
# Merging first actor and rankings
actor_rank1=pd.merge(actor1_df, actor_rankings, on=['actor_1'], how='outer')
# Dropping unnamed:0 column
actor_rank1 = actor_rank1.drop(columns=['Unnamed: 0'])
actor_rank1

Unnamed: 0,title,year,actor_1,Ranking
0,Metropolis,1927.0,Alfred Abel,
1,City Lights,1931.0,Virginia Cherrill,
2,Modern Times,1936.0,Charles Chaplin,
3,Snow White and the Seven Dwarfs,1937.0,Roy Atwell,
4,Gone with the Wind,1939.0,Thomas Mitchell,
...,...,...,...,...
4360,,,Theo James,C
4361,,,Snoop Dogg,C
4362,,,Lea Seydoux,C
4363,,,Garrett Hedlund,C


In [9]:
# Filling missing rankings with D ranking
actor_rank1['Ranking'] = actor_rank1['Ranking'].fillna('D')
actor_rank1

Unnamed: 0,title,year,actor_1,Ranking
0,Metropolis,1927.0,Alfred Abel,D
1,City Lights,1931.0,Virginia Cherrill,D
2,Modern Times,1936.0,Charles Chaplin,D
3,Snow White and the Seven Dwarfs,1937.0,Roy Atwell,D
4,Gone with the Wind,1939.0,Thomas Mitchell,D
...,...,...,...,...
4360,,,Theo James,C
4361,,,Snoop Dogg,C
4362,,,Lea Seydoux,C
4363,,,Garrett Hedlund,C


In [10]:
# Dropping missing values
actor_rank1= actor_rank1.dropna()
actor_rank1

Unnamed: 0,title,year,actor_1,Ranking
0,Metropolis,1927.0,Alfred Abel,D
1,City Lights,1931.0,Virginia Cherrill,D
2,Modern Times,1936.0,Charles Chaplin,D
3,Snow White and the Seven Dwarfs,1937.0,Roy Atwell,D
4,Gone with the Wind,1939.0,Thomas Mitchell,D
...,...,...,...,...
4176,Climax,2018.0,Sofia Boutella,C
4177,Last Christmas,2019.0,Madison Ingoldsby,D
4178,Child's Play,2019.0,Tim Matheson,D
4179,Unplanned,2019.0,Ashley Bratcher,D


# Actor 2 Rankings

In [11]:
# Calling columns needed for actor_2
IMDB_actor2 = IMDB_actors[['title', 'year','actor_2']]
IMDB_actor2

Unnamed: 0,title,year,actor_2
0,Metropolis,1927,Gustav Fröhlich
1,City Lights,1931,Florence Lee
2,Modern Times,1936,Paulette Goddard
3,Snow White and the Seven Dwarfs,1937,Stuart Buchanan
4,Gone with the Wind,1939,Barbara O'Neil
...,...,...,...
4176,Child's Play,2019,Ben Andrusco-Daon
4177,Midsommar,2019,Jack Reynor
4178,Unplanned,2019,Brooks Ryan
4179,Cold Blood Legacy,2019,Sarah Lind


In [12]:
# Reading ranking csv file
actor_rankings2 = pd.read_csv('actor_rankings.csv')
# Renaming actor column for merge
actor_rankings2 = actor_rankings2.rename(columns = {'Actor':'actor_2'})
actor_rankings2

Unnamed: 0.1,Unnamed: 0,actor_2,Ranking
0,0,Robert Downey Jr.,A
1,1,Samuel L. Jackson,A
2,2,Scarlett Johansson,A
3,3,Chris Hemsworth,A
4,4,Chris Evans,A
...,...,...,...
595,595,Garrett Hedlund,C
596,596,Russell Brand,C
597,597,Jessica Biel,C
598,598,Edward Asner,C


In [13]:
# Removing space(first spot) in actor column in each row
IMDB_actor2['actor_2'] = IMDB_actor2['actor_2'].str[1:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [14]:
# Merging second actor and ranking 
actor_rank2=pd.merge(IMDB_actor2, actor_rankings2, on=['actor_2'], how='outer')
# Dropping unnamed:0 column
actor_rank2 = actor_rank2.drop(columns = ['Unnamed: 0'])
actor_rank2

Unnamed: 0,title,year,actor_2,Ranking
0,Metropolis,1927.0,Gustav Fröhlich,
1,City Lights,1931.0,Florence Lee,
2,Modern Times,1936.0,Paulette Goddard,
3,Snow White and the Seven Dwarfs,1937.0,Stuart Buchanan,
4,Gone with the Wind,1939.0,Barbara O'Neil,
...,...,...,...,...
4337,,,Gina Rodriguez,C
4338,,,Karen Fukuhara,C
4339,,,Lea Seydoux,C
4340,,,Edward Asner,C


In [15]:
# Filling missing rankings with D ranking
actor_rank2['Ranking'] = actor_rank2['Ranking'].fillna('D')
actor_rank2

Unnamed: 0,title,year,actor_2,Ranking
0,Metropolis,1927.0,Gustav Fröhlich,D
1,City Lights,1931.0,Florence Lee,D
2,Modern Times,1936.0,Paulette Goddard,D
3,Snow White and the Seven Dwarfs,1937.0,Stuart Buchanan,D
4,Gone with the Wind,1939.0,Barbara O'Neil,D
...,...,...,...,...
4337,,,Gina Rodriguez,C
4338,,,Karen Fukuhara,C
4339,,,Lea Seydoux,C
4340,,,Edward Asner,C


In [16]:
# Dropping missing values
actor_rank2 = actor_rank2.dropna()
actor_rank2

Unnamed: 0,title,year,actor_2,Ranking
0,Metropolis,1927.0,Gustav Fröhlich,D
1,City Lights,1931.0,Florence Lee,D
2,Modern Times,1936.0,Paulette Goddard,D
3,Snow White and the Seven Dwarfs,1937.0,Stuart Buchanan,D
4,Gone with the Wind,1939.0,Barbara O'Neil,D
...,...,...,...,...
4176,Gongjak,2018.0,Sung-min Lee,D
4177,Climax,2018.0,Romain Guillermic,D
4178,Child's Play,2019.0,Ben Andrusco-Daon,D
4179,Unplanned,2019.0,Brooks Ryan,D


# Actor 3 Rankings

In [17]:
# Calling columns needed for actor_3
IMDB_actor3 = IMDB_actors[['title', 'year', 'actor_3']]
IMDB_actor3

Unnamed: 0,title,year,actor_3
0,Metropolis,1927,Rudolf Klein-Rogge
1,City Lights,1931,Harry Myers
2,Modern Times,1936,Henry Bergman
3,Snow White and the Seven Dwarfs,1937,Adriana Caselotti
4,Gone with the Wind,1939,Vivien Leigh
...,...,...,...
4176,Child's Play,2019,Zahra Anderson
4177,Midsommar,2019,Vilhelm Blomgren
4178,Unplanned,2019,Robia Scott
4179,Cold Blood Legacy,2019,Joe Anderson


In [18]:
# Reading rankings csv file
actor_rankings3 = pd.read_csv('actor_rankings.csv')
# Renaming actor column for merge
actor_rankings3 = actor_rankings3.rename(columns = {'Actor':'actor_3'})
actor_rankings3

Unnamed: 0.1,Unnamed: 0,actor_3,Ranking
0,0,Robert Downey Jr.,A
1,1,Samuel L. Jackson,A
2,2,Scarlett Johansson,A
3,3,Chris Hemsworth,A
4,4,Chris Evans,A
...,...,...,...
595,595,Garrett Hedlund,C
596,596,Russell Brand,C
597,597,Jessica Biel,C
598,598,Edward Asner,C


In [19]:
# Removing space(first spot) in actor column in each row
IMDB_actor3['actor_3'] = IMDB_actor3['actor_3'].str[1:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [20]:
# Merging third actor and ranking 
actor_rank3=pd.merge(IMDB_actor3, actor_rankings3, on=['actor_3'], how='outer')
# Dropping unnamed:0 column
actor_rank3 = actor_rank3.drop(columns = ['Unnamed: 0'])
actor_rank3

Unnamed: 0,title,year,actor_3,Ranking
0,Metropolis,1927.0,Rudolf Klein-Rogge,
1,City Lights,1931.0,Harry Myers,
2,Modern Times,1936.0,Henry Bergman,
3,Snow White and the Seven Dwarfs,1937.0,Adriana Caselotti,
4,Gone with the Wind,1939.0,Vivien Leigh,
...,...,...,...,...
4383,,,Karen Fukuhara,C
4384,,,Theo James,C
4385,,,Sean Penn,C
4386,,,Lea Seydoux,C


In [21]:
# Filling missing rankings with D ranking
actor_rank3['Ranking'] = actor_rank3['Ranking'].fillna('D')
actor_rank3

Unnamed: 0,title,year,actor_3,Ranking
0,Metropolis,1927.0,Rudolf Klein-Rogge,D
1,City Lights,1931.0,Harry Myers,D
2,Modern Times,1936.0,Henry Bergman,D
3,Snow White and the Seven Dwarfs,1937.0,Adriana Caselotti,D
4,Gone with the Wind,1939.0,Vivien Leigh,D
...,...,...,...,...
4383,,,Karen Fukuhara,C
4384,,,Theo James,C
4385,,,Sean Penn,C
4386,,,Lea Seydoux,C


In [22]:
# Dropping missing values
actor_rank3 = actor_rank3.dropna()
actor_rank3

Unnamed: 0,title,year,actor_3,Ranking
0,Metropolis,1927.0,Rudolf Klein-Rogge,D
1,City Lights,1931.0,Harry Myers,D
2,Modern Times,1936.0,Henry Bergman,D
3,Snow White and the Seven Dwarfs,1937.0,Adriana Caselotti,D
4,Gone with the Wind,1939.0,Vivien Leigh,D
...,...,...,...,...
4176,A Dog's Journey,2019.0,Kathryn Prescott,D
4177,Last Christmas,2019.0,Boris Isakovic,D
4178,Child's Play,2019.0,Zahra Anderson,D
4179,Midsommar,2019.0,Vilhelm Blomgren,D


In [23]:
# Merging actor 1 and actor 2 rankings
merge_rank1 = pd.merge(actor_rank1, actor_rank2, on=['title','year'])
merge_rank1

Unnamed: 0,title,year,actor_1,Ranking_x,actor_2,Ranking_y
0,Metropolis,1927.0,Alfred Abel,D,Gustav Fröhlich,D
1,City Lights,1931.0,Virginia Cherrill,D,Florence Lee,D
2,Modern Times,1936.0,Charles Chaplin,D,Paulette Goddard,D
3,Snow White and the Seven Dwarfs,1937.0,Roy Atwell,D,Stuart Buchanan,D
4,Gone with the Wind,1939.0,Thomas Mitchell,D,Barbara O'Neil,D
...,...,...,...,...,...,...
4175,Climax,2018.0,Sofia Boutella,C,Romain Guillermic,D
4176,Last Christmas,2019.0,Madison Ingoldsby,D,Emma Thompson,A
4177,Child's Play,2019.0,Tim Matheson,D,Ben Andrusco-Daon,D
4178,Unplanned,2019.0,Ashley Bratcher,D,Brooks Ryan,D


In [24]:
# Merging the first merge with actor 3 rankings
merge_rank2 = pd.merge(merge_rank1, actor_rank3, on = ['title', 'year'])
merge_rank2

Unnamed: 0,title,year,actor_1,Ranking_x,actor_2,Ranking_y,actor_3,Ranking
0,Metropolis,1927.0,Alfred Abel,D,Gustav Fröhlich,D,Rudolf Klein-Rogge,D
1,City Lights,1931.0,Virginia Cherrill,D,Florence Lee,D,Harry Myers,D
2,Modern Times,1936.0,Charles Chaplin,D,Paulette Goddard,D,Henry Bergman,D
3,Snow White and the Seven Dwarfs,1937.0,Roy Atwell,D,Stuart Buchanan,D,Adriana Caselotti,D
4,Gone with the Wind,1939.0,Thomas Mitchell,D,Barbara O'Neil,D,Vivien Leigh,D
...,...,...,...,...,...,...,...,...
4174,Climax,2018.0,Sofia Boutella,C,Romain Guillermic,D,Souheila Yacoub,D
4175,Last Christmas,2019.0,Madison Ingoldsby,D,Emma Thompson,A,Boris Isakovic,D
4176,Child's Play,2019.0,Tim Matheson,D,Ben Andrusco-Daon,D,Zahra Anderson,D
4177,Unplanned,2019.0,Ashley Bratcher,D,Brooks Ryan,D,Robia Scott,D


In [25]:
# Reading csv with the rest of IMDB data
df = pd.read_csv('formatted_df.csv')
df = df.drop(columns = ['Unnamed: 0'])
df.head()

Unnamed: 0,title,year,country,language,duration,genre,director,production_company,description,budget,...,negative_votes,percentage_of_positive_reviews,overall_eval,genre_1,genre_2,genre_3,country_1,country_2,country_3,budget_USD
0,Metropolis,1927,Germany,German,153,"Drama, Sci-Fi",Fritz Lang,Universum Film (UFA),In a futuristic city sharply divided between t...,DEM 6000000,...,5775,0.86,1,Drama,Sci-Fi,,Germany,,,10440.0
1,City Lights,1931,USA,English,87,"Comedy, Drama, Romance",Charles Chaplin,Charles Chaplin Productions,"With the aid of a wealthy erratic tippler, a d...",$ 1500000,...,4453,0.9,1,Comedy,Drama,Romance,USA,,,1500000.0
2,Modern Times,1936,USA,English,87,"Comedy, Drama, Family",Charles Chaplin,Charles Chaplin Productions,The Tramp struggles to live in modern industri...,$ 1500000,...,4951,0.91,1,Comedy,Drama,Family,USA,,,1500000.0
3,Snow White and the Seven Dwarfs,1937,USA,English,83,"Animation, Family, Fantasy","William Cottrell, David Hand",Walt Disney Productions,Exiled into the dangerous forest by her wicked...,$ 1499000,...,3000,0.81,1,Animation,Family,Fantasy,USA,,,1499000.0
4,Gone with the Wind,1939,USA,English,238,"Drama, History, Romance","Victor Fleming, George Cukor",Selznick International Pictures,A manipulative woman and a roguish man conduct...,$ 3977000,...,9580,0.85,1,Drama,History,Romance,USA,,,3977000.0


In [26]:
# Merging imdb data with ranking data
final_df = pd.merge(df, merge_rank2, on = ['title', 'year'])
# Renaming each rankings column
final_df = final_df.rename(columns = {'Ranking_x':'listing_1', 'Ranking_y':'listing_2', 'Ranking':'listing_3'})
# final_df = final_df.fillna('None')
# final_df = final_df.drop(columns=['Unnamed: 0.1'])
final_df

Unnamed: 0,title,year,country,language,duration,genre,director,production_company,description,budget,...,country_1,country_2,country_3,budget_USD,actor_1,listing_1,actor_2,listing_2,actor_3,listing_3
0,Metropolis,1927,Germany,German,153,"Drama, Sci-Fi",Fritz Lang,Universum Film (UFA),In a futuristic city sharply divided between t...,DEM 6000000,...,Germany,,,10440.0,Alfred Abel,D,Gustav Fröhlich,D,Rudolf Klein-Rogge,D
1,City Lights,1931,USA,English,87,"Comedy, Drama, Romance",Charles Chaplin,Charles Chaplin Productions,"With the aid of a wealthy erratic tippler, a d...",$ 1500000,...,USA,,,1500000.0,Virginia Cherrill,D,Florence Lee,D,Harry Myers,D
2,Modern Times,1936,USA,English,87,"Comedy, Drama, Family",Charles Chaplin,Charles Chaplin Productions,The Tramp struggles to live in modern industri...,$ 1500000,...,USA,,,1500000.0,Charles Chaplin,D,Paulette Goddard,D,Henry Bergman,D
3,Snow White and the Seven Dwarfs,1937,USA,English,83,"Animation, Family, Fantasy","William Cottrell, David Hand",Walt Disney Productions,Exiled into the dangerous forest by her wicked...,$ 1499000,...,USA,,,1499000.0,Roy Atwell,D,Stuart Buchanan,D,Adriana Caselotti,D
4,Gone with the Wind,1939,USA,English,238,"Drama, History, Romance","Victor Fleming, George Cukor",Selznick International Pictures,A manipulative woman and a roguish man conduct...,$ 3977000,...,USA,,,3977000.0,Thomas Mitchell,D,Barbara O'Neil,D,Vivien Leigh,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4174,Child's Play,2019,"France, Canada, USA","English, Vietnamese",90,Horror,Lars Klevberg,Orion Pictures,A mother gives her 13-year-old son a toy doll ...,$ 10000000,...,France,Canada,USA,10000000.0,Tim Matheson,D,Ben Andrusco-Daon,D,Zahra Anderson,D
4175,Midsommar,2019,"USA, Sweden, Hungary","English, Swedish",147,"Drama, Horror, Mystery",Ari Aster,Proton Cinema,A couple travels to Sweden to visit a rural ho...,$ 10000000,...,USA,Sweden,Hungary,10000000.0,Florence Pugh,D,Jack Reynor,D,Vilhelm Blomgren,D
4176,Unplanned,2019,USA,English,109,"Biography, Drama","Chuck Konzelman, Cary Solomon",Unplanned Movie,All Abby Johnson ever wanted to do was help wo...,$ 6000000,...,USA,,,6000000.0,Ashley Bratcher,D,Brooks Ryan,D,Robia Scott,D
4177,Cold Blood Legacy,2019,"France, Ukraine, Belgium","French, English",91,"Action, Thriller",Frédéric Petitjean,Ascot Elite Entertainment Group,A hit-man lives isolated in a cabin at the edg...,$ 2700000,...,France,Ukraine,Belgium,2700000.0,Jean Reno,D,Sarah Lind,D,Joe Anderson,D


In [27]:
final_df = final_df[final_df['year'] >= 1960]  
final_df = final_df.sort_values(by=['year'])
final_df

Unnamed: 0,title,year,country,language,duration,genre,director,production_company,description,budget,...,country_1,country_2,country_3,budget_USD,actor_1,listing_1,actor_2,listing_2,actor_3,listing_3
33,Psycho,1960,USA,English,109,"Horror, Mystery, Thriller",Alfred Hitchcock,Shamley Productions,A Phoenix secretary embezzles forty thousand d...,$ 806947,...,USA,,,806947.0,Anthony Perkins,D,Vera Miles,D,John Gavin,D
34,Spartacus,1960,USA,English,197,"Adventure, Biography, Drama",Stanley Kubrick,Bryna Productions,The slave Spartacus leads a violent revolt aga...,$ 12000000,...,USA,,,12000000.0,Kirk Douglas,D,Laurence Olivier,D,Jean Simmons,D
35,Swiss Family Robinson,1960,USA,English,126,"Adventure, Family",Ken Annakin,Walt Disney Productions,A Swiss family must survive being shipwrecked ...,$ 5000000,...,USA,,,5000000.0,John Mills,D,Dorothy McGuire,D,James MacArthur,D
36,One Hundred and One Dalmatians,1961,USA,English,79,"Animation, Adventure, Comedy","Clyde Geronimi, Hamilton Luske",Walt Disney Productions,When a litter of Dalmatian puppies are abducte...,$ 4000000,...,USA,,,4000000.0,Rod Taylor,D,J. Pat O'Malley,D,Betty Lou Gerson,D
37,Une femme est une femme,1961,"France, Italy",French,85,"Comedy, Drama, Romance",Jean-Luc Godard,Euro International Film (EIA),A French striptease artist is desperate to bec...,$ 160000,...,France,Italy,,160000.0,Jean-Claude Brialy,D,Anna Karina,D,Jean-Paul Belmondo,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4051,Captive State,2019,USA,English,109,"Drama, Sci-Fi, Thriller",Rupert Wyatt,DreamWorks,Set in a Chicago neighborhood nearly a decade ...,$ 25000000,...,USA,,,25000000.0,John Goodman,A,Ashton Sanders,D,Jonathan Majors,D
4057,The Angry Birds Movie 2,2019,"Finland, USA",English,97,"Animation, Adventure, Comedy","Thurop Van Orman, John Rice",Sony Pictures Animation,The flightless birds and scheming green pigs t...,$ 65000000,...,Finland,USA,,65000000.0,Jason Sudeikis,B,Josh Gad,A,Leslie Jones,D
4058,The Lion King,2019,USA,English,118,"Animation, Adventure, Drama",Jon Favreau,Fairview Entertainment,"After the murder of his father, a young lion p...",$ 260000000,...,USA,,,260000000.0,Chiwetel Ejiofor,A,John Oliver,B,James Earl Jones,B
4040,The Kitchen,2019,USA,English,102,"Action, Crime, Drama",Andrea Berloff,BRON Studios,The wives of New York gangsters in Hell's Kitc...,$ 38000000,...,USA,,,38000000.0,Melissa McCarthy,B,Tiffany Haddish,C,Elisabeth Moss,D


In [28]:
# final_df['worlwide_gross_income']=final_df['worlwide_gross_income'].str.replace("$",'').astype(float)
# final_df

In [29]:
# Export dataframe to csv
final_df.to_csv('final_df.csv')

In [30]:
# final_df2 = final_df[['title','country_2']]
# final_df2.dropna()

In [31]:
# WHY ARE ROWS DUPLICATING
# pd.set_option('display.max_rows', 10)
# merge_rank2['title'].value_counts()

In [32]:
# df_actor1 = pd.read_csv('actor_rank1.csv')
# df_actor2 = pd.read_csv('actor_rank2.csv')
# # df_actor1

In [33]:
# merge_rank1 = pd.merge(df_actor1, df_actor2, on=['title'], how='inner')
# merge_rank1

In [34]:
# test_rank = pd.read_csv('actor_rankings.csv')

In [35]:
# test = pd.read_csv('./untitled folder/IMDB_actor2.csv')
# test

In [36]:
# test['Actor'] = test['Actor'].str[1:]
# test

In [37]:
# testing=pd.merge(test, test_rank, on=['Actor'], how='outer')
# testing

In [38]:
# testing = testing.drop(columns={'Unnamed: 0_x','Unnamed: 0_y'})
# testing

In [39]:
# testing['Ranking'] = testing['Ranking'].fillna('D')
# testing

In [40]:
# testing = testing.dropna()
# testing

In [41]:
# testing.to_csv('test.csv')