In [50]:
import pandas as pd

In [51]:
df = pd.read_csv("movies.csv")
df.head(3)

Unnamed: 0,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language
0,Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali
1,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English
2,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English


In [52]:
df.columns

Index(['title', 'industry', 'release_year', 'imdb_rating', 'studio', 'budget',
       'revenue', 'unit', 'currency', 'language'],
      dtype='object')

In [53]:
df.industry.unique()

array(['Bollywood', 'Hollywood'], dtype=object)

In [54]:
df['language'].unique()

array(['Bengali', 'English', 'Hindi', 'Kannada', 'Telugu'], dtype=object)

In [55]:
df.industry.value_counts()

industry
Hollywood    20
Bollywood    17
Name: count, dtype: int64

In [56]:
# how many movies are there for each of these languages?
language_counts = df.language.value_counts()

print(f'Movies in English: {language_counts.English}')
print(f'Movies in Hindi: {language_counts.Hindi}')
print(f'Movies in Telugu: {language_counts.Telugu}')
print(f'Movies in Bengali: {language_counts.Bengali}')
print(f'Movies in Kannada: {language_counts.Kannada}')

Movies in English: 20
Movies in Hindi: 12
Movies in Telugu: 3
Movies in Bengali: 1
Movies in Kannada: 1


In [57]:
# new data frame
new_df = df[['title', 'industry', 'imdb_rating', 'release_year']]
new_df.head(3)

Unnamed: 0,title,industry,imdb_rating,release_year
0,Pather Panchali,Bollywood,8.3,1955
1,Doctor Strange in the Multiverse of Madness,Hollywood,7.0,2022
2,Thor: The Dark World,Hollywood,6.8,2013


In [58]:
# only movies between 2000 and 2010
df[(df.release_year >= 2000) & (df.release_year <= 2010)]

Unnamed: 0,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language
7,The Pursuit of Happyness,Hollywood,2006,8.0,Columbia Pictures,55.0,307.1,Millions,USD,English
8,Gladiator,Hollywood,2000,8.5,Universal Pictures,103.0,460.5,Millions,USD,English
11,Avatar,Hollywood,2009,7.8,20th Century Fox,237.0,2847.0,Millions,USD,English
13,The Dark Knight,Hollywood,2008,9.0,Syncopy,185.0,1006.0,Millions,USD,English
22,3 Idiots,Bollywood,2009,8.4,Vinod Chopra Films,550.0,4000.0,Millions,INR,Hindi
23,Kabhi Khushi Kabhie Gham,Bollywood,2001,7.4,Dharma Productions,390.0,1360.0,Millions,INR,Hindi
25,Taare Zameen Par,Bollywood,2007,8.3,,120.0,1350.0,Millions,INR,Hindi
26,Munna Bhai M.B.B.S.,Bollywood,2003,8.1,Vinod Chopra Productions,100.0,410.0,Millions,INR,Hindi


In [59]:
# movies only from Marvel Studios
df[df.studio == 'Marvel Studios']

Unnamed: 0,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language
1,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English
2,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English
3,Thor: Ragnarok,Hollywood,2017,7.9,Marvel Studios,180.0,854.0,Millions,USD,English
4,Thor: Love and Thunder,Hollywood,2022,6.8,Marvel Studios,250.0,670.0,Millions,USD,English
17,Avengers: Endgame,Hollywood,2019,8.4,Marvel Studios,400.0,2798.0,Millions,USD,English
18,Avengers: Infinity War,Hollywood,2018,8.4,Marvel Studios,400.0,2048.0,Millions,USD,English
19,Captain America: The First Avenger,Hollywood,2011,6.9,Marvel Studios,216.7,370.6,Millions,USD,English
20,Captain America: The Winter Soldier,Hollywood,2014,7.8,Marvel Studios,177.0,714.4,Millions,USD,English


In [60]:
# add a new column - using one column
# lambda x: a way of quickly writing python functions
df['age'] = df['release_year'].apply(lambda x: 2024 - x)
df.head()

Unnamed: 0,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language,age
0,Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali,69
1,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English,2
2,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English,11
3,Thor: Ragnarok,Hollywood,2017,7.9,Marvel Studios,180.0,854.0,Millions,USD,English,7
4,Thor: Love and Thunder,Hollywood,2022,6.8,Marvel Studios,250.0,670.0,Millions,USD,English,2


In [61]:
# using two columns to create a new one
df['profit'] = df.apply(lambda x: x['revenue'] - x['budget'], axis=1) # axis=1 refers to column
df.head()

Unnamed: 0,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language,age,profit
0,Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali,69,30000.0
1,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English,2,754.8
2,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English,11,479.8
3,Thor: Ragnarok,Hollywood,2017,7.9,Marvel Studios,180.0,854.0,Millions,USD,English,7,674.0
4,Thor: Love and Thunder,Hollywood,2022,6.8,Marvel Studios,250.0,670.0,Millions,USD,English,2,420.0


In [62]:
df.index

RangeIndex(start=0, stop=37, step=1)

In [63]:
df.set_index("title", inplace=True) # gonna change index to title

In [64]:
df.head(3)

Unnamed: 0_level_0,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language,age,profit
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali,69,30000.0
Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English,2,754.8
Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English,11,479.8


In [67]:
# now, the index is gonna be the titles
df.index

Index(['Pather Panchali', 'Doctor Strange in the Multiverse of Madness',
       'Thor: The Dark World ', 'Thor: Ragnarok ', 'Thor: Love and Thunder ',
       'The Shawshank Redemption', 'Interstellar', 'The Pursuit of Happyness',
       'Gladiator', 'Titanic', 'It's a Wonderful Life', 'Avatar',
       'The Godfather', 'The Dark Knight', 'Schindler's List', 'Jurassic Park',
       'Parasite', 'Avengers: Endgame', 'Avengers: Infinity War',
       'Captain America: The First Avenger',
       'Captain America: The Winter Soldier', 'Dilwale Dulhania Le Jayenge',
       '3 Idiots', 'Kabhi Khushi Kabhie Gham', 'Bajirao Mastani ',
       'Taare Zameen Par', 'Munna Bhai M.B.B.S.', 'PK', 'Sanju',
       'The Kashmir Files', 'Bajrangi Bhaijaan', 'Race 3', 'Shershaah',
       'K.G.F: Chapter 2', 'Pushpa: The Rise - Part 1', 'RRR',
       'Baahubali: The Beginning'],
      dtype='object', name='title')

In [69]:
#loc = work w/ labels (name of rows and columns)
df.loc['Pather Panchali'] # all informations

industry                        Bollywood
release_year                         1955
imdb_rating                           8.3
studio          Government of West Bengal
budget                            70000.0
revenue                          100000.0
unit                            Thousands
currency                              INR
language                          Bengali
age                                    69
profit                            30000.0
Name: Pather Panchali, dtype: object

In [71]:
df.loc[['Pather Panchali', 'Doctor Strange in the Multiverse of Madness']]

Unnamed: 0_level_0,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language,age,profit
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali,69,30000.0
Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English,2,754.8


In [72]:
# iloc: integer based location
df.iloc[0]

industry                        Bollywood
release_year                         1955
imdb_rating                           8.3
studio          Government of West Bengal
budget                            70000.0
revenue                          100000.0
unit                            Thousands
currency                              INR
language                          Bengali
age                                    69
profit                            30000.0
Name: Pather Panchali, dtype: object

In [75]:
df.reset_index(inplace=True)
df.head(3)

Unnamed: 0,index,title,industry,release_year,imdb_rating,studio,budget,revenue,unit,currency,language,age,profit
0,0,Pather Panchali,Bollywood,1955,8.3,Government of West Bengal,70000.0,100000.0,Thousands,INR,Bengali,69,30000.0
1,1,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,200.0,954.8,Millions,USD,English,2,754.8
2,2,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,165.0,644.8,Millions,USD,English,11,479.8
