# Pandas

In [1]:
import numpy as np
import pandas as pd
import os

![image.png](attachment:image.png)

>\[*pandas*\] is derived from the term "**pan**el **da**ta", an econometrics term for data sets that include observations over multiple time periods for the same individuals. — [Wikipedia](https://en.wikipedia.org/wiki/Pandas_%28software%29)

- pandas is package / library in Python and the most important tool for data analysts / data scientists
- Powerful ML and visualization tools work on the back of pandas
    - **pandas is the backbone of most data projects**

### Core components of pandas: Series & DataFrame

- A series is essentially a `column`, and a DataFrame is a multi-dimensional table made up of collection of series

![image.png](attachment:image.png)

## Create Series

#### From ndarray

In [3]:
arr = np.random.randn(5)
print(arr)

[-2.15983289 -0.62710126 -0.32549202  2.0598737   0.08156173]


`pd.Series()` : `Do not miss the capital 'S'`

In [5]:
pd_series = pd.Series(arr) #convert an array into Pandas series

In [6]:
pd_series

0   -2.159833
1   -0.627101
2   -0.325492
3    2.059874
4    0.081562
dtype: float64

In [7]:
print(type(pd_series))

<class 'pandas.core.series.Series'>


#### Set the own index

In [8]:
pd_series = pd.Series(arr,index=['a', 'b', 'c','d','e'] ) #convert an array into Pandas series
pd_series


a   -2.159833
b   -0.627101
c   -0.325492
d    2.059874
e    0.081562
dtype: float64

In [9]:
pd_series = pd.Series(arr,index=['a', 'b', 'c','d','e'] , name="My_First_Random_Pandas_Series") #convert an array into Pandas series
pd_series

a   -2.159833
b   -0.627101
c   -0.325492
d    2.059874
e    0.081562
Name: My_First_Random_Pandas_Series, dtype: float64

In [11]:
pd_series = pd.Series(arr,index=['a', 'b', 'c','d','e'] , name="My_First_Random_Pandas_Series") #convert an array into Pandas series
pd_series

a   -2.159833
b   -0.627101
c   -0.325492
d    2.059874
e    0.081562
Name: My_First_Random_Pandas_Series, dtype: float64

In [16]:
arr1 = np.array(list(range(10,20,2)))
print(arr1)

[10 12 14 16 18]


In [18]:
pd_series_1 = pd.Series(arr1, dtype='float64')
print(pd_series_1)

0    10.0
1    12.0
2    14.0
3    16.0
4    18.0
dtype: float64


In [2]:
madict={'madhu':27,'arun':28,'madhu':28}

In [3]:
madict

{'madhu': 28, 'arun': 28}

In [7]:
madict('madhu')

TypeError: 'dict' object is not callable

In [21]:
score={'madhu':98,'akash':75,'ramesh':72,'stalin':75,'abdul':87}

In [22]:
import pandas as pd

In [23]:
score

{'madhu': 98, 'akash': 75, 'ramesh': 72, 'stalin': 75, 'abdul': 87}

In [30]:
pd_score=pd.Series(score)

In [31]:
pd_score

madhu     98
akash     75
ramesh    72
stalin    75
abdul     87
dtype: int64

In [32]:
max(pd_score)

98

In [45]:
creteria=(pd_score>70) & (pd_score<80)

In [46]:
creteria


madhu     False
akash      True
ramesh     True
stalin     True
abdul     False
dtype: bool

In [47]:
pd_score["madhu"]

98

In [54]:
print(pd_score.max())
print(pd_score.min())
print(pd_score.mean())
print(pd_score.median())
print(pd_score.var())
print(pd_score.std())
print(pd_score.mode())

98
72
81.4
75.0
119.30000000000003
10.92245393673052
0    75
dtype: int64


In [50]:
pd_score.max()

98

In [56]:
pd_score.index

Index(['madhu', 'akash', 'ramesh', 'stalin', 'abdul'], dtype='object')

In [57]:
pd_score.shape

(5,)

In [58]:
pd_score.dtype

dtype('int64')

In [59]:
pd_score.size

5

In [60]:
pd_score.empty

False

In [61]:
pd_score.hasnans

False

In [62]:
pd_score.nbytes

40

In [63]:
pd_score.ndim

1

In [64]:
score1={'madhu':98,'akash':75,'ramesh':72,'stalin':75,'abdul':" "}

In [65]:
pd1=pd.Series(score1)

In [66]:
pd1

madhu     98
akash     75
ramesh    72
stalin    75
abdul       
dtype: object

In [69]:
pd1.empty

False

In [70]:
pd1.hasnans

False

In [72]:
pd1.isna()

madhu     False
akash     False
ramesh    False
stalin    False
abdul     False
dtype: bool

In [73]:
pd1.isnull()

madhu     False
akash     False
ramesh    False
stalin    False
abdul     False
dtype: bool

In [5]:
import os
import pandas as pd

In [6]:
os.getcwd()

'C:\\Users\\Madhu Sudhan\\Downloads'

In [7]:
os.listdir()

['.ipynb_checkpoints',
 '.opera',
 '02-05_Numpy HandsOn.ipynb',
 '5th-AprilPython-basics (1).ipynb',
 '5th-AprilPython-basics (1).zip',
 '5th-AprilPython-basics.ipynb',
 '5th-AprilPython-basics.zip',
 '6 Healthy Habits That Make You Mentally Strong.mp4',
 '6th April(Python basics).ipynb',
 '7th April(Python basics).ipynb',
 '7th-AprilPython-basics.zip',
 'assignment 4.zip',
 'Assignment-SQL-Queries-1 (1).pdf',
 'Assignment-SQL-Queries-1.pdf',
 'assinment 3.rar',
 'bike-sharing-demand.csv',
 'BraveBrowserSetup-BRV010.exe',
 'Create-use-drop-database.pdf',
 'Data-Set-2.zip',
 'Dec17-Handson (1).zip',
 'Dec17-Handson.zip',
 'Demo1.zip',
 'Demo2.zip',
 'desktop.ini',
 'DiscordSetup.exe',
 'download.jpg',
 'FIT.jpg',
 'for yt.jpg',
 'Funny Baby Videos eating  # Short.mp4',
 'hq720.webp',
 'IIF-function-in-sql.pdf',
 'IMDB-Movie-Data.csv',
 'Inheritance diagrams - Copy.pdf',
 'intellipaat-certificate.pdf',
 'Intro_to_Numpy.pdf',
 'Invoice -ISSPL21-22041224.pdf',
 'Invoice-B2C2223IN18617.pdf'

In [8]:
df_imdb=pd.read_csv("IMDB-Movie-Data.csv")
df_imdb

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...
995,996,Secret in Their Eyes,"Crime,Drama,Mystery","A tight-knit team of rising investigators, alo...",Billy Ray,"Chiwetel Ejiofor, Nicole Kidman, Julia Roberts...",2015,111,6.2,27585,,45.0
996,997,Hostel: Part II,Horror,Three American college students studying abroa...,Eli Roth,"Lauren German, Heather Matarazzo, Bijou Philli...",2007,94,5.5,73152,17.54,46.0
997,998,Step Up 2: The Streets,"Drama,Music,Romance",Romantic sparks occur between two dance studen...,Jon M. Chu,"Robert Hoffman, Briana Evigan, Cassie Ventura,...",2008,98,6.2,70699,58.01,50.0
998,999,Search Party,"Adventure,Comedy",A pair of friends embark on a mission to reuni...,Scot Armstrong,"Adam Pally, T.J. Miller, Thomas Middleditch,Sh...",2014,93,5.6,4881,,22.0


In [85]:
                df_imdb.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [88]:
df_imdb.head(3).T

Unnamed: 0,0,1,2
Rank,1,2,3
Title,Guardians of the Galaxy,Prometheus,Split
Genre,"Action,Adventure,Sci-Fi","Adventure,Mystery,Sci-Fi","Horror,Thriller"
Description,A group of intergalactic criminals are forced ...,"Following clues to the origin of mankind, a te...",Three girls are kidnapped by a man with a diag...
Director,James Gunn,Ridley Scott,M. Night Shyamalan
Actors,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...","Noomi Rapace, Logan Marshall-Green, Michael Fa...","James McAvoy, Anya Taylor-Joy, Haley Lu Richar..."
Year,2014,2012,2016
Runtime (Minutes),121,124,117
Rating,8.1,7.0,7.3
Votes,757074,485820,157606


In [9]:
df_imdb.tail().T

Unnamed: 0,995,996,997,998,999
Rank,996,997,998,999,1000
Title,Secret in Their Eyes,Hostel: Part II,Step Up 2: The Streets,Search Party,Nine Lives
Genre,"Crime,Drama,Mystery",Horror,"Drama,Music,Romance","Adventure,Comedy","Comedy,Family,Fantasy"
Description,"A tight-knit team of rising investigators, alo...",Three American college students studying abroa...,Romantic sparks occur between two dance studen...,A pair of friends embark on a mission to reuni...,A stuffy businessman finds himself trapped ins...
Director,Billy Ray,Eli Roth,Jon M. Chu,Scot Armstrong,Barry Sonnenfeld
Actors,"Chiwetel Ejiofor, Nicole Kidman, Julia Roberts...","Lauren German, Heather Matarazzo, Bijou Philli...","Robert Hoffman, Briana Evigan, Cassie Ventura,...","Adam Pally, T.J. Miller, Thomas Middleditch,Sh...","Kevin Spacey, Jennifer Garner, Robbie Amell,Ch..."
Year,2015,2007,2008,2014,2016
Runtime (Minutes),111,94,98,93,87
Rating,6.2,5.5,6.2,5.6,5.3
Votes,27585,73152,70699,4881,12435


In [93]:
df_imdb.sample(3)

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
644,645,Viral,"Drama,Horror,Sci-Fi",Following the outbreak of a virus that wipes o...,Henry Joost,"Sofia Black-D'Elia, Analeigh Tipton,Travis Top...",2016,85,5.5,3564,,72.0
853,854,Vicky Cristina Barcelona,"Drama,Romance",Two girlfriends on a summer holiday in Spain b...,Woody Allen,"Rebecca Hall, Scarlett Johansson, Javier Barde...",2008,96,7.1,208770,23.21,70.0
20,21,Gold,"Adventure,Drama,Thriller","Kenny Wells, a prospector desperate for a luck...",Stephen Gaghan,"Matthew McConaughey, Edgar Ramírez, Bryce Dall...",2016,120,6.7,19053,7.22,49.0


In [94]:
df_imdb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rank                1000 non-null   int64  
 1   Title               1000 non-null   object 
 2   Genre               1000 non-null   object 
 3   Description         1000 non-null   object 
 4   Director            1000 non-null   object 
 5   Actors              1000 non-null   object 
 6   Year                1000 non-null   int64  
 7   Runtime (Minutes)   1000 non-null   int64  
 8   Rating              1000 non-null   float64
 9   Votes               1000 non-null   int64  
 10  Revenue (Millions)  872 non-null    float64
 11  Metascore           936 non-null    float64
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [95]:
df_imdb.isnull()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
995,False,False,False,False,False,False,False,False,False,False,True,False
996,False,False,False,False,False,False,False,False,False,False,False,False
997,False,False,False,False,False,False,False,False,False,False,False,False
998,False,False,False,False,False,False,False,False,False,False,True,False


In [96]:
df_imdb.isnull().sum()

Rank                    0
Title                   0
Genre                   0
Description             0
Director                0
Actors                  0
Year                    0
Runtime (Minutes)       0
Rating                  0
Votes                   0
Revenue (Millions)    128
Metascore              64
dtype: int64

In [10]:
df_imdb.isnull().sum()/len(df_imdb)*100

Rank                   0.0
Title                  0.0
Genre                  0.0
Description            0.0
Director               0.0
Actors                 0.0
Year                   0.0
Runtime (Minutes)      0.0
Rating                 0.0
Votes                  0.0
Revenue (Millions)    12.8
Metascore              6.4
dtype: float64

In [98]:
len(df_imdb)

1000

In [99]:
df_imdb['Revenue (Millions)']

0      333.13
1      126.46
2      138.12
3      270.32
4      325.02
        ...  
995       NaN
996     17.54
997     58.01
998       NaN
999     19.64
Name: Revenue (Millions), Length: 1000, dtype: float64

In [100]:
df_imdb['Revenue (Millions)'].mean()

82.95637614678897

In [101]:
df_imdb_copy=df_imdb.copy()

In [102]:
df_imdb_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rank                1000 non-null   int64  
 1   Title               1000 non-null   object 
 2   Genre               1000 non-null   object 
 3   Description         1000 non-null   object 
 4   Director            1000 non-null   object 
 5   Actors              1000 non-null   object 
 6   Year                1000 non-null   int64  
 7   Runtime (Minutes)   1000 non-null   int64  
 8   Rating              1000 non-null   float64
 9   Votes               1000 non-null   int64  
 10  Revenue (Millions)  872 non-null    float64
 11  Metascore           936 non-null    float64
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [106]:
df_imdb_copy.isnull().sum()

Rank                   0
Title                  0
Genre                  0
Description            0
Director               0
Actors                 0
Year                   0
Runtime (Minutes)      0
Rating                 0
Votes                  0
Revenue (Millions)     0
Metascore             64
dtype: int64

In [107]:
df_imdb_notnull=df_imdb.dropna()

In [108]:
df_imdb_notnull.isnull().sum()

Rank                  0
Title                 0
Genre                 0
Description           0
Director              0
Actors                0
Year                  0
Runtime (Minutes)     0
Rating                0
Votes                 0
Revenue (Millions)    0
Metascore             0
dtype: int64

In [109]:
df_imdb_notnull.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 838 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rank                838 non-null    int64  
 1   Title               838 non-null    object 
 2   Genre               838 non-null    object 
 3   Description         838 non-null    object 
 4   Director            838 non-null    object 
 5   Actors              838 non-null    object 
 6   Year                838 non-null    int64  
 7   Runtime (Minutes)   838 non-null    int64  
 8   Rating              838 non-null    float64
 9   Votes               838 non-null    int64  
 10  Revenue (Millions)  838 non-null    float64
 11  Metascore           838 non-null    float64
dtypes: float64(3), int64(4), object(5)
memory usage: 85.1+ KB


In [110]:
df_imdb.describe()

Unnamed: 0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
count,1000.0,1000.0,1000.0,1000.0,1000.0,872.0,936.0
mean,500.5,2012.783,113.172,6.7232,169808.3,82.956376,58.985043
std,288.819436,3.205962,18.810908,0.945429,188762.6,103.25354,17.194757
min,1.0,2006.0,66.0,1.9,61.0,0.0,11.0
25%,250.75,2010.0,100.0,6.2,36309.0,13.27,47.0
50%,500.5,2014.0,111.0,6.8,110799.0,47.985,59.5
75%,750.25,2016.0,123.0,7.4,239909.8,113.715,72.0
max,1000.0,2016.0,191.0,9.0,1791916.0,936.63,100.0


In [111]:
df_imdb.columns

Index(['Rank', 'Title', 'Genre', 'Description', 'Director', 'Actors', 'Year',
       'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)',
       'Metascore'],
      dtype='object')

In [112]:
df_imdb.max['Genre']

TypeError: 'method' object is not subscriptable

In [113]:
df_imdb['Genre'].max()

'Thriller,War'

In [115]:
df_imdb['Genre'].value_counts()

Action,Adventure,Sci-Fi    50
Drama                      48
Comedy,Drama,Romance       35
Comedy                     32
Drama,Romance              31
                           ..
Action,Comedy,Family        1
Action,Crime,Fantasy        1
Comedy,Mystery              1
Adventure,Comedy,Horror     1
Comedy,Family,Fantasy       1
Name: Genre, Length: 207, dtype: int64

In [117]:
df_imdb['Genre'].describe()

count                        1000
unique                        207
top       Action,Adventure,Sci-Fi
freq                           50
Name: Genre, dtype: object

In [118]:
df_imdb['Genre'].unique()

array(['Action,Adventure,Sci-Fi', 'Adventure,Mystery,Sci-Fi',
       'Horror,Thriller', 'Animation,Comedy,Family',
       'Action,Adventure,Fantasy', 'Comedy,Drama,Music', 'Comedy',
       'Action,Adventure,Biography', 'Adventure,Drama,Romance',
       'Adventure,Family,Fantasy', 'Biography,Drama,History',
       'Animation,Adventure,Comedy', 'Action,Comedy,Drama',
       'Action,Thriller', 'Biography,Drama', 'Drama,Mystery,Sci-Fi',
       'Adventure,Drama,Thriller', 'Drama', 'Crime,Drama,Horror',
       'Action,Adventure,Drama', 'Drama,Thriller',
       'Action,Adventure,Comedy', 'Action,Horror,Sci-Fi',
       'Adventure,Drama,Sci-Fi', 'Action,Adventure,Western',
       'Comedy,Drama', 'Horror', 'Adventure,Drama,Fantasy',
       'Action,Crime,Thriller', 'Action,Crime,Drama',
       'Adventure,Drama,History', 'Crime,Horror,Thriller',
       'Drama,Romance', 'Comedy,Drama,Romance', 'Horror,Mystery,Thriller',
       'Crime,Drama,Mystery', 'Drama,Romance,Thriller',
       'Drama,History,T

In [121]:
df_imdb.columns

Index(['Rank', 'Title', 'Genre', 'Description', 'Director', 'Actors', 'Year',
       'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)',
       'Metascore'],
      dtype='object')

In [124]:
df_imdb[['Rank','Title']]

Unnamed: 0,Rank,Title
0,1,Guardians of the Galaxy
1,2,Prometheus
2,3,Split
3,4,Sing
4,5,Suicide Squad
...,...,...
995,996,Secret in Their Eyes
996,997,Hostel: Part II
997,998,Step Up 2: The Streets
998,999,Search Party


In [125]:
new_col=['Runtime (Minutes)', 'Rating', 'Votes']

In [126]:
df_imdb[new_col]

Unnamed: 0,Runtime (Minutes),Rating,Votes
0,121,8.1,757074
1,124,7.0,485820
2,117,7.3,157606
3,108,7.2,60545
4,123,6.2,393727
...,...,...,...
995,111,6.2,27585
996,94,5.5,73152
997,98,6.2,70699
998,93,5.6,4881


In [129]:
print("revenue in $:",df_imdb['Revenue (Millions)'].sum())

revenue in $: 72337.95999999999


In [138]:
df_imdb.loc[df_imdb['Title']=='The Dark Knight']

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
54,55,The Dark Knight,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0


In [199]:
df_imdb.iloc[54]

Rank                                                                 55
Title                                                   The Dark Knight
Genre                                                Action,Crime,Drama
Description           When the menace known as the Joker wreaks havo...
Director                                              Christopher Nolan
Actors                Christian Bale, Heath Ledger, Aaron Eckhart,Mi...
Year                                                               2008
Runtime (Minutes)                                                   152
Rating                                                              9.0
Votes                                                           1791916
Revenue (Millions)                                               533.32
Metascore                                                          82.0
Name: 54, dtype: object

In [200]:
df_imdb.iloc[0:10 ,:3]

Unnamed: 0,Rank,Title,Genre
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi"
1,2,Prometheus,"Adventure,Mystery,Sci-Fi"
2,3,Split,"Horror,Thriller"
3,4,Sing,"Animation,Comedy,Family"
4,5,Suicide Squad,"Action,Adventure,Fantasy"
5,6,The Great Wall,"Action,Adventure,Fantasy"
6,7,La La Land,"Comedy,Drama,Music"
7,8,Mindhorn,Comedy
8,9,The Lost City of Z,"Action,Adventure,Biography"
9,10,Passengers,"Adventure,Drama,Romance"


In [201]:
search=df_imdb['Title'].str.contains('titanic',case=False)

In [202]:
search

0      False
1      False
2      False
3      False
4      False
       ...  
995    False
996    False
997    False
998    False
999    False
Name: Title, Length: 1000, dtype: bool

In [203]:
df_imdb[search]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore


In [11]:
df_imdb.iloc[124,3]

"Eight years after the Joker's reign of anarchy, the Dark Knight, with the help of the enigmatic Selina, is forced from his imposed exile to save Gotham City, now on the edge of total annihilation, from the brutal guerrilla terrorist Bane."

In [20]:
search=df_imdb['Director'].str.contains('Nolan',case=False)

In [21]:
search

0      False
1      False
2      False
3      False
4      False
       ...  
995    False
996    False
997    False
998    False
999    False
Name: Director, Length: 1000, dtype: bool

In [22]:
df_imdb[search]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
36,37,Interstellar,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...,Christopher Nolan,"Matthew McConaughey, Anne Hathaway, Jessica Ch...",2014,169,8.6,1047747,187.99,74.0
54,55,The Dark Knight,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0
64,65,The Prestige,"Drama,Mystery,Sci-Fi",Two stage magicians engage in competitive one-...,Christopher Nolan,"Christian Bale, Hugh Jackman, Scarlett Johanss...",2006,130,8.5,913152,53.08,66.0
80,81,Inception,"Action,Adventure,Sci-Fi","A thief, who steals corporate secrets through ...",Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen...",2010,148,8.8,1583625,292.57,74.0
124,125,The Dark Knight Rises,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,78.0


In [31]:
filter_ch=(df_imdb['Director']=='Christopher Nolan') & (df_imdb['Year']>2010)

In [32]:
df_imdb[filter_ch]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
36,37,Interstellar,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...,Christopher Nolan,"Matthew McConaughey, Anne Hathaway, Jessica Ch...",2014,169,8.6,1047747,187.99,74.0
124,125,The Dark Knight Rises,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,78.0


In [33]:
df_student = pd.DataFrame({
    'stu_id': [1,2,3,4],
    'name':['Akash', 'Kavitha', 'Varun','Nitish']
})
df_student_info = pd.DataFrame({
    'stu_id': [1,2,3,4,5,6,7],
    'age':[34,20,21,18,25,20,18],
    'sex': ['M','F','M','M','F','M','M']
})

In [34]:
df_student

Unnamed: 0,stu_id,name
0,1,Akash
1,2,Kavitha
2,3,Varun
3,4,Nitish


In [35]:
df_student_info

Unnamed: 0,stu_id,age,sex
0,1,34,M
1,2,20,F
2,3,21,M
3,4,18,M
4,5,25,F
5,6,20,M
6,7,18,M


In [36]:
pd.merge(df_student,df_student_info)

Unnamed: 0,stu_id,name,age,sex
0,1,Akash,34,M
1,2,Kavitha,20,F
2,3,Varun,21,M
3,4,Nitish,18,M


In [37]:
pd.merge(df_student_info,df_student)

Unnamed: 0,stu_id,age,sex,name
0,1,34,M,Akash
1,2,20,F,Kavitha
2,3,21,M,Varun
3,4,18,M,Nitish


In [3]:
import os
import pandas as pd

In [4]:
df_sample=pd.read_csv("Sample_-_Superstore.csv")

In [43]:
df_sample.head(5).T

Unnamed: 0,0,1,2,3,4
Category,Furniture,Furniture,Office Supplies,Furniture,Office Supplies
City,Henderson,Henderson,Los Angeles,Fort Lauderdale,Fort Lauderdale
Country,United States,United States,United States,United States,United States
Customer Name,Claire Gute,Claire Gute,Darrin Van Huff,Sean O'Donnell,Sean O'Donnell
Discount,0.0,0.0,0.0,0.45,0.2
Number of Records,1,1,1,1,1
Order Date,11/8/2017,11/8/2017,6/12/2017,10/11/2016,10/11/2016
Order ID,CA-2017-152156,CA-2017-152156,CA-2017-138688,US-2016-108966,US-2016-108966
Postal Code,42420.0,42420.0,90036.0,33311.0,33311.0
Manufacturer,Bush,Hon,Universal,Bretford,Eldon


In [44]:
df_sample.columns

Index(['Category', 'City', 'Country', 'Customer Name', 'Discount',
       'Number of Records', 'Order Date', 'Order ID', 'Postal Code',
       'Manufacturer', 'Product Name', 'Profit', 'Quantity', 'Region', 'Sales',
       'Segment', 'Ship Date', 'Ship Mode', 'State', 'Sub-Category'],
      dtype='object')

In [46]:
df_sample.groupby('Category')['Sales'].sum().reset_index()

Unnamed: 0,Category,Sales
0,Furniture,741999.7953
1,Office Supplies,719047.032
2,Technology,836154.033


In [47]:
df_sample.groupby('Sub-Category')['Sales'].sum().reset_index()

Unnamed: 0,Sub-Category,Sales
0,Accessories,167380.318
1,Appliances,107532.161
2,Art,27118.792
3,Binders,203412.733
4,Bookcases,114879.9963
5,Chairs,328449.103
6,Copiers,149528.03
7,Envelopes,16476.402
8,Fasteners,3024.28
9,Furnishings,91705.164


In [50]:
df_sample.groupby('Sub-Category')['Sales']['Profit'].sum()

IndexError: Column(s) Sales already selected

In [6]:
df_sample.sort_values(by='Profit', ascending=False) #High to Low

Unnamed: 0,Category,City,Country,Customer Name,Discount,Number of Records,Order Date,Order ID,Postal Code,Manufacturer,Product Name,Profit,Quantity,Region,Sales,Segment,Ship Date,Ship Mode,State,Sub-Category
8093,Technology,Lafayette,United States,Tamara Chand,0.0,1,10/2/2017,CA-2017-118689,47905.0,Canon,Canon imageCLASS 2200 Advanced Copier,8399.9760,5,Central,17499.950,Corporate,10/9/2017,Standard Class,Indiana,Copiers
4905,Technology,Seattle,United States,Raymond Buch,0.0,1,3/23/2018,CA-2018-140151,98115.0,Canon,Canon imageCLASS 2200 Advanced Copier,6719.9808,4,West,13999.960,Consumer,3/25/2018,First Class,Washington,Copiers
5297,Technology,Newark,United States,Hunter Lopez,0.0,1,11/17/2018,CA-2018-166709,19711.0,Canon,Canon imageCLASS 2200 Advanced Copier,5039.9856,3,East,10499.970,Consumer,11/22/2018,Standard Class,Delaware,Copiers
3273,Office Supplies,Detroit,United States,Adrian Barton,0.0,1,12/17/2017,CA-2017-117121,48205.0,GBC,GBC Ibimaster 500 Manual ProClick Binding System,4946.3700,13,Central,9892.740,Consumer,12/21/2017,Standard Class,Michigan,Binders
3232,Office Supplies,Minneapolis,United States,Sanjit Chand,0.0,1,9/23/2015,CA-2015-116904,55407.0,Ibico,Ibico EPK-21 Electric Binding System,4630.4755,5,Central,9449.950,Consumer,9/28/2015,Standard Class,Minnesota,Binders
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7101,Office Supplies,Chicago,United States,Henry Goldwyn,0.8,1,12/7/2018,US-2018-122714,60653.0,Ibico,Ibico EPK-21 Electric Binding System,-2929.4845,5,Central,1889.990,Corporate,12/13/2018,Standard Class,Illinois,Binders
9993,Technology,Louisville,United States,Sharelle Roach,0.7,1,4/17/2018,CA-2018-134845,80027.0,Other,Lexmark MX611dhe Monochrome Laser Printer,-3399.9800,5,West,2549.985,Home Office,4/23/2018,Standard Class,Colorado,Machines
3558,Office Supplies,San Antonio,United States,Luke Foster,0.8,1,7/26/2015,CA-2015-169019,78207.0,GBC,GBC DocuBind P400 Electric Binding System,-3701.8928,8,Central,2177.584,Consumer,7/30/2015,Standard Class,Texas,Binders
683,Technology,Burlington,United States,Grant Thornton,0.5,1,11/4/2018,US-2018-168116,27217.0,Cubify,Cubify CubeX 3D Printer Triple Head Print,-3839.9904,4,South,7999.980,Corporate,11/4/2018,Same Day,North Carolina,Machines


In [9]:
df_sample = df_sample.groupby(['Category', 'Sub-Category']).agg(Total_Profit = ('Profit', 'sum')).reset_index()

In [10]:
df_sample

Unnamed: 0,Category,Sub-Category,Total_Profit
0,Furniture,Bookcases,-3472.556
1,Furniture,Chairs,26590.1663
2,Furniture,Furnishings,13059.1436
3,Furniture,Tables,-17725.4811
4,Office Supplies,Appliances,18138.0054
5,Office Supplies,Art,6527.787
6,Office Supplies,Binders,30221.7633
7,Office Supplies,Envelopes,6964.1767
8,Office Supplies,Fasteners,949.5182
9,Office Supplies,Labels,5546.254


In [11]:
df_sample.sort_values(by='Total_Profit', ascending=False) #High to Low

Unnamed: 0,Category,Sub-Category,Total_Profit
14,Technology,Copiers,55617.8249
16,Technology,Phones,44515.7306
13,Technology,Accessories,41936.6357
10,Office Supplies,Paper,34053.5693
6,Office Supplies,Binders,30221.7633
1,Furniture,Chairs,26590.1663
11,Office Supplies,Storage,21278.8264
4,Office Supplies,Appliances,18138.0054
2,Furniture,Furnishings,13059.1436
7,Office Supplies,Envelopes,6964.1767
