# DataFrame I

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the CSV using its first column ('Name') as the index, then collapse the
# remaining single-column DataFrame into a Series.
# The axis is named explicitly: squeeze(True) only works because True == 1 is
# resolved as axis 1, which hides the intent from the reader.
pokemons = pd.read_csv('datasets/pokemon.csv', index_col=[0]).squeeze('columns')
pokemons

Name
Bulbasaur          Grass, Poison
Ivysaur            Grass, Poison
Venusaur           Grass, Poison
Charmander                  Fire
Charmeleon                  Fire
                      ...       
Iron Valiant     Fairy, Fighting
Koraidon        Fighting, Dragon
Miraidon        Electric, Dragon
Walking Wake       Water, Dragon
Iron Leaves       Grass, Psychic
Name: Type, Length: 1010, dtype: object

In [3]:
# Load the NBA roster as a DataFrame; no index_col is given, so pandas assigns
# a default RangeIndex (0..n-1) to the rows.
nba = pd.read_csv('datasets/nba.csv')
# Bare last expression -> rich (truncated) display of the whole frame.
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


#### Some methods and attributes are common to both Series and DataFrames.

In [4]:
# `shape` is a tuple with one entry per dimension: 1 entry for the Series,
# 2 entries (rows, columns) for the DataFrame.
for obj in (pokemons, nba):
    print(obj.shape)

(1010,)
(592, 7)


In [5]:
# `size` is the total number of elements (rows * columns for the DataFrame).
for obj in (pokemons, nba):
    print(obj.size)

1010
4144


In [6]:
# `ndim` is the number of dimensions: 1 for the Series, 2 for the DataFrame.
for obj in (pokemons, nba):
    print(obj.ndim)

1
2


In [7]:
# A Series has a single `dtype`; a DataFrame exposes `dtypes` (plural), which
# reports one dtype per column.
print(pokemons.dtype, '*' * 20, nba.dtypes, sep='\n')

object
********************
Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object


In [8]:
# Both objects expose their row labels through `index`: the Series is labelled
# by Pokemon name, the DataFrame by a default RangeIndex.
print(pokemons.index, '*' * 100, nba.index, sep='\n')

Index(['Bulbasaur', 'Ivysaur', 'Venusaur', 'Charmander', 'Charmeleon',
       'Charizard', 'Squirtle', 'Wartortle', 'Blastoise', 'Caterpie',
       ...
       'Wo-Chien', 'Chien-Pao', 'Ting-Lu', 'Chi-Yu', 'Roaring Moon',
       'Iron Valiant', 'Koraidon', 'Miraidon', 'Walking Wake', 'Iron Leaves'],
      dtype='object', name='Name', length=1010)
****************************************************************************************************
RangeIndex(start=0, stop=592, step=1)


#### Some attributes are present only in DataFrames.

In [9]:
# A Series has no `columns` attribute; catch the AttributeError and print its
# message so the cell demonstrates the failure without aborting the notebook.
try:
    pokemons.columns
except AttributeError as err:
    print(err)

'Series' object has no attribute 'columns'


In [10]:
# On a DataFrame, `columns` returns the Index of column labels
# (the same access on the `pokemons` Series raises AttributeError).
nba.columns

Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')

#### Some attributes are present only in Series.

In [11]:
# `hasnans` is a Series-only flag telling whether any value is missing.
pokemons.hasnans

False

In [12]:
# A DataFrame has no `hasnans` attribute; catch the AttributeError and print
# its message so the cell demonstrates the failure without aborting the notebook.
try:
    nba.hasnans
except AttributeError as err:
    print(err)

'DataFrame' object has no attribute 'hasnans'


### Then there are methods that work the same way on Series and DataFrames.

In [13]:
# With no argument, head() returns the first 5 rows.
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


In [14]:
# With no argument, tail() returns the last 5 rows.
# NOTE(review): the final row (index 591) is entirely NaN -- presumably a
# trailing blank line in the CSV; confirm before computing statistics on `nba`.
nba.tail()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0
591,,,,,,,


In [15]:
# Chain the two methods: take the first 10 rows, then the last 5 of those,
# which yields rows 5 through 9.
nba.head(10).tail(5)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
6,AJ Griffin,Atlanta Hawks,F,6-6,220.0,Duke,3712920.0
7,Mouhamed Gueye,Atlanta Hawks,F,6-11,210.0,Washington State,1119563.0
8,De'Andre Hunter,Atlanta Hawks,F-G,6-8,221.0,Virginia,20089286.0
9,Jalen Johnson,Atlanta Hawks,F,6-8,219.0,Duke,2925360.0


In [16]:
# include='all' summarises every column: object columns get
# count/unique/top/freq, numeric columns get count/mean/std/quantiles;
# statistics that do not apply to a column are shown as NaN.
nba.describe(include='all')

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
count,591,591,584,585,584.0,578,488.0
unique,591,30,7,20,,182,
top,Saddiq Bey,Dallas Mavericks,G,6-5,,Kentucky,
freq,1,23,229,74,,29,
mean,,,,,214.763699,,9218978.0
std,,,,,23.460612,,11319270.0
min,,,,,160.0,,508891.0
25%,,,,,198.0,,1980599.0
50%,,,,,215.0,,4018638.0
75%,,,,,230.0,,11696940.0


In [17]:
# info() prints a structural summary: index range, per-column non-null counts
# and dtypes, and the frame's memory usage.
nba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 32.5+ KB
