# Series

    A Series is a one-dimensional data structure in pandas.

    Associative containers -- key, value
    [10, 20, 30, 40, 50]
      0,   1, 2,  3,  4

    {'name':'Abhishek', 'age' :23, 'city' :'New Delhi'}

    pandas associative container -- Series (keys come from the index and values are provided in a list)

In [3]:
import numpy as np
import pandas as pd

0    10
1    20
2    30
3    40
dtype: int64

In [4]:
s = pd.Series([10,20,30,40])
s

0    10
1    20
2    30
3    40
dtype: int64

In [6]:
list(s.index)

[0, 1, 2, 3]

In [8]:
s.values, type(s.values)

(array([10, 20, 30, 40], dtype=int64), numpy.ndarray)

In [10]:
z = s.items() # yields (index label, value) pairs
# z is a one-shot iterator: once it has been consumed, iterating again yields nothing

In [11]:
list(z)

[(0, 10), (1, 20), (2, 30), (3, 40)]

In [12]:
list(z)

[]

# Explicit index

In [13]:
s = pd.Series([10, 20, 30, 40], index = list('abcd'))
s

a    10
b    20
c    30
d    40
dtype: int64

In [14]:
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [15]:
type(s.index)

pandas.core.indexes.base.Index

In [16]:
s.values

array([10, 20, 30, 40], dtype=int64)

In [17]:
s['a']

10

In [18]:
s.iloc[0]  # positional access; the index labels here are letters, so 0 is a position, not a label

  s[0]


10

In [19]:
capitals = {
    'USA' : 'Washington D.C.',
    'Canada' : 'Ottawa',
    'UK' : 'London',
    "France":'Paris'
}

In [20]:
s = pd.Series(capitals)

In [21]:
s

USA       Washington D.C.
Canada             Ottawa
UK                 London
France              Paris
dtype: object

In [22]:
s.index

Index(['USA', 'Canada', 'UK', 'France'], dtype='object')

In [23]:
s.values

array(['Washington D.C.', 'Ottawa', 'London', 'Paris'], dtype=object)

In [24]:
for country, capital in s.items():
    print(f"Capital({country}) = {capital}")

Capital(USA) = Washington D.C.
Capital(Canada) = Ottawa
Capital(UK) = London
Capital(France) = Paris


# Fancy Indexing

In [25]:
s

USA       Washington D.C.
Canada             Ottawa
UK                 London
France              Paris
dtype: object

In [28]:
s[['USA', 'UK']] # series

USA    Washington D.C.
UK              London
dtype: object

In [29]:
type(s[['USA', 'UK']] )

pandas.core.series.Series

# Boolean masking

In [30]:
s = pd.Series([i ** 2 for i in range(1, 11)], index = list('abcdefghij'))
s

a      1
b      4
c      9
d     16
e     25
f     36
g     49
h     64
i     81
j    100
dtype: int64

In [32]:
mask = s > 30
mask # boolean series

a    False
b    False
c    False
d    False
e    False
f     True
g     True
h     True
i     True
j     True
dtype: bool

In [33]:
s[mask]

f     36
g     49
h     64
i     81
j    100
dtype: int64

# Slicing

In [34]:
s

a      1
b      4
c      9
d     16
e     25
f     36
g     49
h     64
i     81
j    100
dtype: int64

In [35]:
s[0:5] # last point is not included

a     1
b     4
c     9
d    16
e    25
dtype: int64

In [37]:
s['a':'f': 2] # label slices include the end label; with step 2 only a, c, e are hit, so 'f' is skipped

a     1
c     9
e    25
dtype: int64

# Point of confusion

In [41]:
s = pd.Series([i * 34 for i in range(1, 11)], index = range(1,11))
s

1      34
2      68
3     102
4     136
5     170
6     204
7     238
8     272
9     306
10    340
dtype: int64

In [42]:
s.iloc[2]
# accessors: .iloc[] selects by integer position, .loc[] selects by index label

102

In [43]:
s.loc[2]

68

In [44]:
s.loc[[2, 10, 2+7-8]]

2      68
10    340
1      34
dtype: int64

In [45]:
s.iloc[[1, 0, -5+10]]

2     68
1     34
6    204
dtype: int64

In [46]:
s

1      34
2      68
3     102
4     136
5     170
6     204
7     238
8     272
9     306
10    340
dtype: int64

In [47]:
s.iloc[0: 4] # end point is not included

1     34
2     68
3    102
4    136
dtype: int64

In [48]:
s.loc[1:6]

1     34
2     68
3    102
4    136
5    170
6    204
dtype: int64

# Series Methods

In [59]:
# Read the file once, keeping only the Price column, and collapse the resulting
# one-column DataFrame into a Series by squeezing along the column axis.
#pd.read_csv('datasets/google_stock_price.csv', usecols = ['Price', 'Date'])
google = pd.read_csv('datasets/google_stock_price.csv', usecols = ['Price']).squeeze('columns')
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

In [60]:
len(google)

4793

In [61]:
max(google)

151.863495

In [62]:
min(google)

2.47049

In [66]:
google.size

4793

In [67]:
google.max()

151.863495

In [68]:
%timeit -n 10000 google.max()

17.3 µs ± 786 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [69]:
%timeit -n 10000 max(google)

444 µs ± 95.4 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [70]:
google.std()

37.274752943868094

In [71]:
google.product()

  return umr_prod(a, axis, dtype, out, keepdims, initial, where)


inf

In [72]:
google?

In [73]:
google.count() # Missing values are not counted

4793

In [74]:
google.size # all values are counted

4793

In [75]:
google.is_unique # False here because some prices occur more than once

False

In [76]:
google.value_counts()

Price
14.719826     4
49.000000     4
67.500000     3
13.113846     3
15.317586     3
             ..
15.364161     1
15.290189     1
15.287449     1
15.525307     1
138.429993    1
Name: count, Length: 4652, dtype: int64

In [78]:
# "Has no duplicates?"  ->  df['col'].is_unique

# Hand-rolled "has duplicates?" check (the opposite question):
#   (df['col'].value_counts() > 1).any()
# NOTE: value_counts() is indexed by the distinct values, not by the column's own
# index, so its boolean result cannot be used directly as a mask on df['col'].

In [102]:
# NOTE(review): as far as this notebook shows, `pokemon` is first loaded in the
# "head() and tail()" section further down, and its first entry is overwritten
# there; this cell (execution count 102) was run after that and would fail on a
# fresh top-to-bottom run.
h = pokemon.head(20)
h

0      Boobasaur
1        Ivysaur
2       Venusaur
3     Charmander
4     Charmeleon
5      Charizard
6       Squirtle
7      Wartortle
8      Blastoise
9       Caterpie
10       Metapod
11    Butterfree
12        Weedle
13        Kakuna
14      Beedrill
15        Pidgey
16     Pidgeotto
17       Pidgeot
18       Rattata
19      Raticate
Name: Name, dtype: object

In [103]:
dict(h)

{0: 'Boobasaur',
 1: 'Ivysaur',
 2: 'Venusaur',
 3: 'Charmander',
 4: 'Charmeleon',
 5: 'Charizard',
 6: 'Squirtle',
 7: 'Wartortle',
 8: 'Blastoise',
 9: 'Caterpie',
 10: 'Metapod',
 11: 'Butterfree',
 12: 'Weedle',
 13: 'Kakuna',
 14: 'Beedrill',
 15: 'Pidgey',
 16: 'Pidgeotto',
 17: 'Pidgeot',
 18: 'Rattata',
 19: 'Raticate'}

In [104]:
list(h)

['Boobasaur',
 'Ivysaur',
 'Venusaur',
 'Charmander',
 'Charmeleon',
 'Charizard',
 'Squirtle',
 'Wartortle',
 'Blastoise',
 'Caterpie',
 'Metapod',
 'Butterfree',
 'Weedle',
 'Kakuna',
 'Beedrill',
 'Pidgey',
 'Pidgeotto',
 'Pidgeot',
 'Rattata',
 'Raticate']

In [110]:
sorted(h, reverse=True)

['Weedle',
 'Wartortle',
 'Venusaur',
 'Squirtle',
 'Rattata',
 'Raticate',
 'Pidgey',
 'Pidgeotto',
 'Pidgeot',
 'Metapod',
 'Kakuna',
 'Ivysaur',
 'Charmeleon',
 'Charmander',
 'Charizard',
 'Caterpie',
 'Butterfree',
 'Boobasaur',
 'Blastoise',
 'Beedrill']

In [111]:
sorted(h, key = len)

['Weedle',
 'Kakuna',
 'Pidgey',
 'Ivysaur',
 'Metapod',
 'Pidgeot',
 'Rattata',
 'Venusaur',
 'Squirtle',
 'Caterpie',
 'Beedrill',
 'Raticate',
 'Boobasaur',
 'Charizard',
 'Wartortle',
 'Blastoise',
 'Pidgeotto',
 'Charmander',
 'Charmeleon',
 'Butterfree']

# head() and tail() method

In [80]:
pokemon = pd.read_csv('datasets/pokemon.csv', usecols = [0]).squeeze(True)
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [81]:
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [82]:
pokemon.head(n=10)

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Name, dtype: object

In [83]:
pokemon.tail()

1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, dtype: object

In [84]:
pokemon.tail(n=10)

1000        Wo-Chien
1001       Chien-Pao
1002         Ting-Lu
1003          Chi-Yu
1004    Roaring Moon
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, dtype: object

In [90]:
# goal: get the entries from 10 through 19
s = pokemon.head(10)
s

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
5     Charizard
6      Squirtle
7     Wartortle
8     Blastoise
9      Caterpie
Name: Name, dtype: object

In [91]:
# Entries at positions 10-19, relabelled 0-9: slice by position, then build a
# fresh Series from the raw values so the original labels and name are dropped.
pd.Series(pokemon.iloc[10:20].to_numpy())

0       Metapod
1    Butterfree
2        Weedle
3        Kakuna
4      Beedrill
5        Pidgey
6     Pidgeotto
7       Pidgeot
8       Rattata
9      Raticate
dtype: object

In [89]:
# 10-19 (the second group of 10)

In [93]:
pokemon.head(20).tail(10)
pokemon[10:20]

10       Metapod
11    Butterfree
12        Weedle
13        Kakuna
14      Beedrill
15        Pidgey
16     Pidgeotto
17       Pidgeot
18       Rattata
19      Raticate
Name: Name, dtype: object

In [94]:
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [95]:
h = pokemon.head(3)

In [96]:
h

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Name, dtype: object

In [97]:
# In this run the write through `h` also changed `pokemon` (see the next cell's
# output), i.e. the head() result shared its data with the original Series.
h[0] = 'Boobasaur'
h

0    Boobasaur
1      Ivysaur
2     Venusaur
Name: Name, dtype: object

In [98]:
pokemon

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [101]:
def ans(series: pd.Series, m: int, n: int) -> pd.Series:
    """Return up to ``n`` consecutive entries of ``series`` starting at position ``m``.

    Parameters
    ----------
    series : pd.Series
        The Series to take entries from.
    m : int
        Number of leading entries to skip (expected to be non-negative).
    n : int
        Maximum number of entries to return (expected to be non-negative).

    Returns
    -------
    pd.Series
        The entries at positions ``m`` to ``m + n - 1`` with their original
        labels. Fewer than ``n`` entries (possibly none) are returned when the
        Series ends before position ``m + n``.
    """
    # A positional slice clips at the end of the Series, so a window that runs
    # past the end yields only the entries from position m onward.
    return series.iloc[m:m + n]

ans(pokemon, 5, 19)

5      Charizard
6       Squirtle
7      Wartortle
8      Blastoise
9       Caterpie
10       Metapod
11    Butterfree
12        Weedle
13        Kakuna
14      Beedrill
15        Pidgey
16     Pidgeotto
17       Pidgeot
18       Rattata
19      Raticate
20       Spearow
21        Fearow
22         Ekans
23         Arbok
Name: Name, dtype: object

# Inclusion in series

In [112]:
pokemon

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [113]:
23 in pokemon

True

In [114]:
-90 in pokemon

False

In [115]:
'Ivysaur' in pokemon

False

In [116]:
'Ivysaur' in pokemon.values

True

# sorting methods --  index(sort_index), values(sort_values)

In [120]:
s = pokemon.sort_values()
s

459    Abomasnow
62          Abra
358        Absol
616     Accelgor
680    Aegislash
         ...    
570      Zoroark
569        Zorua
40         Zubat
633     Zweilous
717      Zygarde
Name: Name, Length: 1010, dtype: object

In [127]:
s.sort_index()

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [129]:
pokemon

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [133]:
pokemon2 = pokemon.sort_values().sort_index()
pokemon2

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [134]:
pokemon

0          Boobasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [135]:
pokemon2 is pokemon

False

In [136]:
pokemon2 == pokemon

0       True
1       True
2       True
3       True
4       True
        ... 
1005    True
1006    True
1007    True
1008    True
1009    True
Name: Name, Length: 1010, dtype: bool

# Broadcasting

In [141]:
# 99% of operations create a new Series

In [137]:
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

In [143]:
google.add(2)
google + 2

0         4.490664
1         4.515820
2         4.758411
3         4.770615
4         4.614201
           ...    
4788    134.080002
4789    134.998001
4790    137.570007
4791    139.050003
4792    140.429993
Name: Price, Length: 4793, dtype: float64

In [146]:
google.subtract(10)
google-10

0        -7.509336
1        -7.484180
2        -7.241589
3        -7.229385
4        -7.385799
           ...    
4788    122.080002
4789    122.998001
4790    125.570007
4791    127.050003
4792    128.429993
Name: Price, Length: 4793, dtype: float64

In [145]:
pokemon.head(5)[1] is pokemon[1]

True

In [148]:
google.mul(2)
google * 2

0         4.981328
1         5.031640
2         5.516822
3         5.541230
4         5.228402
           ...    
4788    264.160004
4789    265.996002
4790    271.140014
4791    274.100006
4792    276.859986
Name: Price, Length: 4793, dtype: float64

In [150]:
google.div(2)
google/2

0        1.245332
1        1.257910
2        1.379206
3        1.385307
4        1.307100
          ...    
4788    66.040001
4789    66.499000
4790    67.785004
4791    68.525002
4792    69.214996
Name: Price, Length: 4793, dtype: float64

In [152]:
google.pow(3)
google ** 3

0       1.545060e+01
1       1.592351e+01
2       2.098828e+01
3       2.126809e+01
4       1.786557e+01
            ...     
4788    2.304152e+06
4789    2.352531e+06
4790    2.491672e+06
4791    2.574170e+06
4792    2.652715e+06
Name: Price, Length: 4793, dtype: float64

# Math methods on series

In [153]:
import numpy as np
import pandas as pd # map filter 

In [154]:
google = pd.read_csv('datasets/google_stock_price.csv', usecols = [1]).squeeze()
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

In [155]:
google.mean()


40.211376870018775

In [156]:
google.sum()

192733.129338

In [157]:
google.product()

  return umr_prod(a, axis, dtype, out, keepdims, initial, where)


inf

In [158]:
google.median()

26.327717

In [160]:
google.mode(), type(google.mode())

(0    14.719826
 1    49.000000
 Name: Price, dtype: float64,
 pandas.core.series.Series)

In [161]:
google.value_counts()

Price
14.719826     4
49.000000     4
67.500000     3
13.113846     3
15.317586     3
             ..
15.364161     1
15.290189     1
15.287449     1
15.525307     1
138.429993    1
Name: count, Length: 4652, dtype: int64

In [162]:
google.std()

37.274752943868094

In [163]:
google.min()

2.47049

In [164]:
google.max()

151.863495

In [165]:
google.count()

4793

In [166]:
len(google)

4793

In [167]:
max(google)

151.863495

In [168]:
min(google)

2.47049

In [175]:
google.describe() # summary statistics returned as a Series; describe() is also available on DataFrames

count    4793.000000
mean       40.211377
std        37.274753
min         2.470490
25%        12.767395
50%        26.327717
75%        56.311001
max       151.863495
Name: Price, dtype: float64

In [176]:
google.median()

26.327717

In [177]:
google.describe()['50%']

26.327717

In [178]:
google.describe()['75%']

56.311001

# get() method

In [179]:
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

In [183]:
pokemon = pd.read_csv('datasets/pokemon.csv', index_col = 'Name').squeeze(True)
pokemon

Name
Bulbasaur          Grass, Poison
Ivysaur            Grass, Poison
Venusaur           Grass, Poison
Charmander                  Fire
Charmeleon                  Fire
                      ...       
Iron Valiant     Fairy, Fighting
Koraidon        Fighting, Dragon
Miraidon        Electric, Dragon
Walking Wake       Water, Dragon
Iron Leaves       Grass, Psychic
Name: Type, Length: 1010, dtype: object

In [185]:
pokemon['Charmander']

'Fire'

In [186]:
pokemon['Charmender']

KeyError: 'Charmender'

In [187]:
pokemon.iloc[100]  # positional access; the index holds names, so 100 is a position, not a label

  pokemon[100]


'Electric'

In [188]:
pokemon[1000000]

  pokemon[1000000]


IndexError: index 1000000 is out of bounds for axis 0 with size 1010

In [189]:
pokemon

Name
Bulbasaur          Grass, Poison
Ivysaur            Grass, Poison
Venusaur           Grass, Poison
Charmander                  Fire
Charmeleon                  Fire
                      ...       
Iron Valiant     Fairy, Fighting
Koraidon        Fighting, Dragon
Miraidon        Electric, Dragon
Walking Wake       Water, Dragon
Iron Leaves       Grass, Psychic
Name: Type, Length: 1010, dtype: object

In [191]:
pokemon.get('Boobasaur')

In [192]:
# NOTE(review): the index holds names, so the integer 0 is not a label; the value
# returned in this run appears to come from a positional fallback (a warning was
# echoed for this line). Prefer pokemon.iloc[0] for positional access - confirm.
pokemon.get(0)

  pokemon.get(0)


'Grass, Poison'

In [193]:
pokemon.get('Boobasaur', default = 'The key was not found') # default = None

'The key was not found'

In [196]:
pokemon.get(['Bulbasaur', 'Iron Leaves'])

Name
Bulbasaur       Grass, Poison
Iron Leaves    Grass, Psychic
Name: Type, dtype: object

In [197]:
pokemon.get(['Bulbasaur', 'Iron Leaves', 'Nonsense'], default = 'One of the keys was missing so no answer 😊')

'One of the keys was missing so no answer 😊'

# apply() and map()
    The apply() method takes a function and evaluates that function for each value in the Series.

In [198]:
pokemon = pd.read_csv('datasets/pokemon.csv', usecols = [0]).squeeze(True)
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [199]:
# apply() method
pokemon.apply(len)

0        9
1        7
2        8
3       10
4       10
        ..
1005    12
1006     8
1007     8
1008    12
1009    11
Name: Name, Length: 1010, dtype: int64

In [200]:
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [201]:
pokemon.apply(lambda name : name.count('a'))

0       2
1       1
2       1
3       2
4       1
       ..
1005    2
1006    1
1007    1
1008    2
1009    1
Name: Name, Length: 1010, dtype: int64

In [202]:
# map() method is used for mapping the values

In [207]:
pokemon = pd.read_csv('datasets/pokemon.csv', index_col = [0]).squeeze(True).head(10)
pokemon

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Charizard      Fire, Flying
Squirtle              Water
Wartortle             Water
Blastoise             Water
Caterpie                Bug
Name: Type, dtype: object

In [209]:
mapped_pokemons = pokemon.map({
    'Grass, Poison': 'GP', 
    'Fire' : 'F',
    'Water' : 'W'
})

In [210]:
mapped_pokemons

Name
Bulbasaur      GP
Ivysaur        GP
Venusaur       GP
Charmander      F
Charmeleon      F
Charizard     NaN
Squirtle        W
Wartortle       W
Blastoise       W
Caterpie      NaN
Name: Type, dtype: object

In [211]:
mapped_pokemons.size

10

In [212]:
mapped_pokemons.count()

8

# Dataframes

In [1]:
import numpy as np
import pandas as pd

In [2]:
nba = pd.read_csv('datasets/nba.csv')
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [4]:
s = pd.read_csv('datasets/pokemon.csv', usecols = [0]).squeeze(True)
s

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [6]:
s.index
nba.index

RangeIndex(start=0, stop=592, step=1)

In [9]:
# s.columns
nba.columns

Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')

In [11]:
s.size
nba.size

4144

In [13]:
s.shape
nba.shape

(592, 7)

In [15]:
s.dtype
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [18]:
# info() prints its report itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line after the report.
s.info()
print('*' * 55)
nba.info()

<class 'pandas.core.series.Series'>
RangeIndex: 1010 entries, 0 to 1009
Series name: Name
Non-Null Count  Dtype 
--------------  ----- 
1010 non-null   object
dtypes: object(1)
memory usage: 8.0+ KB
None
*******************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 32.5+ KB


In [19]:
s.describe()

count          1010
unique         1010
top       Bulbasaur
freq              1
Name: Name, dtype: object

In [21]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [22]:
nba.describe(include = 'all')

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
count,591,591,584,585,584.0,578,488.0
unique,591,30,7,20,,182,
top,Saddiq Bey,Dallas Mavericks,G,6-5,,Kentucky,
freq,1,23,229,74,,29,
mean,,,,,214.763699,,9218978.0
std,,,,,23.460612,,11319270.0
min,,,,,160.0,,508891.0
25%,,,,,198.0,,1980599.0
50%,,,,,215.0,,4018638.0
75%,,,,,230.0,,11696940.0


In [23]:
s

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [24]:
s.axes

[RangeIndex(start=0, stop=1010, step=1)]

In [25]:
nba.axes

[RangeIndex(start=0, stop=592, step=1),
 Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')]

In [26]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


In [28]:
nba.tail(n = 3)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0
591,,,,,,,


# selecting a single column from a dataframe

In [29]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [37]:
nba['Name'].loc[3]
nba['Name'].iloc[3]
nba['Name'][3]
nba['Name']

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [38]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [39]:
names = nba['Name']
names # DO NOT MUTATE because it is a view: writes to it show up in nba as well

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [40]:
names[0] = 'Whatever'
names

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  names[0] = 'Whatever'


0               Whatever
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [41]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Whatever,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [42]:
names[0] = 'Saddiq Bey'
names

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  names[0] = 'Saddiq Bey'


0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [43]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [44]:
names = nba['Name'].copy()
names

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [45]:
names[0] = 'Whatever'
names

0               Whatever
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [46]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


# selecting multiple columns from a dataframe
# @FancyIndexing

In [47]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [48]:
nba[['Name', 'Team']]
# It creates a copy

Unnamed: 0,Name,Team
0,Saddiq Bey,Atlanta Hawks
1,Bogdan Bogdanovic,Atlanta Hawks
2,Kobe Bufkin,Atlanta Hawks
3,Clint Capela,Atlanta Hawks
4,Bruno Fernando,Atlanta Hawks
...,...,...
587,Ryan Rollins,Washington Wizards
588,Landry Shamet,Washington Wizards
589,Tristan Vukcevic,Washington Wizards
590,Delon Wright,Washington Wizards


In [50]:
# Double brackets give a one-column DataFrame; squeezing along the columns axis
# collapses it into a Series (same result as nba['Name'].copy()).
# The axis is spelled out explicitly: squeeze(True) only worked because
# True == 1 happens to resolve to the columns axis.
nba[['Name']].squeeze(axis='columns')

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [51]:
nba[['Name', 'Team', 'Name']]

Unnamed: 0,Name,Team,Name.1
0,Saddiq Bey,Atlanta Hawks,Saddiq Bey
1,Bogdan Bogdanovic,Atlanta Hawks,Bogdan Bogdanovic
2,Kobe Bufkin,Atlanta Hawks,Kobe Bufkin
3,Clint Capela,Atlanta Hawks,Clint Capela
4,Bruno Fernando,Atlanta Hawks,Bruno Fernando
...,...,...,...
587,Ryan Rollins,Washington Wizards,Ryan Rollins
588,Landry Shamet,Washington Wizards,Landry Shamet
589,Tristan Vukcevic,Washington Wizards,Tristan Vukcevic
590,Delon Wright,Washington Wizards,Delon Wright


In [54]:
nba[['Team', 'Name']]

Unnamed: 0,Team,Name
0,Atlanta Hawks,Saddiq Bey
1,Atlanta Hawks,Bogdan Bogdanovic
2,Atlanta Hawks,Kobe Bufkin
3,Atlanta Hawks,Clint Capela
4,Atlanta Hawks,Bruno Fernando
...,...,...
587,Washington Wizards,Ryan Rollins
588,Washington Wizards,Landry Shamet
589,Washington Wizards,Tristan Vukcevic
590,Washington Wizards,Delon Wright


In [55]:
# Chained selection: take the two-column DataFrame, pull the 'Name' Series out
# of it, then read the value at integer position 587.
nba[['Team', 'Name']]['Name'].iloc[587]

'Ryan Rollins'

In [56]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


# Adding a column to a DataFrame

In [57]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [58]:
pokemon = pd.read_csv('datasets/pokemon.csv')
pokemon

Unnamed: 0,Name,Type
0,Bulbasaur,"Grass, Poison"
1,Ivysaur,"Grass, Poison"
2,Venusaur,"Grass, Poison"
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
1005,Iron Valiant,"Fairy, Fighting"
1006,Koraidon,"Fighting, Dragon"
1007,Miraidon,"Electric, Dragon"
1008,Walking Wake,"Water, Dragon"


In [59]:
pokemon['Sport'] = 'Basketball'
pokemon

Unnamed: 0,Name,Type,Sport
0,Bulbasaur,"Grass, Poison",Basketball
1,Ivysaur,"Grass, Poison",Basketball
2,Venusaur,"Grass, Poison",Basketball
3,Charmander,Fire,Basketball
4,Charmeleon,Fire,Basketball
...,...,...,...
1005,Iron Valiant,"Fairy, Fighting",Basketball
1006,Koraidon,"Fighting, Dragon",Basketball
1007,Miraidon,"Electric, Dragon",Basketball
1008,Walking Wake,"Water, Dragon",Basketball


In [61]:
# DataFrame.insert places the new column at position `loc` and mutates pokemon
# in place. It raises ValueError if the column already exists, so the guard
# keeps this cell safe to re-run on the same kernel.
if 'Sport2' not in pokemon.columns:
    pokemon.insert(loc = 1, column = 'Sport2', value = 'Alpha')
pokemon

Unnamed: 0,Name,Sport2,Type,Sport
0,Bulbasaur,Alpha,"Grass, Poison",Basketball
1,Ivysaur,Alpha,"Grass, Poison",Basketball
2,Venusaur,Alpha,"Grass, Poison",Basketball
3,Charmander,Alpha,Fire,Basketball
4,Charmeleon,Alpha,Fire,Basketball
...,...,...,...,...
1005,Iron Valiant,Alpha,"Fairy, Fighting",Basketball
1006,Koraidon,Alpha,"Fighting, Dragon",Basketball
1007,Miraidon,Alpha,"Electric, Dragon",Basketball
1008,Walking Wake,Alpha,"Water, Dragon",Basketball


In [65]:
pokemon = pd.read_csv('datasets/pokemon.csv')
pokemon

Unnamed: 0,Name,Type
0,Bulbasaur,"Grass, Poison"
1,Ivysaur,"Grass, Poison"
2,Venusaur,"Grass, Poison"
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
1005,Iron Valiant,"Fairy, Fighting"
1006,Koraidon,"Fighting, Dragon"
1007,Miraidon,"Electric, Dragon"
1008,Walking Wake,"Water, Dragon"


In [64]:
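# Goal: add a name_length column holding the length of each name.
# Attempt that does NOT work: len() of a Series is its number of rows (one scalar),
# and assigning a scalar writes that same value into every row.
# pokemon['name_length'] = len(pokemon['Name'])
# which is the same as:
# pokemon['name_length'] = 1010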

Unnamed: 0,Name,Type,name_length
0,Bulbasaur,"Grass, Poison",1010
1,Ivysaur,"Grass, Poison",1010
2,Venusaur,"Grass, Poison",1010
3,Charmander,Fire,1010
4,Charmeleon,Fire,1010
...,...,...,...
1005,Iron Valiant,"Fairy, Fighting",1010
1006,Koraidon,"Fighting, Dragon",1010
1007,Miraidon,"Electric, Dragon",1010
1008,Walking Wake,"Water, Dragon",1010


In [66]:
# apply()
pokemon

Unnamed: 0,Name,Type
0,Bulbasaur,"Grass, Poison"
1,Ivysaur,"Grass, Poison"
2,Venusaur,"Grass, Poison"
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
1005,Iron Valiant,"Fairy, Fighting"
1006,Koraidon,"Fighting, Dragon"
1007,Miraidon,"Electric, Dragon"
1008,Walking Wake,"Water, Dragon"


In [68]:
# Series.apply calls len on every name, giving one length per row (unlike
# len(pokemon['Name']), which is a single number for the whole column).
# NOTE(review): pokemon['Name'].str.len() is the built-in vectorized
# alternative for string columns and should also tolerate missing values --
# worth considering if Name can contain nulls.
pokemon['name_length'] = pokemon['Name'].apply(len)
pokemon

Unnamed: 0,Name,Type,name_length
0,Bulbasaur,"Grass, Poison",9
1,Ivysaur,"Grass, Poison",7
2,Venusaur,"Grass, Poison",8
3,Charmander,Fire,10
4,Charmeleon,Fire,10
...,...,...,...
1005,Iron Valiant,"Fairy, Fighting",12
1006,Koraidon,"Fighting, Dragon",8
1007,Miraidon,"Electric, Dragon",8
1008,Walking Wake,"Water, Dragon",12


In [70]:
# Series.map with a dict replaces each Type by the value stored under that key.
# Types that are not keys of the dict become NaN, which is why the new column
# is shown as floats (1.0, 2.0, 3.0) with blanks for the unmapped rows.
pokemon['test'] = pokemon['Type'].map({'Grass, Poison':1, 'Fire':2, 'Fairy, Fighting':3})
pokemon

Unnamed: 0,Name,Type,name_length,test
0,Bulbasaur,"Grass, Poison",9,1.0
1,Ivysaur,"Grass, Poison",7,1.0
2,Venusaur,"Grass, Poison",8,1.0
3,Charmander,Fire,10,2.0
4,Charmeleon,Fire,10,2.0
...,...,...,...,...
1005,Iron Valiant,"Fairy, Fighting",12,3.0
1006,Koraidon,"Fighting, Dragon",8,
1007,Miraidon,"Electric, Dragon",8,
1008,Walking Wake,"Water, Dragon",12,


# value_counts() method

In [1]:
import numpy as np
import pandas as pd

In [2]:
pokemon = pd.read_csv('datasets/pokemon.csv')
pokemon.head()

Unnamed: 0,Name,Type
0,Bulbasaur,"Grass, Poison"
1,Ivysaur,"Grass, Poison"
2,Venusaur,"Grass, Poison"
3,Charmander,Fire
4,Charmeleon,Fire


In [5]:
pokemon['Type'].value_counts()

Type
Water               74
Normal              74
Grass               46
Psychic             39
Fire                36
                    ..
Fighting, Ice        1
Fire, Dragon         1
Normal, Dragon       1
Psychic, Steel       1
Fighting, Dragon     1
Name: count, Length: 200, dtype: int64

In [7]:
pokemon['Type'].value_counts().index.tolist()

['Water',
 'Normal',
 'Grass',
 'Psychic',
 'Fire',
 'Electric',
 'Fighting',
 'Normal, Flying',
 'Bug',
 'Fairy',
 'Rock',
 'Ground',
 'Ghost',
 'Ice',
 'Poison',
 'Grass, Poison',
 'Dark',
 'Bug, Flying',
 'Dragon',
 'Bug, Poison',
 'Steel',
 'Water, Ground',
 'Psychic, Fairy',
 'Water, Flying',
 'Fire, Fighting',
 'Dark, Flying',
 'Grass, Flying',
 'Psychic, Flying',
 'Rock, Water',
 'Steel, Psychic',
 'Water, Psychic',
 'Bug, Grass',
 'Fire, Flying',
 'Rock, Ground',
 'Bug, Steel',
 'Normal, Psychic',
 'Electric, Flying',
 'Ghost, Grass',
 'Grass, Dark',
 'Grass, Fairy',
 'Water, Rock',
 'Bug, Fighting',
 'Bug, Rock',
 'Steel, Ghost',
 'Bug, Electric',
 'Dragon, Ground',
 'Rock, Steel',
 'Ice, Water',
 'Normal, Fairy',
 'Water, Dragon',
 'Water, Dark',
 'Dragon, Flying',
 'Rock, Flying',
 'Electric, Steel',
 'Water, Fairy',
 'Water, Ice',
 'Dragon, Ice',
 'Dark, Dragon',
 'Water, Ghost',
 'Flying',
 'Dark, Fairy',
 'Poison, Ground',
 'Grass, Ghost',
 'Dark, Steel',
 'Poison, Dark',

In [8]:
pokemon['Type'].unique()

array(['Grass, Poison', 'Fire', 'Fire, Flying', 'Water', 'Bug',
       'Bug, Flying', 'Bug, Poison', 'Normal, Flying', 'Normal', 'Poison',
       'Electric', 'Ground', 'Poison, Ground', 'Fairy', 'Normal, Fairy',
       'Poison, Flying', 'Bug, Grass', 'Fighting', 'Water, Fighting',
       'Psychic', 'Water, Poison', 'Rock, Ground', 'Water, Psychic',
       'Electric, Steel', 'Water, Ice', 'Ghost, Poison', 'Grass, Psychic',
       'Ground, Rock', 'Grass', 'Psychic, Fairy', 'Ice, Psychic',
       'Water, Flying', 'Rock, Water', 'Rock, Flying', 'Ice, Flying',
       'Electric, Flying', 'Dragon', 'Dragon, Flying', 'Water, Electric',
       'Fairy, Flying', 'Psychic, Flying', 'Water, Fairy', 'Rock',
       'Grass, Flying', 'Water, Ground', 'Dark', 'Dark, Flying', 'Ghost',
       'Normal, Psychic', 'Bug, Steel', 'Ground, Flying', 'Steel, Ground',
       'Bug, Rock', 'Bug, Fighting', 'Dark, Ice', 'Fire, Rock',
       'Ice, Ground', 'Water, Rock', 'Steel, Flying', 'Dark, Fire',
       'Water, D

In [9]:
l = ['B','B','A','B','C','A','B','B','A','C']

In [10]:
from collections import Counter

In [11]:
Counter(l)

Counter({'B': 5, 'A': 3, 'C': 2})

In [12]:
s = pd.Series(['Abhishek', 'Amrusha', 'Abhishek', 'Priyanka', 'Amrusha', np.nan, np.nan])

In [13]:
s

0    Abhishek
1     Amrusha
2    Abhishek
3    Priyanka
4     Amrusha
5         NaN
6         NaN
dtype: object

In [14]:
s.value_counts() # NaN is left out by default, so this does not tell us how many null values there are

Abhishek    2
Amrusha     2
Priyanka    1
Name: count, dtype: int64

In [15]:
s.value_counts(dropna = False)

Abhishek    2
Amrusha     2
NaN         2
Priyanka    1
Name: count, dtype: int64

In [16]:
s.value_counts().index

Index(['Abhishek', 'Amrusha', 'Priyanka'], dtype='object')

In [17]:
import seaborn as sns

In [18]:
iris = sns.load_dataset('iris')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [22]:
np.array(iris['species'].value_counts().index.tolist())

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [23]:
# Generally it is used for categorical columns

In [24]:
titanic = sns.load_dataset('titanic')

In [25]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [26]:
titanic.shape # 891 rows

(891, 15)

In [27]:
titanic['survived'].hasnans

False

In [28]:
titanic['survived'].value_counts()

survived
0    549
1    342
Name: count, dtype: int64

In [32]:
titanic['deck'].hasnans

True

In [34]:
titanic['deck'].value_counts().sort_index()

deck
A    15
B    47
C    59
D    33
E    32
F    13
G     4
Name: count, dtype: int64

In [37]:
titanic['deck'].value_counts(dropna = False)

deck
NaN    688
C       59
B       47
D       33
E       32
A       15
F       13
G        4
Name: count, dtype: int64

In [39]:
titanic['deck'].value_counts(dropna = False, normalize = True) * 100

deck
NaN    77.216611
C       6.621773
B       5.274972
D       3.703704
E       3.591470
A       1.683502
F       1.459035
G       0.448934
Name: proportion, dtype: float64

# dropna() and fillna()

    There are two options for handling null values: either drop them or fill them with a replacement value.
    Use dropna() to drop them and fillna() to fill them.

In [41]:
nba = pd.read_csv('datasets/nba.csv')
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [54]:
# Count the null values in every column of nba.
# value_counts(dropna=False) keeps NaN as an index entry. The key must be the
# float np.nan -- the string 'NaN' is not a valid key.
# Looking it up with [np.nan] raises KeyError for a column that has no nulls,
# so .get() with a default of 0 is used instead.
l = {}
for column in nba.columns:
    l[column] = nba[column].value_counts(dropna = False).get(np.nan, 0)

l

{'Name': 1,
 'Team': 1,
 'Position': 8,
 'Height': 7,
 'Weight': 8,
 'College': 14,
 'Salary': 104}

In [56]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [59]:
# dropna() with its default how='any' removes every row that contains at least
# one null value. Rebinding nba replaces the originally loaded frame for the
# rest of this session.
nba = nba.dropna()
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [61]:
# Null count per column: include NaN in the value counts and read its tally,
# falling back to 0 for columns that contain no NaN.
l = {
    col_name: nba[col_name].value_counts(dropna=False).get(np.nan, 0)
    for col_name in nba.columns
}
l

{'Name': 0,
 'Team': 0,
 'Position': 0,
 'Height': 0,
 'Weight': 0,
 'College': 0,
 'Salary': 0}

In [62]:
# EDA -- Data collection, Data preprocessing -- Data cleaning, Data analysis, Data visualization
# ETL (extract, transform, load) -- e.g. Power BI

In [63]:
nba = pd.read_csv('datasets/nba.csv')
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [65]:
# how='any' is the default: nba.dropna() drops a row if any of its values is null.
nba.dropna(how = 'all') # drops only the rows in which every value is null

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [66]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [67]:
# Null count per column of the reloaded frame: look up the NaN entry of each
# column's value counts, defaulting to 0 when the column has no NaN.
l = {
    col_name: nba[col_name].value_counts(dropna=False).get(np.nan, 0)
    for col_name in nba.columns
}
l

{'Name': 1,
 'Team': 1,
 'Position': 8,
 'Height': 7,
 'Weight': 8,
 'College': 14,
 'Salary': 104}

In [68]:
nba.dropna(subset = 'Salary')

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [69]:
nba.dropna(subset = ['Salary', 'College']) # OR: a row is dropped if either Salary or College is null

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [70]:
# AND: with how='all', a row is dropped only if both Salary and College are null
# (a row with a College but no Salary is kept).
nba.dropna(subset = ['Salary', 'College'], how = 'all')

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
