In [98]:
import pandas as pd
import numpy as np


## Working with Pandas DataFrames:

In [99]:
pd.read_csv("data/pokemon.csv")

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass / Poison
1,Ivysaur,Grass / Poison
2,Venusaur,Grass / Poison
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
804,Stakataka,Rock / Steel
805,Blacephalon,Fire / Ghost
806,Zeraora,Electric
807,Meltan,Steel


In [100]:
pokemon = pd.read_csv("data/pokemon.csv", index_col= "Pokemon")
pokemon


Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


### Converting the single-column DataFrame into a Series with `squeeze()`

In [101]:
# squeeze() collapses the single-column DataFrame (only "Type" is left once
# "Pokemon" is the index) into a Series.
pokemon = pokemon.squeeze()
pokemon.shape  # not displayed: only the last expression in a cell is shown
pokemon

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object

In [102]:
type(pokemon)  # now it converted from DataFrame to Series

pandas.core.series.Series

### Working with Google_stocks.csv Data set:

In [103]:
# Load the stock data with "Date" parsed as dates and used as the index.
# squeeze("columns") turns the one remaining column ("Close") into a Series
# and, unlike a bare squeeze(), never collapses a one-row file to a scalar.
google = pd.read_csv(
    "data/google_stocks.csv", parse_dates=["Date"], index_col="Date"
).squeeze("columns")

In [104]:
google.head(5)

Date
2004-08-19    49.98
2004-08-20    53.95
2004-08-23    54.50
2004-08-24    52.24
2004-08-25    52.80
Name: Close, dtype: float64

In [105]:
google.dtype


dtype('float64')

### Working with Revolutionary_war.csv Data set:

- We cannot squeeze this data set directly because it has more than 2 columns, so we load only the needed columns (Start Date & State) and squeeze the result.
- For that we use the `usecols` parameter of `read_csv`.


In [106]:
# Read only the two columns of interest: "Start Date" is parsed as dates and
# becomes the index, leaving "State" as the single data column.
# squeeze("columns") turns that column into a Series and, unlike a bare
# squeeze(), never collapses a one-row file to a scalar.
battles = pd.read_csv(
    "data/revolutionary_war.csv",
    parse_dates=["Start Date"],
    index_col="Start Date",
    usecols=["Start Date", "State"],
).squeeze("columns")

In [107]:
battles

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object

### The battles data set has missing values: see how sorting handles them, then delete them.

In [108]:
battles.sort_values()  # ascending by default (result discarded: only the last expression in a cell is displayed)
battles.sort_values(ascending= False)  # descending order

# NaN values are placed at the end by default, irrespective of ascending or descending order.

Start Date
1783-01-22    Virginia
1781-09-28    Virginia
1779-05-10    Virginia
1781-09-05    Virginia
1781-07-06    Virginia
                ...   
1782-08-08         NaN
1782-08-25         NaN
1782-09-13         NaN
1782-10-18         NaN
1782-12-06         NaN
Name: State, Length: 232, dtype: object

In [109]:
battles.sort_values(na_position= "last")  # default: NaN at the end (result discarded, not displayed)
battles.sort_values(na_position= "first")  # NaN at the start

Start Date
1775-09-17         NaN
1775-12-31         NaN
1776-03-03         NaN
1776-03-25         NaN
1776-05-18         NaN
                ...   
1781-07-06    Virginia
1781-07-01    Virginia
1781-06-26    Virginia
1781-04-25    Virginia
1783-01-22    Virginia
Name: State, Length: 232, dtype: object

In [110]:
### Delete missing values from battles dataset
# dropna() returns a new Series without the NaN entries; the result is only
# displayed here, not assigned, so `battles` itself is left unchanged.

battles.dropna()

Start Date
1774-09-01     Massachusetts
1774-12-14     New Hampshire
1775-04-19     Massachusetts
1775-04-19     Massachusetts
1775-04-20          Virginia
                   ...      
1782-08-15          Virginia
1782-08-19          Virginia
1782-08-26    South Carolina
1782-09-11          Virginia
1783-01-22          Virginia
Name: State, Length: 162, dtype: object

### Sorting the index using the `sort_index()` method:

In [111]:
pokemon.sort_index(ascending = True) #by default ascending = True

Pokemon
Abomasnow        Grass / Ice
Abra                 Psychic
Absol                   Dark
Accelgor                 Bug
Aegislash      Steel / Ghost
                  ...       
Zoroark                 Dark
Zorua                   Dark
Zubat        Poison / Flying
Zweilous       Dark / Dragon
Zygarde      Dragon / Ground
Name: Type, Length: 809, dtype: object

In [112]:
battles.sort_index(ascending= False, na_position= "first")

# NaT means Not a Time (missing value for datetime objects)

Start Date
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
1783-01-22         Virginia
                  ...      
1775-04-20         Virginia
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1774-12-14    New Hampshire
1774-09-01    Massachusetts
Name: State, Length: 232, dtype: object

In [113]:
google.nsmallest(8)  

# 8 smallest values in the google stocks data set

Date
2004-09-03    49.82
2004-09-01    49.94
2004-08-19    49.98
2004-09-02    50.57
2004-09-07    50.60
2004-08-30    50.81
2004-09-08    50.96
2004-09-09    50.96
Name: Close, dtype: float64

In [114]:
google.nlargest(4)

# 4 largest values in the google stocks data set

Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
Name: Close, dtype: float64

In [115]:
battles = battles.sort_values(ascending= True)

In [116]:
battles.sort_values(inplace= True)

# inplace=True sorts the existing battles Series itself and returns None.
# If you need the original order, create a copy (battles.copy()) before using inplace=True.
# NOTE(review): redundant if the cell above has run — `battles` was already rebound to an ascending-sorted copy there.

In [117]:
battles

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1782-08-08            NaN
1782-08-25            NaN
1782-09-13            NaN
1782-10-18            NaN
1782-12-06            NaN
Name: State, Length: 232, dtype: object

### Using the `value_counts()` method:

In [118]:
pokemon.value_counts() 

# counts how many times each unique value occurs in the Series

Type
Normal                65
Water                 61
Grass                 38
Psychic               35
Fire                  30
                      ..
Fire / Psychic         1
Normal / Ground        1
Psychic / Fighting     1
Dark / Ghost           1
Fire / Ghost           1
Name: count, Length: 159, dtype: int64

In [119]:
pokemon.nunique()
# number of unique values in the Series (NaN excluded, same result as len(pokemon.value_counts()))

159

### normalize= True or False :

In [120]:
pokemon_percentage = pokemon.value_counts(normalize= True) 

# gives the relative frequency of each unique value in the series

In [121]:
pokemon_percentage * 100 # in percentage form

Type
Normal                8.034611
Water                 7.540173
Grass                 4.697157
Psychic               4.326329
Fire                  3.708282
                        ...   
Fire / Psychic        0.123609
Normal / Ground       0.123609
Psychic / Fighting    0.123609
Dark / Ghost          0.123609
Fire / Ghost          0.123609
Name: proportion, Length: 159, dtype: float64

In [122]:
google.min()

np.float64(49.82)

In [123]:
google.max()

np.float64(1287.58)

In [124]:
len(google)   # total rows in the series

3824

## Assigning the `Bins` to the Data (buckets):

In [125]:
buckets = [0, 200, 400, 600, 800, 1000, 1200, 1400]  # defining the bins

In [126]:
google.value_counts()

Close
287.68     3
194.27     3
307.10     3
288.92     3
290.41     3
          ..
292.43     1
293.41     1
296.75     1
295.46     1
1265.13    1
Name: count, Length: 3696, dtype: int64

In [127]:
google.value_counts(bins= buckets)

# assigning the data into bins

(200.0, 400.0]      1568
(-0.001, 200.0]      595
(400.0, 600.0]       575
(1000.0, 1200.0]     406
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [128]:
google.value_counts(bins= buckets).sort_index()

# sorting the bins in ascending order

(-0.001, 200.0]      595
(200.0, 400.0]      1568
(400.0, 600.0]       575
(600.0, 800.0]       380
(800.0, 1000.0]      207
(1000.0, 1200.0]     406
(1200.0, 1400.0]      93
Name: count, dtype: int64

In [129]:
battles.value_counts(dropna= False)  

# dropna=False includes NaN in the counts (by default value_counts() leaves NaN out)

State
NaN               70
South Carolina    31
New York          28
New Jersey        24
Virginia          21
Massachusetts     11
Pennsylvania      10
North Carolina     9
Florida            8
Georgia            6
Rhode Island       3
Vermont            3
Connecticut        3
Indiana            1
Louisiana          1
New Hampshire      1
Delaware           1
Ohio               1
Name: count, dtype: int64

In [130]:
battles.index.value_counts(dropna= False)

Start Date
NaT           4
1781-04-15    2
1781-09-13    2
1777-08-22    2
1782-01-11    2
             ..
1776-09-16    1
1781-10-25    1
1777-07-08    1
1779-07-22    1
1782-12-06    1
Name: count, Length: 218, dtype: int64

### Using the `apply` method to apply different functions to a Series object:

In [131]:
# Python built-ins can be called directly on a Series: each one receives the whole Series.
funcs = [len, min, max]
for funcs_name in funcs:  # despite its name, funcs_name is the function object itself
    print(funcs_name(google))

3824
49.82
1287.58


In [132]:
google

Date
2004-08-19      49.98
2004-08-20      53.95
2004-08-23      54.50
2004-08-24      52.24
2004-08-25      52.80
               ...   
2019-10-21    1246.15
2019-10-22    1242.80
2019-10-23    1259.13
2019-10-24    1260.99
2019-10-25    1265.13
Name: Close, Length: 3824, dtype: float64

In [133]:
google.apply(round)

Date
2004-08-19      50
2004-08-20      54
2004-08-23      54
2004-08-24      52
2004-08-25      53
              ... 
2019-10-21    1246
2019-10-22    1243
2019-10-23    1259
2019-10-24    1261
2019-10-25    1265
Name: Close, Length: 3824, dtype: int64

In [134]:
google.apply(func=round)  # same call as above with `func` passed by keyword; round() is applied to each value.
# Note: 54.50 comes out as 54, not 55 — round() sends exact halves to the nearest even integer.

Date
2004-08-19      50
2004-08-20      54
2004-08-23      54
2004-08-24      52
2004-08-25      53
              ... 
2019-10-21    1246
2019-10-22    1243
2019-10-23    1259
2019-10-24    1261
2019-10-25    1265
Name: Close, Length: 3824, dtype: int64

In [135]:
pokemon

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object

### In the Pokemon data, some Pokemon have a single type and some have multiple types, separated by "/". To count how many have a single type versus multiple types:

In [139]:
def single_or_multiple(pokemon_type):
    """Classify a Pokemon type string as "Single" or "Multiple".

    A value containing "/" (e.g. "Grass / Poison") lists more than one type
    and is labelled "Multiple"; any other value is labelled "Single".
    """
    return "Multiple" if "/" in pokemon_type else "Single"

In [140]:
pokemon.apply(single_or_multiple).value_counts()

Type
Multiple    405
Single      404
Name: count, dtype: int64