In [1]:
import numpy as np
import pandas as pd

In [2]:
# Default read: both CSV columns become data columns, so the result is a
# DataFrame. It can be turned into a Series once only one data column is left.
pokemon = pd.read_csv('data/pokemon.csv')
print(pokemon, type(pokemon), sep='\n')

         Pokemon            Type
0      Bulbasaur  Grass / Poison
1        Ivysaur  Grass / Poison
2       Venusaur  Grass / Poison
3     Charmander            Fire
4     Charmeleon            Fire
..           ...             ...
804    Stakataka    Rock / Steel
805  Blacephalon    Fire / Ghost
806      Zeraora        Electric
807       Meltan           Steel
808     Melmetal           Steel

[809 rows x 2 columns]
<class 'pandas.core.frame.DataFrame'>


In [3]:
# Promote the 'Pokemon' column to the index so that 'Type' is the only
# data column left in the frame.
pokemon = pd.read_csv(
    'data/pokemon.csv',
    index_col='Pokemon',
)
pokemon

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


In [4]:
# squeeze('columns') collapses a one-column DataFrame into a Series.
pokemon = (
    pd.read_csv('data/pokemon.csv', index_col='Pokemon')
    .squeeze('columns')
)
print(pokemon, type(pokemon), sep='\n')

Pokemon
Bulbasaur      Grass / Poison
Ivysaur        Grass / Poison
Venusaur       Grass / Poison
Charmander               Fire
Charmeleon               Fire
                    ...      
Stakataka        Rock / Steel
Blacephalon      Fire / Ghost
Zeraora              Electric
Meltan                  Steel
Melmetal                Steel
Name: Type, Length: 809, dtype: object
<class 'pandas.core.series.Series'>


In [5]:
# Plain read: parse_dates is not passed here, so 'Date' is not converted to
# datetimes. Pass parse_dates=['Date'] to read_csv to get real date values.
google = pd.read_csv('data/google_stocks.csv')
google

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.50
3,2004-08-24,52.24
4,2004-08-25,52.80
...,...,...
3819,2019-10-21,1246.15
3820,2019-10-22,1242.80
3821,2019-10-23,1259.13
3822,2019-10-24,1260.99


In [6]:
# Parse 'Date' into datetimes, use it as the index, and squeeze the single
# remaining column ('Close') into a Series.
google = (
    pd.read_csv(
        'data/google_stocks.csv',
        parse_dates=['Date'],
        index_col='Date',
    )
    .squeeze('columns')
)
google

Date
2004-08-19      49.98
2004-08-20      53.95
2004-08-23      54.50
2004-08-24      52.24
2004-08-25      52.80
               ...   
2019-10-21    1246.15
2019-10-22    1242.80
2019-10-23    1259.13
2019-10-24    1260.99
2019-10-25    1265.13
Name: Close, Length: 3824, dtype: float64

In [7]:
# Load the raw battles table as-is; the displayed columns are Battle,
# Start Date (shown in m/d/yyyy form) and State.
battles = pd.read_csv('data/revolutionary_war.csv')
battles

Unnamed: 0,Battle,Start Date,State
0,Powder Alarm,9/1/1774,Massachusetts
1,Storming of Fort William and Mary,12/14/1774,New Hampshire
2,Battles of Lexington and Concord,4/19/1775,Massachusetts
3,Siege of Boston,4/19/1775,Massachusetts
4,Gunpowder Incident,4/20/1775,Virginia
...,...,...,...
227,Siege of Fort Henry,9/11/1782,Virginia
228,Grand Assault on Gibraltar,9/13/1782,
229,Action of 18 October 1782,10/18/1782,
230,Action of 6 December 1782,12/6/1782,


In [8]:
# With 'Start Date' as the index, two data columns (Battle and State) remain,
# so squeeze('columns') has nothing to collapse and the result is still a
# DataFrame.
battles = (
    pd.read_csv(
        'data/revolutionary_war.csv',
        parse_dates=['Start Date'],
        index_col='Start Date',
    )
    .squeeze('columns')
)
battles

Unnamed: 0_level_0,Battle,State
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1774-09-01,Powder Alarm,Massachusetts
1774-12-14,Storming of Fort William and Mary,New Hampshire
1775-04-19,Battles of Lexington and Concord,Massachusetts
1775-04-19,Siege of Boston,Massachusetts
1775-04-20,Gunpowder Incident,Virginia
...,...,...
1782-09-11,Siege of Fort Henry,Virginia
1782-09-13,Grand Assault on Gibraltar,
1782-10-18,Action of 18 October 1782,
1782-12-06,Action of 6 December 1782,


In [26]:
# squeeze() had no effect in the previous cell because two data columns
# remained besides the index. Load only one data column ('State') plus the
# column used as the index, so that squeeze can return a Series.
battles = (
    pd.read_csv(
        'data/revolutionary_war.csv',
        usecols=['State', 'Start Date'],
        parse_dates=['Start Date'],
        index_col='Start Date',
    )
    .squeeze('columns')
)
type(battles)

pandas.core.series.Series

In [10]:
# Sort the states alphabetically (ascending is the default); missing values
# end up at the bottom.
battles.sort_values(ascending=True)

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1782-08-08            NaN
1782-08-25            NaN
1782-09-13            NaN
1782-10-18            NaN
1782-12-06            NaN
Name: State, Length: 232, dtype: object

In [11]:
# Reverse alphabetical order. na_position defaults to 'last', spelled out
# here, so missing values still come at the bottom.
battles.sort_values(ascending=False, na_position='last')

Start Date
1783-01-22    Virginia
1781-09-28    Virginia
1779-05-10    Virginia
1781-09-05    Virginia
1781-07-06    Virginia
                ...   
1782-08-08         NaN
1782-08-25         NaN
1782-09-13         NaN
1782-10-18         NaN
1782-12-06         NaN
Name: State, Length: 232, dtype: object

In [12]:
# Same descending sort, but with the missing values moved to the top.
battles.sort_values(
    na_position='first',
    ascending=False,
)

Start Date
1775-09-17            NaN
1775-12-31            NaN
1776-03-03            NaN
1776-03-25            NaN
1776-05-18            NaN
                 ...     
1777-05-17        Florida
1777-09-03       Delaware
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
Name: State, Length: 232, dtype: object

In [13]:
# Discard the missing states first, then sort what is left alphabetically.
(
    battles
    .dropna()
    .sort_values()
)

Start Date
1781-09-06    Connecticut
1779-07-05    Connecticut
1777-04-27    Connecticut
1777-09-03       Delaware
1777-05-17        Florida
                 ...     
1781-07-06       Virginia
1781-07-01       Virginia
1781-06-26       Virginia
1781-04-25       Virginia
1783-01-22       Virginia
Name: State, Length: 162, dtype: object

In [14]:
# Order the rows chronologically by the 'Start Date' index; rows whose date
# is missing (NaT) are listed last.
battles.sort_index(ascending=True)

Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1783-01-22         Virginia
NaT              New Jersey
NaT                Virginia
NaT                     NaN
NaT                     NaN
Name: State, Length: 232, dtype: object

In [15]:
# nlargest / nsmallest return Series, so their printout continues on the
# lines that follow each label.
print(' 7 largest values of google serie : ', google.nlargest(n=7))
print('------------------')
print(' 3 smallest values of google serie : ', google.nsmallest(n=3))
print('------------------')

# Scalar summaries: each one prints on a single line.
print('max value of google serie : ', google.max())
print('min value of google serie : ', google.min())

# Number of distinct, non-missing states in the battles Series.
print('number of unique values in battles serie : ', battles.nunique())

 7 largest values of google serie :  Date
2019-04-29    1287.58
2019-04-26    1272.18
2018-07-26    1268.33
2019-10-25    1265.13
2019-04-23    1264.55
2018-07-25    1263.70
2019-04-25    1263.45
Name: Close, dtype: float64
------------------
 3 smallest values of google serie :  Date
2004-09-03    49.82
2004-09-01    49.94
2004-08-19    49.98
Name: Close, dtype: float64
------------------
max value of google serie :  1287.58
min value of google serie :  49.82
number of unique values in battles serie :  17


In [16]:
# Number of battles per state, rarest states first.
battles.value_counts(ascending=True)

Ohio               1
New Hampshire      1
Indiana            1
Delaware           1
Louisiana          1
Rhode Island       3
Connecticut        3
Vermont            3
Georgia            6
Florida            8
North Carolina     9
Pennsylvania      10
Massachusetts     11
Virginia          21
New Jersey        24
New York          28
South Carolina    31
Name: State, dtype: int64

In [17]:
# Share of battles per state as a fraction of all non-missing entries.
battles.value_counts(normalize=True)

South Carolina    0.191358
New York          0.172840
New Jersey        0.148148
Virginia          0.129630
Massachusetts     0.067901
Pennsylvania      0.061728
North Carolina    0.055556
Florida           0.049383
Georgia           0.037037
Rhode Island      0.018519
Connecticut       0.018519
Vermont           0.018519
New Hampshire     0.006173
Delaware          0.006173
Indiana           0.006173
Louisiana         0.006173
Ohio              0.006173
Name: State, dtype: float64

In [18]:
# Same shares, scaled from fractions to percentages.
battles.value_counts(normalize=True).mul(100)

South Carolina    19.135802
New York          17.283951
New Jersey        14.814815
Virginia          12.962963
Massachusetts      6.790123
Pennsylvania       6.172840
North Carolina     5.555556
Florida            4.938272
Georgia            3.703704
Rhode Island       1.851852
Connecticut        1.851852
Vermont            1.851852
New Hampshire      0.617284
Delaware           0.617284
Indiana            0.617284
Louisiana          0.617284
Ohio               0.617284
Name: State, dtype: float64

In [19]:
# Percentage of battles per state, rounded to two decimals.
(
    battles
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

South Carolina    19.14
New York          17.28
New Jersey        14.81
Virginia          12.96
Massachusetts      6.79
Pennsylvania       6.17
North Carolina     5.56
Florida            4.94
Georgia            3.70
Rhode Island       1.85
Connecticut        1.85
Vermont            1.85
New Hampshire      0.62
Delaware           0.62
Indiana            0.62
Louisiana          0.62
Ohio               0.62
Name: State, dtype: float64

In [20]:
# Hand-picked bin edges for the closing price; value_counts then reports how
# many prices fall into each interval, most populated interval first.
b = [0, 200, 400, 600, 1000, 1300]
google.value_counts(bins=b)

(200.0, 400.0]      1568
(-0.001, 200.0]      595
(600.0, 1000.0]      587
(400.0, 600.0]       575
(1000.0, 1300.0]     499
Name: Close, dtype: int64

In [21]:
# Let pandas cut the price range into 6 bins, then sort by the interval index
# so the bins appear from lowest to highest price instead of by count.
(
    google
    .value_counts(bins=6)
    .sort_index()
)

(48.581, 256.113]      1204
(256.113, 462.407]     1104
(462.407, 668.7]        507
(668.7, 874.993]        380
(874.993, 1081.287]     292
(1081.287, 1287.58]     337
Name: Close, dtype: int64

In [22]:
# value_counts also works on the index: count how many rows share each
# start date.
battles.index.value_counts()

1781-04-25    2
1781-05-22    2
1780-08-18    2
1781-09-13    2
1782-03-16    2
             ..
1778-06-30    1
1778-07-03    1
1778-07-27    1
1778-08-21    1
1783-01-22    1
Name: Start Date, Length: 217, dtype: int64

In [23]:
# The closing prices carry two decimals. Series.round() can round them
# directly; apply(round) is an alternative that calls Python's built-in
# round on each value.
google

Date
2004-08-19      49.98
2004-08-20      53.95
2004-08-23      54.50
2004-08-24      52.24
2004-08-25      52.80
               ...   
2019-10-21    1246.15
2019-10-22    1242.80
2019-10-23    1259.13
2019-10-24    1260.99
2019-10-25    1265.13
Name: Close, Length: 3824, dtype: float64

In [24]:
# apply takes a function and calls it on every value in the Series; with
# round this yields whole numbers (dtype int64 in the output).
google.apply(round)

Date
2004-08-19      50
2004-08-20      54
2004-08-23      54
2004-08-24      52
2004-08-25      53
              ... 
2019-10-21    1246
2019-10-22    1243
2019-10-23    1259
2019-10-24    1261
2019-10-25    1265
Name: Close, Length: 3824, dtype: int64