# Pandas - Series

In [None]:


import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [5]:
# A Series pairs every value with an index label; when no index is given
# the labels default to 0, 1, 2, ...
obj = Series(data=[5, 10, 15, 20, 25])
obj

0     5
1    10
2    15
3    20
4    25
dtype: int64

In [9]:
# Inspect the two parts of a Series: its values and its index.
# Only the last expression of a cell is echoed, so just obj.index is shown below.
obj.values
obj.index

RangeIndex(start=0, stop=5, step=1)

In [38]:
# A Series can behave like an associative array: every value is addressed
# by an explicit label instead of a positional index.
ww2_cas = Series(
    data=[8300000, 4300000, 3000000, 2100000, 400000],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)
# The two lookups below are evaluated but not echoed; only the final
# expression of the cell is displayed.
ww2_cas.index
ww2_cas.values
ww2_cas


USSR       8300000
Germany    4300000
China      3000000
Japan      2100000
USA         400000
dtype: int64

In [14]:
# Look up a single value by its index label
ww2_cas['USA']

400000

In [17]:
# Bulk operations: the comparison is applied to every value at once and the
# resulting boolean mask keeps only the entries above 4,000,000
ww2_cas[ww2_cas > 4000000]


USSR       8300000
Germany    4300000
dtype: int64

In [20]:
# Membership test: checks whether 'USSR' is one of the index labels
'USSR' in ww2_cas

True

In [43]:
# Series to dictionary: the index labels become the keys
ww2_dict = ww2_cas.to_dict()
ww2_dict

{'China': 3000000,
 'Germany': 4300000,
 'Japan': 2100000,
 'USA': 400000,
 'USSR': 8300000}

In [45]:
# From dict back to Series: the dict keys become the index labels
pd.Series(ww2_dict)

China      3000000
Germany    4300000
Japan      2100000
USA         400000
USSR       8300000
dtype: int64

In [51]:
# Build the Series from the dict again, this time with an explicit index.
# 'India' has no entry in ww2_dict, so it shows up as NaN (and the dtype
# becomes float64 in the output below).
countries = ['USSR', 'Germany', 'China', 'Japan', 'USA', 'India']
pd.Series(ww2_dict, index=countries)

USSR       8300000.0
Germany    4300000.0
China      3000000.0
Japan      2100000.0
USA         400000.0
India            NaN
dtype: float64

In [50]:
# An index that names a single key selects just that entry.
# Note: this rebinds `countries` to a one-element list.
countries = ['USSR']
pd.Series(ww2_dict, index=countries)

USSR    8300000
dtype: int64

In [77]:
# Check which index labels hold a missing (NaN) value.
#
# The index is spelled out here instead of reusing `countries`: the cell
# directly above rebinds `countries` to ['USSR'], so on a top-to-bottom run
# (Restart Kernel -> Run All) 'India' would never reach this Series and the
# isnull/notnull results would contain no missing entry at all.

# Pretty-printer; `pp` is also used by later cells
import pprint
pp = pprint.PrettyPrinter(indent=4)

print("\nPrinting Dict :\n")
pp.pprint(ww2_dict)

# 'India' is not a key of ww2_dict, so it becomes the one NaN entry
o = pd.Series(ww2_dict, index=['USSR', 'Germany', 'China', 'Japan', 'USA', 'India'])

# Element-wise flags: True where the value is missing
print("\nisnull output :\n")
pp.pprint(pd.isnull(o))

# Element-wise flags: True where a value is present
print("\nnotnull output :\n")
pp.pprint(pd.notnull(o))


Printing Dict :

{   'China': 3000000,
    'Germany': 4300000,
    'Japan': 2100000,
    'USA': 400000,
    'USSR': 8300000}

isnull output :

USSR       False
Germany    False
China      False
Japan      False
USA        False
India       True
dtype: bool

notnull output :

USSR        True
Germany     True
China       True
Japan       True
USA         True
India      False
dtype: bool


In [84]:
# Two Series over the same labels, deliberately listed in different orders
s1 = Series(
    data=[8300000, 4300000, 3000000, 2100000, 400000],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)
s2 = Series(
    data=[1000000, 1000000, 1000000, 1000000, 100000],
    index=['China', 'Japan', 'USA', 'USSR', 'Germany'],
)

# Addition aligns on index labels rather than positions, so every country
# is summed with its own counterpart even though the orders differ
s3 = s1.add(s2)
s3

China      4000000
Germany    4400000
Japan      3100000
USA        1400000
USSR       9300000
dtype: int64

In [90]:
# Explicit naming: a Series and its index can each carry a name,
# and both appear when the Series is displayed
s1.name = "Crap1"
s2.name = "Crap2"
s1.index.name = 'Countries'
s1

Countries
USSR       8300000
Germany    4300000
China      3000000
Japan      2100000
USA         400000
Name: Crap1, dtype: int64

# Pandas - DataFrame

In [97]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [133]:
# Load the tables from the Wikipedia page of all-time NFL win-loss records.
# NOTE(review): `webbrowser` is not used in any of the visible cells.
import webbrowser
website = 'https://en.wikipedia.org/wiki/List_of_all-time_NFL_win%E2%80%93loss_records'
# Alternative call that explicitly requests the 'bs4' parser flavor:
#data = pd.read_html(website, header =0, flavor = 'bs4')
# read_html returns a list with one DataFrame per table found on the page;
# header=0 takes the column names from each table's first row.
data = pd.read_html(website, header =0)
# The table at position 1 holds the win-loss records shown below.
# NOTE(review): this position depends on the live page layout - confirm if the page changes.
nfl_frame = data[1]
nfl_frame


Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East
5,6,Minnesota Vikings,870,470,390,10,0.546,1961,NFC North
6,7,Baltimore Ravens,352,190,161,1,0.541,1996,AFC North
7,8,New York Giants,1305,687,585,33,0.539,1925,NFC East
8,9,Denver Broncos,884,470,404,10,0.537,1960,AFC West
9,10,San Francisco 49ers,1002,528,460,14,0.534,1950,NFC West


In [134]:
# Keep the rows at positions 1-9 of the scraped table. The slice end is
# exclusive and the start is 1, so this is nine rows and position 0 is left out.
# NOTE(review): the earlier comment here said "first 10"; use
# data[1].iloc[:10] if that is what is wanted. The recorded outputs
# reflect rows 1-9, so that selection is kept.
#
# Slicing from data[1] instead of from nfl_frame itself keeps the cell
# idempotent: re-running it no longer shrinks the frame further each time.
nfl_frame = data[1].iloc[1:10]

# Column labels, then one column read through attribute access
pp.pprint(nfl_frame.columns)
pp.pprint(nfl_frame.Team)

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Division'],
      dtype='object')
1          Green Bay Packers
2              Chicago Bears
3             Miami Dolphins
4    New England Patriots[b]
5          Minnesota Vikings
6           Baltimore Ravens
7            New York Giants
8             Denver Broncos
9        San Francisco 49ers
Name: Team, dtype: object


In [135]:
# Select a single column by its label; bracket syntax is needed here
# because the column name contains spaces
pp.pprint(nfl_frame['First NFL Season'])

1    1921
2    1920
3    1966
4    1960
5    1961
6    1996
7    1925
8    1960
9    1950
Name: First NFL Season, dtype: int64


In [136]:
# Build a new DataFrame that keeps only the listed columns
DataFrame(nfl_frame, columns=['Team', 'GP', 'First NFL Season' ])

Unnamed: 0,Team,GP,First NFL Season
1,Green Bay Packers,1336,1921
2,Chicago Bears,1370,1920
3,Miami Dolphins,800,1966
4,New England Patriots[b],884,1960
5,Minnesota Vikings,870,1961
6,Baltimore Ravens,352,1996
7,New York Giants,1305,1925
8,Denver Broncos,884,1960
9,San Francisco 49ers,1002,1950


In [137]:
# Ask for a column that does not exist: 'City' is still created,
# but it holds no data (it is empty in the output below)
DataFrame(nfl_frame, columns=['Team', 'GP', 'First NFL Season', 'City' ])

Unnamed: 0,Team,GP,First NFL Season,City
1,Green Bay Packers,1336,1921,
2,Chicago Bears,1370,1920,
3,Miami Dolphins,800,1966,
4,New England Patriots[b],884,1960,
5,Minnesota Vikings,870,1961,
6,Baltimore Ravens,352,1996,
7,New York Giants,1305,1925,
8,Denver Broncos,884,1960,
9,San Francisco 49ers,1002,1950,


In [144]:
# Peek at the first rows (head) and the last rows (tail).
# Only the last expression of a cell is echoed, so just tail(3) is shown below.
nfl_frame.head(2)
nfl_frame.tail(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
7,8,New York Giants,1305,687,585,33,0.539,1925,NFC East
8,9,Denver Broncos,884,470,404,10,0.537,1960,AFC West
9,10,San Francisco 49ers,1002,528,460,14,0.534,1950,NFC West


In [165]:
# Print the row at position 2, i.e. the third row (positions start at 0)

print(nfl_frame.iloc[2])
print("\n")

# Print the column at position 2, i.e. the third column (GP)
print(nfl_frame.iloc[:,2])

Rank                             4
Team                Miami Dolphins
GP                             800
Won                            445
Lost                           351
Tied                             4
Pct.                         0.559
First NFL Season              1966
Division                  AFC East
Name: 3, dtype: object


1    1336
2    1370
3     800
4     884
5     870
6     352
7    1305
8     884
9    1002
Name: GP, dtype: int64
