### Pandas

<b> Series</b> - (Array of data and data labels, its index)

In [3]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [4]:
obj = Series([1,2,3,4])

In [5]:
# it will show series with index
obj

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
# Let's see values only

obj.values

array([1, 2, 3, 4], dtype=int64)

In [7]:
# Let's show the index
obj.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
#Let's create a series with an index

ww2_cas = Series([870000,430000,400000,300000], index=['USSR','India','USA', 'Germany'])

In [9]:
ww2_cas

USSR       870000
India      430000
USA        400000
Germany    300000
dtype: int64

In [10]:
ww2_cas['India']

430000

In [15]:
ww2_cas[ww2_cas < 400000]

Germany    300000
dtype: int64

In [16]:
ww2_dict = ww2_cas.to_dict()

In [17]:
ww2_dict

{'Germany': 300000, 'India': 430000, 'USA': 400000, 'USSR': 870000}

In [18]:
ww2_dict['India']

430000

In [19]:
ww2_series = Series(ww2_dict)

In [20]:
ww2_series

Germany    300000
India      430000
USA        400000
USSR       870000
dtype: int64

In [21]:
countries = ['USA', 'Germany', 'India', 'USSR', 'China']

In [22]:
obj2 = Series(ww2_dict, index= countries)

In [23]:
obj2

USA        400000.0
Germany    300000.0
India      430000.0
USSR       870000.0
China           NaN
dtype: float64

In [25]:
# We can use isnull and notnull to find missing data
obj2.isnull()

USA        False
Germany    False
India      False
USSR       False
China       True
dtype: bool

In [27]:
obj2.notnull()

USA         True
Germany     True
India       True
USSR        True
China      False
dtype: bool

In [28]:
ww2_series

Germany    300000
India      430000
USA        400000
USSR       870000
dtype: int64

In [29]:
ww2_series + obj2

China            NaN
Germany     600000.0
India       860000.0
USA         800000.0
USSR       1740000.0
dtype: float64

In [30]:
obj2.name = 'World war 2 casualities'

In [31]:
obj2

USA        400000.0
Germany    300000.0
India      430000.0
USSR       870000.0
China           NaN
Name: World war 2 casualities, dtype: float64

In [33]:
obj2.index.name = ('Countries')

In [35]:
obj2

Countries
USA        400000.0
Germany    300000.0
India      430000.0
USSR       870000.0
China           NaN
Name: World war 2 casualities, dtype: float64

## DataFrames

In [5]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import webbrowser

In [6]:
# webiste = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
# webbrowser.open(webiste)

In [8]:
nfl_frame = pd.read_csv("NFL_win_loss.csv")

In [10]:
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East


In [11]:
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [14]:
nfl_frame.Team

0            Chicago Bears
1           Dallas Cowboys
2        Green Bay Packers
3           Miami Dolphins
4     New England Patriots
5      San Francisco 49ers
6         Baltimore Ravens
7        Minnesota Vikings
8          New York Giants
9           Denver Broncos
10      Indianapolis Colts
11         Oakland Raiders
12     Pittsburgh Steelers
13      Kansas City Chiefs
14     Washington Redskins
15        Cleveland Browns
16        Seattle Seahawks
17      San Diego Chargers
18        Los Angeles Rams
Name: Team, dtype: object

In [15]:
DataFrame(nfl_frame, columns=['Team', 'Total Games', 'Division'])

Unnamed: 0,Team,Total Games,Division
0,Chicago Bears,1338,NFC North
1,Dallas Cowboys,850,NFC East
2,Green Bay Packers,1304,NFC North
3,Miami Dolphins,768,AFC East
4,New England Patriots,852,AFC East
5,San Francisco 49ers,970,NFC West
6,Baltimore Ravens,320,AFC North
7,Minnesota Vikings,838,NFC North
8,New York Giants,1273,NFC East
9,Denver Broncos,852,AFC West


In [17]:
DataFrame(nfl_frame, columns=['Team', 'Total Games', 'Won', 'Lost', 'Ration'])

Unnamed: 0,Team,Total Games,Won,Lost,Ration
0,Chicago Bears,1338,741,555,
1,Dallas Cowboys,850,480,364,
2,Green Bay Packers,1304,720,547,
3,Miami Dolphins,768,429,335,
4,New England Patriots,852,462,381,
5,San Francisco 49ers,970,520,436,
6,Baltimore Ravens,320,173,146,
7,Minnesota Vikings,838,449,379,
8,New York Giants,1273,673,567,
9,Denver Broncos,852,456,386,


In [20]:
nfl_frame['Ration'] = nfl_frame.Won/nfl_frame.Lost

In [21]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Ration
0,1,Chicago Bears,741,555,42,0.57,1920,1338,NFC North,1.335135
1,2,Dallas Cowboys,480,364,6,0.568,1960,850,NFC East,1.318681
2,3,Green Bay Packers,720,547,37,0.566,1921,1304,NFC North,1.316271
3,4,Miami Dolphins,429,335,4,0.561,1966,768,AFC East,1.280597
4,5,New England Patriots,462,381,9,0.548,1960,852,AFC East,1.212598
5,6,San Francisco 49ers,520,436,14,0.543,1950,970,NFC West,1.192661
6,7,Baltimore Ravens,173,146,1,0.542,1996,320,AFC North,1.184932
7,8,Minnesota Vikings,449,379,10,0.542,1961,838,NFC North,1.184697
8,9,New York Giants,673,567,33,0.542,1925,1273,NFC East,1.186949
9,10,Denver Broncos,456,386,10,0.541,1960,852,AFC West,1.181347


In [31]:
data = {'City': ['India', 'USA', 'Russia', 'China'], 'Population': [50000000,40000100,5000000,549818545]}

In [43]:
city_frame = DataFrame(data)

In [44]:
city_frame

Unnamed: 0,City,Population
0,India,50000000
1,USA,40000100
2,Russia,5000000
3,China,549818545


In [39]:
city_frame = np.where(city_frame.Population < 40000100, 500000000, city_frame.Population)

In [41]:
city_frame

array([ 50000000,  40000100, 500000000, 549818545], dtype=int64)