# How to select a Series from a DataFrame


In [1]:
import pandas as pd

In [2]:
# Read the UFO reports CSV from the bit.ly short link into a DataFrame.
ufo = pd.read_csv(filepath_or_buffer='http://bit.ly/uforeports')

In [3]:
# Preview the first five rows to see which columns are available.
ufo.head(n=5)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [4]:
# Bracket notation with a single column label returns that column as a Series.
ufo['City'].head(n=5)

0                  Ithaca
1             Willingboro
2                 Holyoke
3                 Abilene
4    New York Worlds Fair
Name: City, dtype: object

In [5]:
# To select several columns, pass a list of column labels inside the indexing
# brackets (hence the double brackets); the result is a DataFrame.
ufo[['City', 'Shape Reported']].head(n=5)

Unnamed: 0,City,Shape Reported
0,Ithaca,TRIANGLE
1,Willingboro,OTHER
2,Holyoke,OVAL
3,Abilene,DISK
4,New York Worlds Fair,LIGHT


In [6]:
# .loc[rows, columns]: ':' keeps every row, and the label slice 'City':'State'
# keeps every column from 'City' through 'State' (both endpoints included).
ufo.loc[:, 'City':'State'].head(n=5)

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY


In [7]:
# .loc also accepts a list of column labels: all rows, only the listed columns.
ufo.loc[:, ['City', 'Colors Reported']].head(n=5)

Unnamed: 0,City,Colors Reported
0,Ithaca,
1,Willingboro,
2,Holyoke,
3,Abilene,
4,New York Worlds Fair,


In [8]:
# Boolean-mask filter with the column accessed via dot notation:
# keeps only the rows whose City equals 'Abilene'.
ufo.loc[ufo.City == 'Abilene']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
3,Abilene,,DISK,KS,6/1/1931 13:00
6654,Abilene,,TRIANGLE,TX,9/1/1991 1:00
8357,Abilene,,SPHERE,TX,7/15/1995 0:00
8783,Abilene,,,KS,10/14/1995 23:20
10883,Abilene,,,TX,10/19/1997 20:45


In [9]:
# Boolean-mask filter with the column accessed via bracket notation:
# keeps only the rows whose City equals 'Abilene'.
ufo.loc[ufo['City'] == 'Abilene']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
3,Abilene,,DISK,KS,6/1/1931 13:00
6654,Abilene,,TRIANGLE,TX,9/1/1991 1:00
8357,Abilene,,SPHERE,TX,7/15/1995 0:00
8783,Abilene,,,KS,10/14/1995 23:20
10883,Abilene,,,TX,10/19/1997 20:45


In [10]:
# Inverse filter: keep the rows whose City is anything other than 'Abilene',
# then preview the first five of them.
ufo.loc[ufo.City != 'Abilene'].head(n=5)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00
5,Valley City,,DISK,ND,9/15/1934 15:30


### Using the `query` method to filter rows

In [11]:
# query() takes the row filter as a string expression; column names are
# written bare and string values are quoted inside the expression.
ufo.query(expr="City == 'Holyoke'")

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
2,Holyoke,,OVAL,CO,2/15/1931 14:00
771,Holyoke,,DISK,MA,1/20/1963 22:00
3221,Holyoke,,DISK,MA,6/5/1976 23:00
7982,Holyoke,,LIGHT,MA,4/1/1995 18:00
18211,Holyoke,,DIAMOND,MA,12/28/2000 18:00


In [12]:
# Inside a query expression, `in` tests membership in a list: keep the rows
# whose City is 'Ithaca' or 'Holyoke'. The quoting may be swapped (single
# quotes around the expression, double quotes around the values).
ufo.query("City in ['Ithaca', 'Holyoke']")

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
771,Holyoke,,DISK,MA,1/20/1963 22:00
3221,Holyoke,,DISK,MA,6/5/1976 23:00
4068,Ithaca,,CIGAR,NY,6/1/1979 19:00
5631,Ithaca,,OTHER,MI,6/1/1987 17:00
6961,Ithaca,,OTHER,NY,1/10/1993 0:30
7573,Ithaca,RED GREEN,LIGHT,NY,10/15/1994 18:00
7982,Holyoke,,LIGHT,MA,4/1/1995 18:00
9088,Ithaca,,,NY,2/16/1996 21:45


## The `value_counts` method

`value_counts` counts how many times each unique value occurs in a Series.

In [13]:
# Show the first two rows as a reminder of the column names.
ufo.head(n=2)

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00


In [15]:
# Count the sightings per city (sorted from most to least frequent) and keep
# the ten most common cities.
ufo['City'].value_counts().head(n=10)

Seattle          187
New York City    161
Phoenix          137
Houston          108
Las Vegas        105
Portland         102
San Diego        101
Los Angeles       98
Chicago           73
Austin            62
Name: City, dtype: int64

In [16]:
# Count the sightings per reported shape and keep the ten most common shapes.
ufo['Shape Reported'].value_counts().head(n=10)

LIGHT        2803
DISK         2122
TRIANGLE     1889
OTHER        1402
CIRCLE       1365
SPHERE       1054
FIREBALL     1039
OVAL          845
CIGAR         617
FORMATION     434
Name: Shape Reported, dtype: int64

In [20]:
# Count the sightings per state and keep the ten states with the most reports.
ufo['State'].value_counts().head(n=10)

CA    2529
WA    1322
TX    1027
NY     914
FL     837
AZ     738
OH     667
IL     613
PA     598
MI     591
Name: State, dtype: int64

In [18]:
# normalize=True returns relative frequencies instead of raw counts: each value
# is a fraction between 0 and 1 (e.g. 0.138644 for CA), not a percentage.
# Multiply by 100 if percentages are wanted.
ufo['State'].value_counts(normalize=True).head(n=10)

CA    0.138644
WA    0.072474
TX    0.056302
NY    0.050107
FL    0.045886
AZ    0.040458
OH    0.036566
IL    0.033606
PA    0.032783
MI    0.032400
Name: State, dtype: float64